#!/bin/bash
#
# Builds a merged, de-duplicated domain list from two upstream gfwlist sources
# plus a local additions file, and installs it as DEST_FILE.

# Abort on the first unhandled command failure. Set here rather than in the
# shebang so it still applies when the script is started as `bash script.sh`.
set -e

# Directory holding the local additions file (gfwlist-add.txt) and the
# generated output.
CUR_DIR="/www/wwwroot/ygopro2.home/app/gfw"
# Private scratch directory for downloads and intermediate files.
TMP_DIR=$(mktemp -d /tmp/gfwlist.XXXXXX)

# Main list; fetch_src base64-decodes this download.
# Alternative jsDelivr URL for the same repository file (disabled):
#SRC_URL_1="https://gcore.jsdelivr.net/gh/gfwlist/gfwlist/gfwlist.txt"
SRC_URL_1="https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt"
# Extra entries, used as downloaded (no base64 decoding).
SRC_URL_2="https://raw.githubusercontent.com/pexcn/daily-extras/master/gfwlist-extras.txt"

# Final location of the generated list.
DEST_FILE="$CUR_DIR/gfwlist.txt"

#######################################
# Download the source lists into the scratch directory.
# Globals:
#   TMP_DIR, CUR_DIR, SRC_URL_1, SRC_URL_2 (all read)
# Outputs (files in TMP_DIR):
#   gfwlist-plain.txt   base64-decoded content of SRC_URL_1
#   gfwlist-extras.txt  content of SRC_URL_2 followed by
#                       $CUR_DIR/gfwlist-add.txt
# Returns:
#   0 on success; the failing command's status as soon as a download,
#   the decode or the append fails.
# Side effects:
#   Changes the working directory to TMP_DIR.
#######################################
fetch_src() {
  cd "$TMP_DIR" || return

  # Download to a file before decoding: in a `curl | base64` pipeline only
  # base64's exit status is visible, so a failed download would go unnoticed.
  # -f makes curl fail on HTTP errors instead of saving the error page.
  curl -fsSL "$SRC_URL_1" -o gfwlist-plain.b64 || return
  base64 -d < gfwlist-plain.b64 > gfwlist-plain.txt || return

  curl -fsSL "$SRC_URL_2" -o gfwlist-extras.txt || return

  # If the download does not end with a newline, add one so its last entry
  # is not fused with the first local entry appended below.
  if [[ -n "$(tail -c 1 gfwlist-extras.txt)" ]]; then
    printf '\n' >> gfwlist-extras.txt || return
  fi
  cat "$CUR_DIR/gfwlist-add.txt" >> gfwlist-extras.txt
}

#######################################
# Turn the downloaded sources into one sorted, de-duplicated domain list.
# Globals:
#   TMP_DIR (read)
# Inputs (files in TMP_DIR):
#   gfwlist-plain.txt   decoded gfwlist rules
#   gfwlist-extras.txt  extra entries; blank lines and lines starting
#                       with '#' are dropped
# Outputs (files in TMP_DIR):
#   gfwlist.txt         merged list without entries ending in ".cn"
#   *.tmp               intermediate files
# Returns:
#   0 on success, non-zero if a processing step fails.
# Side effects:
#   Changes the working directory to TMP_DIR.
#######################################
gen_list() {
  cd "$TMP_DIR" || return

  # patterns from @cokebar/gfwlist2dnsmasq#3b5e3560ede7d1b0a1d02157576822752c48e671
  # Lines to skip: "!" comments, anything containing "[", "@@" exceptions
  # and entries containing an IPv4 address.
  local -r ignore_pattern='^\!|\[|^@@|(https?://){0,1}[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+'
  # Strip a leading "|" / "||" marker and URL scheme.
  local -r head_filter_pattern='s#^(\|\|?)?(https?://)?##g'
  # Strip everything from the first "/" or "%2F" onwards.
  local -r tail_filter_pattern='s#/.*$|%2F.*$##g'
  # Keep only lines that contain something shaped like a domain name.
  local -r domain_pattern='([a-zA-Z0-9][-a-zA-Z0-9]*(\.[a-zA-Z0-9][-a-zA-Z0-9]*)+)'
  # Reduce wildcard / leading-dot forms to the bare domain (capture group 4).
  local -r wildcard_pattern='s#^(([a-zA-Z0-9]*\*[-a-zA-Z0-9]*)?(\.))?([a-zA-Z0-9][-a-zA-Z0-9]*(\.[a-zA-Z0-9][-a-zA-Z0-9]*)+)(\*[a-zA-Z0-9]*)?#\4#g'

  # gfwlist filter
  # The patterns are quoted so the shell never globs or word-splits them.
  grep -vE "$ignore_pattern" gfwlist-plain.txt |
    sed -E "$head_filter_pattern" |
    sed -E "$tail_filter_pattern" |
    grep -E "$domain_pattern" |
    sed -E "$wildcard_pattern" > gfwlist-plain.tmp || return
  # gfwlist-extras filter
  sed -e '/^$/d' -e '/^#/d' gfwlist-extras.txt > gfwlist-extras.tmp || return
  # merge and remove duplicates (extras first, first occurrence wins)
  awk '!seen[$0]++' gfwlist-extras.tmp gfwlist-plain.tmp > gfwlist.tmp || return
  sort -b gfwlist.tmp > gfwlist.txt || return
  # exclude .cn domains; the dot is escaped so only a literal ".cn" suffix
  # matches, not any character followed by "cn"
  sed -i '/\.cn$/d' gfwlist.txt
}

#######################################
# Install the generated list at its final location.
# Globals:
#   TMP_DIR (read), DEST_FILE (read)
# Returns:
#   cp's exit status.
#######################################
copy_dest() {
  # Quoted so paths containing whitespace or glob characters stay intact;
  # "--" stops option parsing should a path ever start with "-".
  cp -f -- "$TMP_DIR/gfwlist.txt" "$DEST_FILE"
}

#######################################
# Delete the scratch directory and report completion.
# Globals:
#   TMP_DIR (read)
# Outputs:
#   Writes "[<script name without .sh>]: done." to stdout.
# Returns:
#   Non-zero, without printing, if the directory cannot be removed.
#######################################
clean_up() {
  # ":?" aborts instead of running rm with an empty operand.
  rm -r -- "${TMP_DIR:?}" || return
  echo "[$(basename "$0" .sh)]: done."
}

# Remove the scratch directory on every exit path. Without this, a step that
# aborts the script leaves TMP_DIR behind, because clean_up only runs after
# all steps succeed. After a successful clean_up the directory is already
# gone and this is a no-op.
trap 'rm -rf -- "${TMP_DIR:?}"' EXIT

fetch_src
gen_list
copy_dest
clean_up
