#!/bin/bash

##set -euxo pipefail
# Name this script was invoked as, with any leading directory part removed.
script_name="${0##*/}"

# Tool output is collected under ./logs (relative to the current directory),
# in a file named after the script with its last extension replaced by ".log".
LOGDIR="logs"
if [[ ! -d "${LOGDIR}" ]]
then
	mkdir "${LOGDIR}"
fi
LOGFILE="${LOGDIR}/${script_name%.*}.log"

# Remote locations. DBFILES is an extended regular expression that selects the
# database archives out of the listing of DBSITE.
FTPSITE='https://ftp.ncbi.nih.gov/pub/mmdb/cdd'
DBSITE="${FTPSITE}/little_endian"
DBFILES='_LE\.tar\.gz$'

# Data files fetched from FTPSITE, one download per entry.
files=(
	"cddid.tbl.gz"
	"cdtrack.txt"
	"family_superfamily_links"
	"cddannot.dat.gz"
	"cddannot_generic.dat.gz"
	"bitscore_specific.txt"
	"specific_arch.txt"
	"superfamily_arch.txt"
)

# usage [MESSAGE...]
#   Print every MESSAGE (if any) as "Error: MESSAGE" on stderr, then print the
#   help text on stdout and terminate the script.
# Globals:   script_name (read)
# Arguments: zero or more error messages
# Exits:     0 when called without messages, 255 when at least one was given
usage ()
{
	local xcode=0
	local msg
	for msg in "$@"
	do
		echo "Error: ${msg}" 1>&2
		xcode=255
	done
	
	cat <<EOF

Download data files from NCBI CDD ftp site. These data files are needed for downloadable domain/architecture annotation tools, such as rpsbproc and sparclbl, to work.

Usage:
	
	${script_name} [-d path/to/datafile_dir] [-b path/to/db_dir] [-h]
	
	path/to/datafile_dir:
		The directory to put downloaded datafiles. If omitted, use ./data.
		If the directory does not exist, will try to create it.
		
	path/to/db_dir:
		The directory to put downloaded databases for rpsblast. If omitted,
		use ./db.
		
	-h:
		Display this help message and exit
EOF
	exit $xcode
}

# Default output locations; -d / -b override them.
dstdir="./data"
dbdir="./db"

# parse_args [ARG...]
#   Interpret the command-line arguments and store the chosen directories in
#   the globals dstdir and dbdir.
#     -d DIR | -d=DIR   directory for the data files
#     -b DIR | -b=DIR   directory for the rpsblast databases
#     -h                print the help text and exit (via usage)
#   Anything else is ignored, with a warning on stderr.
# Globals:   dstdir, dbdir (written)
# Exits:     through usage when -d or -b is the last argument and therefore
#            has no value to consume
parse_args()
{
	local cmd
	while (( $# ))
	do
		cmd="$1"
		shift
		case "$cmd" in
		-d=*)
			dstdir="${cmd#*=}"
			;;
		-d)
			# Without this check a trailing "-d" would set dstdir to "".
			(( $# )) || usage "Option -d requires a directory argument"
			dstdir="$1"
			shift
			;;
		-b=*)
			dbdir="${cmd#*=}"
			;;
		-b)
			(( $# )) || usage "Option -b requires a directory argument"
			dbdir="$1"
			shift
			;;
		-h)
			usage
			;;
		*)
			# Still tolerated, but no longer silently: a mistyped option would
			# otherwise send the downloads to the default directories unnoticed.
			echo "Warning: ignoring unrecognized argument \"${cmd}\"" 1>&2
			;;
		esac
	done
	return 0
}

parse_args "$@"



# Make sure both output directories exist before anything is downloaded.
# A directory that cannot be created is reported through usage.
[[ -d "${dstdir}" ]] || mkdir -p -- "${dstdir}" || usage "Cannot create \"${dstdir}\""
[[ -d "${dbdir}" ]] || mkdir -p -- "${dbdir}" || usage "Cannot create \"${dbdir}\""

# Progress messages go to stderr.
printf '%s\n' "Downloading data files to ${dstdir}" >&2
printf '%s\n' "Downloading RPS database files to ${dbdir}" >&2

##other files

# fetch_data_file NAME
#   Download ${FTPSITE}/NAME to ${dstdir}/NAME and, when NAME ends in ".gz",
#   decompress it in place with "gzip -df". Output of both tools is appended
#   to ${LOGFILE}.
# Globals:   FTPSITE, dstdir, LOGFILE (read)
# Arguments: $1 - file name relative to ${FTPSITE}
# Returns:   0 on success, otherwise the exit status of the failing wget/gzip
fetch_data_file()
{
	local _name="$1"
	local _target="${dstdir}/${_name}"

	wget "${FTPSITE}/${_name}" -O "${_target}" >> "${LOGFILE}" 2>&1 || return $?

	if [[ "${_name##*.}" == "gz" ]]
	then
		# A failed decompression (e.g. a truncated download) must fail the
		# fetch; the path is quoted so directories containing spaces work.
		gzip -df -- "${_target}" >> "${LOGFILE}" 2>&1 || return $?
	fi
	return 0
}

# Fetch every data file in turn; the first failure aborts the script with the
# status of the command that failed.
excode=0
for f in "${files[@]}"
do
	fetch_data_file "${f}" || excode=$?
	if [[ excode -ne 0 ]]
	then
		echo "Download data file $f failed. Check ${LOGFILE} for details" 1>&2
		exit $excode
	fi
done

# dl_db DIR ARCHIVE
#   Download ${DBSITE}/ARCHIVE into the current directory, unpack it into DIR
#   and delete the archive. Output of all steps goes to a per-archive log,
#   <LOGFILE without extension>_<ARCHIVE up to its first dot>.log, which is
#   truncated at the start of each call.
# Globals:   DBSITE, LOGFILE (read)
# Arguments: $1 - directory to extract into
#            $2 - archive file name relative to ${DBSITE}
# Returns:   0 on success, otherwise the exit status of the step that failed
dl_db()
{
	local _dir="$1"
	shift
	local _dbar="$1"
	shift
	
	local _log="${LOGFILE%.*}_${_dbar%%.*}.log"
	local xcode=0

	# -O forces the download into exactly ${_dbar}. Without it wget writes to
	# "${_dbar}.1" when a file of that name is already present (e.g. left by
	# an interrupted run) and tar would then unpack the stale file.
	{
		wget -O "${_dbar}" "${DBSITE}/${_dbar}" &&
		tar -xzf "${_dbar}" -C "${_dir}" &&
		rm -f -- "${_dbar}"
	} > "${_log}" 2>&1 || xcode=$?

	# Do not leave a partial or unusable archive behind after a failure.
	if (( xcode != 0 ))
	then
		rm -f -- "${_dbar}"
	fi
	
	return $xcode
}


# Background download jobs: database name (archive name up to its first dot)
# mapped to the PID of the dl_db job fetching it.
declare -A pids

# wait_db_jobs
#   Wait for every job recorded in pids and print one message on stderr for
#   each job that failed.
# Globals:   pids, LOGFILE (read)
# Returns:   0 when all jobs succeeded, otherwise the exit status of the last
#            failed job that was reaped
wait_db_jobs()
{
	local key
	local rc
	local failed=0
	for key in "${!pids[@]}"
	do
		rc=0
		wait "${pids[$key]}" || rc=$?
		if (( rc != 0 ))
		then
			echo "Downloading of ${key} database unsuccessful (exit $rc). Refer to ${LOGFILE%\.*}_${key}.log for details" 1>&2
			# Remember the failure so a later successful job cannot mask it.
			failed=$rc
		fi
	done
	return $failed
}

# Take the archive names from the HTML listing of ${DBSITE}/ (sed strips the
# markup around each link text, grep keeps names matching ${DBFILES}) and
# start one background download per archive.
# curl: -f produces no listing on an HTTP error, -sS logs errors only.
while read -r dbname
do
	dl_db "${dbdir}" "${dbname}" &
	pids["${dbname%%.*}"]=$!
	
done < <(curl -fsS "${DBSITE}/" 2>> "${LOGFILE}" | sed -E -e 's/^<.*">//' -e 's/<\/a>.*$//' | grep -E "$DBFILES")

if (( ${#pids[@]} == 0 ))
then
	# An empty listing means nothing was downloaded; that is not a success.
	echo "No database archives found at ${DBSITE}/. Check ${LOGFILE} for details" 1>&2
	excode=1
else
	wait_db_jobs || excode=$?
fi

# Final report on stdout; the script's exit status is whatever excode holds.
haserr=
if (( excode != 0 ))
then
	haserr=", but has errors. Refer to log file(s) for details"
fi

printf '%s\n' "${script_name} done${haserr}"
exit $excode

