#!/bin/bash
echo "start run secondary structure module..."

binDir=$(cd `dirname $0`; pwd)
export CLASSPATH=$binDir/../../software/VARNAv3-93.jar:$CLASSPATH
export PATH=$binDir/../../software/kentUtils:$PATH
export PATH=$binDir/../../software/scour:$PATH
seqDir=$1
species=$2
drawType=$3
dataDir=$4/$species/evolution/phyloP
seqId=`basename $seqDir`
outDir=$seqDir/secondary_structure
rm -rf $outDir
mkdir -p $outDir


###### seq length filter ######
fastaFile=$seqDir/${seqId}.fa
seq_len=`cat $fastaFile | tail -n 1 | wc -m`
seq_len=$(($seq_len-1))
if [ $seq_len -gt 20000 ]; then
	echo "[Exit 0] The sequence length is $seq_len, larger than 20000."
	tag=`cat $seqDir/status |grep secondary_structure`
	if [[ $tag == ""  ]];then
	echo -e "secondary_structure\tfinished" >> $seqDir/status
	fi
	exit 0
fi

###### run RNAfold ######
cd $outDir
rnafoldResultFile=$outDir/seq_struct.dbn
cat $fastaFile | RNAfold -p -d2 --noLP | grep -P -A 2 "^>" > $rnafoldResultFile

entropy_plot(){
	max=1
	min=0
	colorMapFile=$outDir/${seqId}_entropy.txt
	svgFile=$outDir/${seqId}_entropy.svg
	pngFile=$outDir/${seqId}_entropy.png
	if [ -s $outDir/${seqId}_ss.ps ] && [ -s $outDir/${seqId}_dp.ps ];then
		perl $binDir/relplot.revised.pl $outDir/${seqId}_ss.ps $outDir/${seqId}_dp.ps | tr ';' '\n' | awk '{printf ("%.2f\n", $1)}' | tr "\n" ";" > $colorMapFile
	else
		echo "[Exit 1] The ${seqId}_ss.ps or ${seqId}_dp.ps file is empty or not existed."
	        echo -e "secondary_structure\tfailed" >> $seqDir/status
		exit 1
	fi
	max=1
	min=0
	colorMap=`cat $colorMapFile`
	bash $binDir/sec_struct_al.sh $seqDir $species entropy $$ &
	java -Djava.awt.headless=true fr.orsay.lri.varna.applications.VARNAcmd  -colorMapMax $max  -colorMapMin $min  -algorithm naview -i $rnafoldResultFile  -colorMap "$colorMap" -o $svgFile 2>&1 |tee $outDir/draw_entropy.log
        cat $svgFile  | grep -v ${seqId} > ${svgFile}_tmp
        rsvg-convert -z 0.3 ${svgFile}_tmp -o $pngFile
	mv ${svgFile}_tmp ${svgFile}
	# compress svg
	if [ -s $svgFile ]; then
        	mv -f $svgFile ${svgFile}.tmp
		scour.py -i ${svgFile}.tmp -o $svgFile
		if [ -s $svgFile ]; then
			rm -f ${svgFile}.tmp
		fi
	fi
}

phylop_plot(){
	max=2
	min=-2

	case $species in
		hg19)
			clade_list=("All" "Placental" "Primates")
			;;
		hg38)
			clade_list=("All" "Mammals" "Primates")
			;;
		mm10)
			clade_list=("All" "Euarchontoglire" "Glire" "Placental")
			;;
		*)
			echo "Unsupported species: $species"
			exit 1
	esac

	############# get the phyloP wig file ###################
	regionBedFile=$seqDir/${seqId}_trans1/exon.bed #use first isoform to set colormap
	pslFile=$seqDir/genomic_location/final.psl
	if [ ! -s $regionBedFile ];then
		echo "$regionBedFile is empty, which means no isoform found for this sequence: $seqId"
                echo -e "secondary_structure\tfailed" >> $seqDir/status
		exit 1
	fi
	if [ ! -s $pslFile ]; then
		echo "$pslFile is empty, which means no hit found for this sequence: $seqId"
                echo -e "secondary_structure\tfailed" >> $seqDir/status
		exit 1
	fi

	cd $dataDir
	for clade in ${clade_list[@]};do
		wigFile=$outDir/$species.phyloP.$clade.wig
		colorMap=$outDir/${seqId}_phyloP_$clade.txt
		svgFile=$outDir/${seqId}_phyloP_$clade.svg
		pngFile=$outDir/${seqId}_phyloP_$clade.png
		hgWiggle -bedFile=$regionBedFile $dataDir/$species.phyloP.$clade | grep -P "^[1-9]"  > $wigFile
		if [ ! -s $wigFile ]; then
			echo "$dataDir/$species.phyloP.$clade.wig is empty, which means no phyloP score found for this sequence"
			continue
		fi
		Rscript $binDir/get_phyloP_colormap.R $wigFile $pslFile $colorMap.tmp
		cat $colorMap.tmp | awk '{printf ("%.2f\n", $1)}' | tr "\n" ";" > $colorMap
		rm -f $colorMap.tmp
		colorMap=`cat $colorMap`
		
		bash $binDir/sec_struct_al.sh $seqDir $species phyloP_$clade $$ &
		java -Djava.awt.headless=true fr.orsay.lri.varna.applications.VARNAcmd  -colorMapMax $max  -colorMapMin $min  -algorithm naview -i $rnafoldResultFile  -colorMap "$colorMap" -o $svgFile 2>&1 | tee $outDir/draw_phyloP_${clade}.log
		cat $svgFile  | grep -v ${seqId} > ${svgFile}_tmp
		rsvg-convert -z 0.3 ${svgFile}_tmp -o $pngFile
		mv ${svgFile}_tmp ${svgFile}
		# compress svg
		if [ -s $svgFile ]; then
            		mv -f $svgFile ${svgFile}.tmp
	    		scour.py -i ${svgFile}.tmp -o $svgFile
	    		if [ -s $svgFile ]; then
	        		rm -f ${svgFile}.tmp
	    		fi
		fi
		done
}

###### prepare colormap file  and draw ######
if [ $drawType = 'entropy' ];then 
	entropy_plot
fi
if [ $drawType = 'phyloP' ];then
	phylop_plot
fi
if [ $drawType = 'all' ];then
	entropy_plot
	phylop_plot
fi

#### remove tmp files #####
rm -f $outDir/*.ps
rm -f $outDir/*.wig
rm -f $outDir/*.txt
tag=`cat $seqDir/status |grep secondary_structure`
if [[ $tag == ""  ]];then
echo -e "secondary_structure\tfinished" >> $seqDir/status
fi

echo "secondary structure module is done!"
