#!/bin/bash
echo "prepare bundles..."

binDir=$(cd `dirname $0`; pwd)
bamDir=$1 #bam should be sort by position
sampleFile=$1.sample.list #1th column: sample id; 2nd column: label (eg:tissue, cell_line) 
gtfFile=$2
tmpDir=${1}_tmp #save temporal results
mkdir -p $tmpDir

###### prepare for bundle ######
sample_list=$(cat $sampleFile | cut -f1)
for sample in $sample_list;do
	if [ ! -f $bamDir/$sample.bam ]; then
		echo "$bamDir/$sample.bam does not exist!"
		exit 1
	fi
	if [ ! -f $bamDir/$sample.bundle.bed ] || [ ! -f $bamDir/frag_stat.txt ] || [ ! -f $bamDir/fpkm.txt ]; then
		if [ ! -f $tmpDir/$sample.log ] || [ ! -f $tmpDir/$sample.gtf ]; then
			echo "run stringtie for $sample"
			stringtie $bamDir/$sample.bam -p 10 -o $tmpDir/$sample.gtf -G $gtfFile -e -B -v > $tmpDir/$sample.log 2>&1
			echo $sample stringite finished
		fi
		if [ ! -f $bamDir/$sample.bundle.bed ]; then
			python $binDir/extract_bundle_info.py $tmpDir/$sample.log $tmpDir/$sample.bundle.txt
			python $binDir/bundle_to_bed.py $tmpDir/$sample.bundle.txt $bamDir/$sample.bundle.bed
		fi
	fi
done

###### extract fragment info ######
if [ ! -f $bamDir/frag_stat.txt ] || [ -s $bamDir/frag_stat.txt ]; then
	> $bamDir/frag_stat.txt
	for sample in $sample_list;do
		tcf=`tail $tmpDir/$sample.log | grep "Total count of aligned fragments: " | cut -d ' ' -f 6`
		fcl=`tail $tmpDir/$sample.log | grep "Fragment coverage length:" | cut -d ':' -f 2`
		echo -e "${sample}\t${tcf}\t${fcl}" >> $bamDir/frag_stat.txt
	done
fi

###### merge expression profile ######
if [ ! -f $bamDir/fpkm.txt ]; then
	echo "merge expression profile..."
	grep -w "transcript" $gtfFile | awk -F "\"" '{print $4}' > $tmpDir/total.transcripts.id 
	python $binDir/combine_expr.py $sampleFile $tmpDir/total.transcripts.id $tmpDir $bamDir/fpkm.txt
fi

###### normalization ######
if [ ! -f $bamDir/normalized.fpkm.txt ] || [ ! -f $bamDir/geo_factors.txt ]; then
	echo "normalization..."
	Rscript $binDir/cal_geof.R $bamDir/fpkm.txt $bamDir/normalized.fpkm.txt $bamDir/geo_factors.txt
fi

###### remove tmp files ######
rm -f $tmpDir/*.bundle.txt $tmpDir/*.ctab $tmpDir/total.transcripts.id

echo "bundles are ready!"
