#!/bin/bash

echo "start run co-expression module..."

# Positional arguments:
#   $1  transDir - working directory; results go to $1/co_expression_function
#   $2  species  - genome build key used to build the data/annotation paths
#   $3  exprDir  - directory containing the expression inputs
#   $4  data root; GO resources live in $4/<species>/co_expression_function
# All four are required: an empty $1 would otherwise make the rm -rf below
# operate on /co_expression_function.
if (( $# < 4 )) || [[ -z "$1" || -z "$2" || -z "$3" || -z "$4" ]]; then
    echo "Usage: $0 <transDir> <species> <exprDir> <dataRoot>" >&2
    exit 1
fi

# Absolute directory of this script. '&&' prevents silently falling back to
# the current directory when the cd fails.
binDir=$(cd -- "$(dirname -- "$0")" && pwd) || {
    echo "Cannot resolve script directory from: $0" >&2
    exit 1
}
transDir=$1
species=$2
exprDir=$3
dataDir=$4/$species/co_expression_function
annoDir=$binDir/../../data/$species/annotation
outDir=$transDir/co_expression_function

# Start from a clean output directory on every run.
rm -rf -- "${outDir:?}"
mkdir -p -- "$outDir" || {
    echo "Cannot create output directory: $outDir" >&2
    exit 1
}

# Pick the annotation GTF that belongs to the requested genome build.
# Any build other than hg19 / hg38 / mm10 aborts the script with status 1.
if [[ "$species" == "hg19" ]]; then
    anno="${annoDir}/annolnc_v1_exon.gtf"
elif [[ "$species" == "hg38" ]]; then
    anno="${annoDir}/gencode.v32.annotation.gtf"
elif [[ "$species" == "mm10" ]]; then
    anno="${annoDir}/gencode.vM23.annotation.gtf"
else
    echo "Unsupported species: $species"
    exit 1
fi

geneListFile=$dataDir/gene_to_trans.id.map
###### prepare GO set ######
# The GO resources are cached in $dataDir; rebuild them only when at least one
# of the three expected files is missing.
if [[ ! -f "$dataDir/GO_set.Rdata" || ! -f "$dataDir/ENSG_has_GO.txt" || ! -f "$dataDir/ENSG_to_symbol.txt" ]]; then
    mkdir -p -- "$dataDir" || {
        echo "Cannot create data directory: $dataDir" >&2
        exit 1
    }
    if [[ ! -f "$geneListFile" ]]; then
        if [[ ! -r "$anno" ]]; then
            echo "Annotation file is not readable: $anno" >&2
            exit 1
        fi
        # Splitting on double quotes, fields 2 and 4 are the first two quoted
        # attribute values of each line containing the word 'transcript'.
        # Write to a temporary file and rename afterwards, so an interrupted
        # run never leaves a truncated map that later runs would reuse.
        geneListTmp=$geneListFile.tmp.$$
        if grep -w 'transcript' -- "$anno" \
            | awk -F '"' 'BEGIN { OFS = "\t" } { print $2, $4 }' > "$geneListTmp"; then
            mv -f -- "$geneListTmp" "$geneListFile"
        else
            rm -f -- "$geneListTmp"
            echo "Failed to build gene list: $geneListFile" >&2
            exit 1
        fi
    fi
    # prepare_GO_mat.R is run on the same directory right after
    # prepare_GO_set.R, so stop if the first step fails.
    Rscript "$binDir/prepare_GO_set.R" "$geneListFile" "$species" "$dataDir" || {
        echo "prepare_GO_set.R failed" >&2
        exit 1
    }
    Rscript "$binDir/prepare_GO_mat.R" "$dataDir" || {
        echo "prepare_GO_mat.R failed" >&2
        exit 1
    }
fi

# Print the sample-group label for a *.sample.list path: the file name with
# the directory part and the literal ".sample.list" suffix removed.
sample_label_from_path() {
    local name=${1##*/}
    printf '%s\n' "${name%.sample.list}"
}

# Run correlation + GO enrichment for one sample group.
# Arguments: $1 - sample-group label
# Returns:   exit status of mkdir / go_enrichment.R
run_go_enrichment() {
    local label=$1
    local transExprFile=$transDir/expression/$label/normalized.fpkm
    local totalExprAfterFilter=$exprDir/$label/normalized.fpkm.after.filteration.txt
    mkdir -p -- "$outDir/$label" || return 1
    ###### compute correlation and do GO enrichment ######
    Rscript "$binDir/go_enrichment.R" "$dataDir" "$transExprFile" "$totalExprAfterFilter" "$outDir/$label"
}

# Collect the sample lists with a glob instead of parsing ls output, so paths
# containing whitespace survive intact.
sample_group=()
for sample in "$exprDir"/*.sample.list; do
    # An unmatched glob stays literal; skip it.
    [[ -e "$sample" ]] && sample_group+=("$sample")
done
if (( ${#sample_group[@]} == 0 )); then
    echo "No *.sample.list files found in: $exprDir" >&2
fi

failed=0

###### gene filtration by expression level and tissue specificity
for sample in "${sample_group[@]}"; do
    sample_label=$(sample_label_from_path "$sample")
    totalExprFile=$exprDir/$sample_label/normalized.fpkm.txt
    totalExprAfterFilter=$exprDir/$sample_label/normalized.fpkm.after.filteration.txt
    # The filtered table is cached; only compute it when it is missing.
    if [[ ! -f "$totalExprAfterFilter" ]]; then
        echo "gene filteration"
        if ! Rscript "$binDir/filter.R" "$totalExprFile" "$geneListFile" "$totalExprAfterFilter"; then
            echo "gene filtration failed for sample group: $sample_label" >&2
            failed=1
        fi
    fi
done

##### GO enrichment
# One background job per sample group; remember each PID so that every job's
# exit status can be checked individually.
pids=()
labels=()
for sample in "${sample_group[@]}"; do
    sample_label=$(sample_label_from_path "$sample")
    run_go_enrichment "$sample_label" &
    pids+=("$!")
    labels+=("$sample_label")
done

for i in "${!pids[@]}"; do
    if ! wait "${pids[$i]}"; then
        echo "GO enrichment failed for sample group: ${labels[$i]}" >&2
        failed=1
    fi
done

if (( failed )); then
    echo "co-expression module finished with errors" >&2
    exit 1
fi
echo "co-expression module is done!"
