#!/bin/bash
# Summary module: condenses the per-section result files found under a
# transcript directory into <transDir>/summary.txt.
#
# Usage: <this script> <transDir> <species>
#   transDir - directory holding the per-section result files
#   species  - the script branches on the values "hg38" and "mm10"
echo "start run summary module..."

# Absolute directory containing this script. Every expansion is quoted so
# that an install path containing whitespace still resolves.
binDir=$(cd "$(dirname "$0")" && pwd)
transDir=$1
species=$2


# Output file that every section below writes to.
summary=$transDir/summary.txt

#cpc2
# Print tab-fields 1, 7 and 8 of the last line of the CPC2 result, pasted
# side by side with tab-field 6 of every line of the location file.
#   $1 - CPC2 result file
#   $2 - location file
cpc2_summary_line() {
  local cpc2_result=$1 loc_table=$2
  paste <(tail -n1 "$cpc2_result" | cut -f1,7,8) <(cut -f6 "$loc_table")
}

cpc2_file=$transDir/../cpc2/result.txt
loc_file=$transDir/loc.txt
# '>' truncates: this section starts a fresh summary file. Paths are quoted
# so directories containing whitespace work.
cpc2_summary_line "$cpc2_file" "$loc_file" > "$summary"


#exp
# Print "<count><TAB><names>" (no trailing newline) for one expression table.
#   count - number of rows whose 2nd whitespace-separated field is not 0
#   names - tab-field 1 of the three such rows that sort highest on
#           `sort -rnk2`, joined with ", "
#   $1 - expression table (final.fpkm)
# NOTE(review): awk splits on any whitespace while cut splits on tabs; the
# two only agree when field 1 contains no spaces.
expression_stats() {
  local fpkm_table=$1
  local expressed_num top_names
  expressed_num=$(awk 'BEGIN {count = 0}{if($2!=0)count+=1}END {print(count)}' "$fpkm_table")
  top_names=$(awk '{if($2!=0)print $0}' "$fpkm_table" \
    | sort -rnk2 \
    | head -n 3 \
    | cut -f1 \
    | xargs \
    | sed 's/ /, /g')
  printf '%s\t%s' "$expressed_num" "$top_names"
}

case $species in
  hg38)
    exp_normal_file=$transDir/expression/normal/final.fpkm
    exp_cancer_file=$transDir/expression/cancer/final.fpkm
    ;;
  mm10)
    exp_normal_file=$transDir/expression/tissue/final.fpkm
    exp_cancer_file=$transDir/expression/cell_line/final.fpkm
    ;;
  *)
    # Without a fallback the file variables stay unset and awk would read
    # stdin, which blocks forever on a terminal. /dev/null yields the row
    # "0<TAB><TAB>0<TAB>", so the summary keeps one row per section.
    echo "warning: unsupported species '$species'; expression counts reported as 0" >&2
    exp_normal_file=/dev/null
    exp_cancer_file=/dev/null
    ;;
esac

# Row layout: normal count, normal names, cancer count, cancer names.
printf '%s\t%s\n' \
  "$(expression_stats "$exp_normal_file")" \
  "$(expression_stats "$exp_cancer_file")" >> "$summary"

#transcriptional regulation
# Print the sorted unique values of one tab-column of the details file,
# ignoring every line that contains the string "Uniprot_id".
#   $1 - details file
#   $2 - column number
trans_reg_unique_column() {
  grep -v Uniprot_id "$1" | cut -f"$2" | sort | uniq
}

# Print the row: number of unique column-2 values, number of unique column-3
# values, and the first three unique column-3 values joined with ", ".
# The original variable names call these TFs and cell lines.
#   $1 - details file
transcription_regulation_line() {
  local details=$1
  local tf_num cellline_num cellline_name
  tf_num=$(trans_reg_unique_column "$details" 2 | wc -l)
  cellline_num=$(trans_reg_unique_column "$details" 3 | wc -l)
  # Append "," to each name, let xargs join them with spaces, then drop the
  # final trailing comma.
  cellline_name=$(trans_reg_unique_column "$details" 3 \
    | head -n 3 \
    | sed 's/$/,/' \
    | xargs \
    | sed 's/.$//')
  # printf keeps backslashes in the data literal (echo -e would expand them).
  printf '%s\t%s\t%s\n' "$tf_num" "$cellline_num" "$cellline_name"
}

trans_reg_file=$transDir/transcription_regulation/details.txt
transcription_regulation_line "$trans_reg_file" >> "$summary"

#miRNA binding
# Print the row: number of unique tab-column-1 values, the first four of
# them joined with ", ", then the same two values restricted to lines that
# contain the string "YES". The original variable names call the first
# column miRNAs and the YES lines CLIP-supported.
#   $1 - details file
mirna_binding_line() {
  local details=$1
  local mirna_num mirna_name clip_mirna_num clip_mirna_name
  mirna_num=$(cut -f1 "$details" | sort | uniq | wc -l)
  mirna_name=$(cut -f1 "$details" | sort | uniq | head -n 4 | xargs | sed 's/ /, /g')
  # grep matches "YES" anywhere on the line, not in a specific column.
  clip_mirna_num=$(grep YES "$details" | cut -f1 | sort | uniq | wc -l)
  clip_mirna_name=$(grep YES "$details" | cut -f1 | sort | uniq | head -n 4 | xargs | sed 's/ /, /g')
  # printf keeps backslashes in the data literal (echo -e would expand them).
  printf '%s\t%s\t%s\t%s\n' "$mirna_num" "$mirna_name" "$clip_mirna_num" "$clip_mirna_name"
}

mir_binding_file=$transDir/miRNA_interaction/details.txt
mirna_binding_line "$mir_binding_file" >> "$summary"


#protein interaction
# Print the row built from the two protein-interaction files:
#   1. number of unique tab-column-1 values of the lncPro result
#   2. the first four of them, joined with ", "
#   3. number of unique tab-column-7 values of the CLIP peak file
#   4. number of unique tab-column-9 values of the CLIP peak file
#   5. the first four unique column-7 values, joined with ", "
# The original variable names call column 7 proteins and column 9 cell lines.
#   $1 - lncPro result file
#   $2 - intersected CLIP peak file
protein_interaction_line() {
  local predicted=$1 clip_peaks=$2
  local pro_num pro_name clip_pro_num clip_cellline_num clip_pro_name
  pro_num=$(cut -f1 "$predicted" | sort | uniq | wc -l)
  pro_name=$(cut -f1 "$predicted" | sort | uniq | head -n 4 | xargs | sed 's/ /, /g')
  clip_pro_num=$(cut -f7 "$clip_peaks" | sort | uniq | wc -l)
  clip_cellline_num=$(cut -f9 "$clip_peaks" | sort | uniq | wc -l)
  clip_pro_name=$(cut -f7 "$clip_peaks" | sort | uniq | head -n 4 | xargs | sed 's/ /, /g')
  # printf keeps backslashes in the data literal (echo -e would expand them).
  printf '%s\t%s\t%s\t%s\t%s\n' \
    "$pro_num" "$pro_name" "$clip_pro_num" "$clip_cellline_num" "$clip_pro_name"
}

pro_file=$transDir/protein_interaction/lncpro.result.final
clip_pro_file=$transDir/protein_interaction/intersected.clip.peak
protein_interaction_line "$pro_file" "$clip_pro_file" >> "$summary"

#evolution
# Print tab-column 3 of the phyloP rows of one region type whose tab-column 7
# passes a cutoff, joined with ", ".
#   $1 - value to match in column 1 ("exon" or "promoter")
#   $2 - "cons" selects column 7 > 1, "fast" selects column 7 < -1
#   $3 - conservation score summary file
evo_phylop_hits() {
  local region=$1 direction=$2 score_table=$3
  awk -F "\t" -v region="$region" -v direction="$direction" '
    $1 == region && $2 == "phyloP" &&
      ((direction == "cons" && $7 > 1) || (direction == "fast" && $7 < -1)) {
      print $3
    }
  ' "$score_table" | xargs | sed 's/ /, /g'
}

# Print the evolution row: exon cons, exon fast, promoter cons, promoter
# fast, and "Yes" when any DAF row has column 7 < -1 (empty otherwise).
#   $1 - conservation score summary file
evolution_line() {
  local score_table=$1
  local exon_cons exon_fast promoter_cons promoter_fast daf_purifying
  exon_cons=$(evo_phylop_hits exon cons "$score_table")
  exon_fast=$(evo_phylop_hits exon fast "$score_table")
  promoter_cons=$(evo_phylop_hits promoter cons "$score_table")
  promoter_fast=$(evo_phylop_hits promoter fast "$score_table")
  # "Yes" must be a string literal: a bare Yes is an uninitialised awk
  # variable and prints as an empty string. uniq folds repeated hits into one.
  daf_purifying=$(awk -F "\t" '$2 == "DAF" && $7 < -1 { print "Yes" }' "$score_table" | uniq)
  printf '%s\t%s\t%s\t%s\t%s\n' \
    "$exon_cons" "$exon_fast" "$promoter_cons" "$promoter_fast" "$daf_purifying"
}

evo_file=$transDir/evolution/conservation_score.summary
evolution_line "$evo_file" >> "$summary"

#genetic_association
# Not implemented yet (the original note here just said "wait"). An empty
# row is appended, presumably to reserve this section's line in the summary
# -- confirm with whatever reads the file. The target is quoted so a path
# containing whitespace does not cause an "ambiguous redirect".
printf '\n' >> "$summary"

#co-expression
# Print the line count of <dir>/<sign>.correlated.gene.txt, or nothing when
# that file does not exist.
#   $1 - condition directory
#   $2 - "positive" or "negative"
coexp_gene_count() {
  local gene_list=$1/$2.correlated.gene.txt
  if [[ -f $gene_list ]]; then
    wc -l < "$gene_list"
  fi
}

# Print tab-column 2 of the first row of
# <dir>/<sign>.correlated.gene.<category>.txt whose 3rd whitespace-separated
# field is below 0.01, or nothing when the file does not exist.
#   $1 - condition directory
#   $2 - "positive" or "negative"
#   $3 - "BP" or "MF"
# The file that is read is always the file that was tested for existence;
# the negative values used to be read from the positive files.
# NOTE(review): awk splits on any whitespace while cut splits on tabs; if
# column 2 can contain spaces the 0.01 test looks at the wrong field.
coexp_top_term() {
  local enrichment=$1/$2.correlated.gene.$3.txt
  if [[ -f $enrichment ]]; then
    awk '{if($3<0.01)print $0}' "$enrichment" | head -n1 | cut -f2
  fi
}

# Print the co-expression row: for each condition, in the order given, and
# for positive then negative correlation, the fields count, BP term, MF term.
#   $1     - co-expression root directory
#   $2...  - condition sub-directory names
coexpression_line() {
  local coexp_root=$1
  shift
  local fields=() condition sign
  for condition in "$@"; do
    for sign in positive negative; do
      fields+=(
        "$(coexp_gene_count "$coexp_root/$condition" "$sign")"
        "$(coexp_top_term "$coexp_root/$condition" "$sign" BP)"
        "$(coexp_top_term "$coexp_root/$condition" "$sign" MF)"
      )
    done
  done
  # Join with tabs; empty fields are kept so the column positions are stable.
  local IFS=$'\t'
  printf '%s\n' "${fields[*]}"
}

if [[ $species == "hg38" ]]; then
  normal_file="normal"
  cancer_file="cancer"
elif [[ $species == "mm10" ]]; then
  normal_file="tissue"
  cancer_file="cell_line"
fi

# For any other species both names are empty, no file matches, and the row
# consists of twelve empty fields.
coexpression_line "$transDir/co_expression_function" "$normal_file" "$cancer_file" >> "$summary"

#loc-sub
# Print "<cyto><TAB><nuclear>" for a localization table, skipping its first
# line. Among rows whose 3rd field is below 0.05, rows with a negative 2nd
# field are counted as cyto and rows with a positive 2nd field as nuclear
# (labels taken from the original variable names).
#   $1 - localization table
localization_line() {
  local loc_table=$1
  sed -n '2,$p' "$loc_table" | awk '
    $3 < 0.05 && $2 < 0 { cyto++ }
    $3 < 0.05 && $2 > 0 { nuclear++ }
    END { printf "%d\t%d\n", cyto, nuclear }
  '
}

# Only hg38 gets a localization row.
if [[ $species == "hg38" ]]; then
  localization_file=$transDir/localization/localization.txt
  localization_line "$localization_file" >> "$summary"
fi



echo "summary module is done"
