#!/usr/local/ensembl/bin/perl -w

# Helper script for the GENCODE tracking system, to be run nightly.
#
# Connect to DAS server with experimental data,
# fetch data, store in internal tracking system db
# as TEC flag


package gencode_tracking_system::sources::experimental_verification;

use strict;
use gencode_tracking_system::core;
use gencode_tracking_system::config;
use base 'Exporter';

# run_update() is the only symbol exported (by default) to code that
# uses this module; store_tecfeature() stays internal.
our @EXPORT    = qw( run_update );

# File-scoped count of TEC features handed to store_tec_feature() by
# store_tecfeature(). It is never reset in this file, so it accumulates over
# all run_update() calls made within one process.
my $tecf_count   = 0;

# run_update - store experimental-verification (TEC) data from a DAS response
# in the tracking database.
#
# Arguments (positional):
#   $response        hash ref: request URL => array ref of DAS feature hashes;
#                    values that are not array refs are skipped
#   $chrom           chromosome name, stored with newly created issues
#   $tracking_dbh    tracking-system database handle
#   $prepare_hash    passed through unchanged to the store_* helpers
#   $user_id         user id passed to store_features() for new issues
#   $category_id     category id passed to store_features() for new issues
#   $previous_genes  optional hash ref keyed by group id; features whose group
#                    id is already in it are skipped (overlapping regions)
#
# Returns a hash ref whose keys are the group ids handled in this call.
#
# For every feature that is not skipped:
#   - the transcript issue is looked up by group id; if it is missing, a gene
#     and a transcript issue are created from the feature and its notes
#   - if the issue has no "TEC_pcr" flag yet, the flag is set (only when $WRITE
#     is true), a TEC record is stored and one TEC feature is stored per row
#     found in the subfeatures table for that issue
#
# Side effects: writes to the tracking database, prints a trace line per
# feature and a summary to STDERR, and empties each processed feature array
# in the caller's $response.
#
# NOTE(review): an unconditional `next;` used to follow the trace output and
# made all of the storage logic unreachable. It looked like a debugging
# leftover and has been removed; confirm the nightly job is expected to write.
# NOTE(review): $VERBOSE and $WRITE are not declared in this file; presumably
# they are exported by gencode_tracking_system::config.
sub run_update {
  my ($response, $chrom, $tracking_dbh, $prepare_hash, $user_id, $category_id, $previous_genes) = @_;

  my $flag_name    = "TEC_pcr";
  my $flag_user_id = 34;   # NOTE(review): hard-coded user id for flags/TECs - confirm
  my $flag_count   = 0;
  my $tec_count    = 0;
  my %new_features = ();

  # go through loci
  while (my ($url, $features) = each %$response) {

    next unless ref $features eq "ARRAY";
    print "Received ".scalar(@$features)." features.\n" if $VERBOSE;

  FEATURES:
    foreach my $feature (@$features) {

      #get the "HAVANA" transcript-id
      my $grouphash = $feature->{'group'}->[0];
      my $group_id  = $grouphash->{'group_id'};
      if(!defined $group_id){
        # without a group id there is nothing to look up or to key results on
        print STDERR "DAS feature without group id, skipping.\n";
        next FEATURES;
      }

      # The whole trace line is assembled first and written to STDERR in one
      # piece, so it cannot be torn apart by mixing two output streams.
      my $trace = "DAS\t$group_id\t".$feature->{'start'}."\t".$feature->{'end'}."\t";

      #remove duplicates from overlapping regions
      if( (defined $previous_genes) && (exists $previous_genes->{$group_id})){
        print STDERR $trace."IGNORING\n";
        next FEATURES;
      }
      $new_features{$group_id} = 1;

      # Notes arrive as "KEY=VALUE" strings. Split on the first "=" only, so
      # values may contain "=", and drop whitespace around the separator
      # (the DAS source sends e.g. "LASTMOD =2009-05-05T15:30:23+0200").
      my %notes = ();
      foreach my $note (@{ $feature->{'note'} || [] }) {
        next unless defined $note && length $note;
        my ($note_type, $note_value) = split(/\s*=\s*/, $note, 2);
        $note_value = '' unless defined $note_value;
        $notes{$note_type} = $note_value;
        $trace .= "$note_type=$note_value\t";
      }
      print STDERR $trace."\n";

      #get original id
      my $org_issue_id = 0;
      my $org_issue    = get_data_by_name($tracking_dbh, "transcript", $group_id);
      if($org_issue and exists($org_issue->{'id'})){
        $org_issue_id = $org_issue->{'id'};
      }
      else{
        print STDERR "Could not get $group_id!\n" if($VERBOSE);

        #create new entry
        # missing notes become empty strings in the description only
        my $descr_type    = defined $notes{'TRANSCRIPTTYPE'} ? $notes{'TRANSCRIPTTYPE'} : '';
        my $descr_project = defined $notes{'PROJECT'}        ? $notes{'PROJECT'}        : '';

        my %transcript = (
          'id'          => $group_id,
          'chrom'       => $chrom,
          'start'       => $feature->{'start'},
          'end'         => $feature->{'end'},
          'strand'      => $feature->{'orientation'},
          'biotype'     => $notes{'TRANSCRIPTTYPE'},
          'status'      => $notes{'TRANSCRIPTSTATUS'},
          'type'        => "gene",
          'description' => "Type: ".$descr_type."\nProject: ".$descr_project,
          'created_on'  => $notes{'LASTMOD'},
          'updated_on'  => $notes{'LASTMOD'},
        );

        # store_features() is called in list context; only the first returned
        # value (the stored id) is needed here.
        my ($gene_id) = store_features($tracking_dbh, $prepare_hash, \%transcript,
                                       'gene', undef, 0, $user_id, $category_id);
        print STDERR "Stored as gene: $gene_id.\n" if($VERBOSE);

        # same record again, now as the transcript belonging to that gene
        $transcript{'type'} = "transcript";
        $transcript{'id'}   = $feature->{'feature_id'};
        ($org_issue_id) = store_features($tracking_dbh, $prepare_hash, \%transcript,
                                         'transcript', $gene_id, 0, $user_id, $category_id);
        print STDERR "Stored as transcript: $org_issue_id.\n" if($VERBOSE);
      }

      #check for existing flags
      my $flags = get_flag($tracking_dbh, $org_issue_id, $flag_name);
      if($flags && scalar @$flags){
        my $existing_flag_id = $flags->[0]->{'id'};
        print STDERR "  Found existing flag $flag_name ($existing_flag_id).\n" if($VERBOSE);
        next FEATURES;
      }

      # Scoped per feature: when $WRITE is off no flag is created, and the TEC
      # must then carry 0 rather than an id left over from an earlier feature.
      my $flag_id = 0;
      print STDERR "  Setting flag $flag_name to $org_issue_id.\n" if($VERBOSE);
      if($WRITE){
        $flag_id = set_flag($tracking_dbh, $org_issue_id, $flag_name, undef,
                            $flag_user_id, undef);
        print STDERR "Flag-id=$flag_id\n";
      }
      $flag_count++;

      #store tec
      my %tec = (
        'issue_id'      => $org_issue_id,
        'issue_version' => 1,
        'issue_name'    => $group_id,
        'flag_id'       => $flag_id,
        'sel_remark'    => $notes{"PROJECT"},
        'user_id'       => $flag_user_id,
        'result'        => "",
      );

      if($VERBOSE){
        print "TEC:\n";
        foreach my $k (sort keys %tec){
          print "\t$k=".(defined $tec{$k} ? $tec{$k} : '')."\n";
        }
      }
      my $tec_id = store_tec($tracking_dbh, $prepare_hash, \%tec, $flag_user_id);
      $tec_count++;

      #store tec-features
      #use exon info from db
      my $sql = "select subfeature_type, subfeature_chr, subfeature_start, subfeature_end, ".
                "subfeature_strand from subfeatures where issue_id=?";
      my $tf_sth = custom_prepare($tracking_dbh, $sql);
      $tf_sth->execute( $org_issue_id );
      while(my ($subfeature_type, $subfeature_chr, $subfeature_start, $subfeature_end, $subfeature_strand)
            = $tf_sth->fetchrow_array){
        store_tecfeature($tracking_dbh, $prepare_hash, $flag_user_id, $tec_id, $subfeature_type,
                         $subfeature_chr, $subfeature_start, $subfeature_end, $subfeature_strand);
      }
    }

    # empties the caller's array for this URL once it has been processed
    @$features = ();
  }

  # $tec_count and $flag_count cover this call only; $tecf_count is the
  # file-scoped counter and is not reset between calls.
  print STDERR "TECs: $tec_count & TEC-Fs: $tecf_count\n$flag_count new $flag_name flags.\n";

  return \%new_features;
}


# store_tecfeature - wrap one sub-feature into a TEC-feature record and pass
# it to store_tec_feature().
#
# Arguments (positional): database handle, prepare hash, user id, id of the
# owning TEC, then the feature's type, chromosome, start, end and strand.
# The record's 'sequence' field is always the empty string.
#
# Bumps the file-scoped $tecf_count once per call. There is no explicit
# return; the caller in this file ignores the result.
sub store_tecfeature{
  my ($tracking_dbh, $prepare_hash, $user_id, $tec_id, $type, $chrom, $start, $end, $strand) = @_;

  my %record = (
    'tec_id'   => $tec_id,
    'type'     => $type,
    'chrom'    => $chrom,
    'start'    => $start,
    'end'      => $end,
    'strand'   => $strand,
    'sequence' => "",
  );

  # the id handed back by store_tec_feature() is not used any further
  my $stored_id = store_tec_feature($tracking_dbh, $prepare_hash, \%record, $user_id);
  $tecf_count++;
}


1;


__END__

http://genome.crg.es:9000/das/Cases_selection_for_experimental_validation/features?segment=21:1,20000000

<FEATURE id="OTTHUMT00000075396_1" label="OTTHUMT00000075396_1">
<TYPE id="exon" category="ECO:0000067">exon</TYPE>
<START>20457162</START>
<END>20457271</END><METHOD id="Cases_for_Experimental_Validation">Cases_for_Experimental_Validation</METHOD>
<SCORE>0</SCORE>
<PHASE>-</PHASE>
<ORIENTATION>-</ORIENTATION>
<NOTE>LASTMOD =2009-05-05T15:30:23+0200</NOTE>
<NOTE>TRANSCRIPTTYPE=protein_coding</NOTE>
<NOTE>TRANSCRIPTSTATUS=KNOWN</NOTE>
<NOTE>PROJECT=U12_pipeline</NOTE>
<NOTE>LEFTPRIMER=CTCGTCACTCGGGTCGTAAT</NOTE>
<NOTE>RIGHTPRIMER=ATGGAATTGGATGACTTGCC</NOTE>
<GROUP id="OTTHUMT00000075396" type="Cases_for_Experimental_Validation_prediction"/>
</FEATURE>


#to remove test insertions:
delete from  projects where created_on like "2009-07-08%";
delete from issues where created_on like "2009-07-08%";
delete f from tecs f left join issues i on f.issue_id=i.id where i.id IS NULL;
delete f from flags f left join issues i on f.issue_id=i.id where f.flag_name="TEC_pcr" and i.id IS NULL;
