# #!/usr/local/ensembl/bin/perl -w

# Helper script for the GENCODE tracking system, to be run nightly.
#
# Connect to the OTTER DAS server,
# fetch data, store/update in internal tracking system db

# Source handler module: exposes run_update(), which walks an already-fetched
# DAS response and stores its genes, transcripts and sub-features through
# store_features().
package gencode_tracking_system::sources::havana;

use strict;
# NOTE(review): $VERBOSE, store_features() and print_element() are used by
# run_update() but are not defined in this file; presumably they are exported
# by the two project modules below -- confirm.
use gencode_tracking_system::core;
use gencode_tracking_system::config;
use base 'Exporter';

# run_update is exported by default into every package that uses this module.
our @EXPORT = qw( run_update );

# Store the genes, transcripts and sub-features (exons, introns, UTRs) found
# in a DAS response in the tracking database.
#
# Arguments:
#   $response       - hash ref; every value that is an array ref is treated
#                     as a list of feature hashes (the keys are not used)
#   $chrom          - value stored as 'chrom' on every element
#   $tracking_dbh, $prepare_hash, $user_id, $category_id
#                   - handed unchanged to store_features()
#   $previous_genes - optional hash ref keyed by feature_id; features whose
#                     id is present are skipped (duplicates from overlapping
#                     regions)
#
# Returns a hash ref whose keys are the feature_ids processed by this call.
#
# Side effect: every feature list in $response is emptied once processed.
sub run_update {
  my ($response, $chrom, $tracking_dbh, $prepare_hash, $user_id, $category_id, $previous_genes) = @_;

  my $current_gene_id;
  my $current_transcript_id;

  # Ids returned by store_features(), remembered per gene / transcript so that
  # later features are attached to the right parent even when features of
  # different genes or transcripts are interleaved in the response.
  my %gene_id_of       = ();
  my %transcript_id_of = ();
  my %new_features     = ();

  # go through the per-request results
  while (my (undef, $features) = each %$response) {

    print "\n" if $VERBOSE;

    next unless ref $features eq "ARRAY";
    print STDERR "Received ".scalar @$features." features.\n" if $VERBOSE;

    # go through features
  FEATURES:
    foreach my $feature (@$features) {

      my $feature_id = defined $feature->{'feature_id'} ? $feature->{'feature_id'} : '';

      #remove duplicates from overlapping regions
      if(defined $previous_genes and exists $previous_genes->{$feature_id}){
        next FEATURES;
      }
      $new_features{$feature_id} = 1;

      # Feature notes look like 'TYPE|value'; keys are upper-cased on parsing.
      # NOTE(review): only GENETYPE, TRANSCRIPTTYPE and LASTMOD appear in the
      # sample record at the end of this file; CREATED, GENESTATUS and
      # TRANSCRIPTSTATUS are taken from the previously commented-out code
      # (Created / Genestatus / Transcriptstatus) -- confirm against the server.
      my $notes = _parse_pipe_notes($feature->{'note'});
      my $timestamp_created = $notes->{'CREATED'};
      my $timestamp_updated = $notes->{'LASTMOD'};
      my $gene_type         = $notes->{'GENETYPE'};
      my $gene_status       = $notes->{'GENESTATUS'};
      my $transcript_type   = $notes->{'TRANSCRIPTTYPE'};
      my $transcript_status = $notes->{'TRANSCRIPTSTATUS'};

      my $grouphash  = $feature->{'group'}->[0];
      my $group_type = $grouphash->{'group_type'};
      my $strand     = $feature->{'orientation'};

      # Reduce the DAS type to intron / UTR / exon, defaulting to exon.
      # The capture is taken in list context so that a failed match yields
      # undef instead of a stale $1 from some earlier match.
      my $raw_type = defined $feature->{'type'} ? $feature->{'type'} : '';
      my ($element_type) = $raw_type =~ m/(intron|UTR|exon)/;
      $element_type = "exon" unless defined $element_type;

      # NOTE(review): a phase of "0" is false here, so it is only kept for
      # exons (via the elsif); other element types get "." -- confirm intent.
      my $phase = ".";
      if($feature->{'phase'}){
        $phase = $feature->{'phase'};
      }
      elsif($element_type eq "exon"){
        $phase = "0";
      }

      # --- gene: stored once per group_type ---
      my $gene_key = defined $group_type ? $group_type : '';
      if(exists $gene_id_of{$gene_key}){
        $current_gene_id = $gene_id_of{$gene_key};
      }
      else{
        # NOTE(review): description notes are split on '=' although the
        # feature notes above use '|'; kept as in the original -- confirm
        # which delimiter the server really uses for these.
        my @gene_description = (
          _collect_descriptions($grouphash->{'note'}, 'Description'),
          _collect_descriptions($feature->{'note'}, 'Description', 'Genename', 'Genealias'),
        );

        #build structure for gene
        my %gene = (
          'chrom'       => $chrom,
          'biotype'     => $gene_type,
          'status'      => $gene_status,
          'type'        => "gene",
          'start'       => $feature->{'target_start'},
          'end'         => $feature->{'target_stop'},
          'strand'      => $strand,
          'created_on'  => $timestamp_created,
          'updated_on'  => $timestamp_updated,
          'description' => join("\n", @gene_description),
          'id'          => $group_type,
        );

        #store/update entry for gene
        print_element(\%gene) if($VERBOSE);
        $current_gene_id = store_features($tracking_dbh, $prepare_hash, \%gene, 'gene',
                                          0, 0, $user_id, $category_id);
        $gene_id_of{$gene_key} = $current_gene_id;
      }

      # --- transcript: stored once per feature_id ---
      if(exists $transcript_id_of{$feature_id}){
        $current_transcript_id = $transcript_id_of{$feature_id};
      }
      else{
        my @transcript_description = _collect_descriptions(
          $feature->{'note'},
          'Description', 'Note', 'Transcriptname', 'Transcriptalias', 'Genename', 'Genealias',
        );

        #build structure for transcript
        my %transcript = (
          'chrom'       => $chrom,
          'biotype'     => $transcript_type,
          'status'      => $transcript_status,
          'type'        => "transcript",
          'start'       => $feature->{'target_start'},
          'end'         => $feature->{'target_stop'},
          'strand'      => $strand,
          'description' => join("\n", @transcript_description),
          'id'          => $feature->{'feature_id'},
          'parent'      => $group_type,
          'alias'       => $feature->{'target_id'},
          'created_on'  => $timestamp_created,
          'updated_on'  => $timestamp_updated,
        );

        #store/update entry for transcript
        print_element(\%transcript) if($VERBOSE);
        $current_transcript_id = store_features($tracking_dbh, $prepare_hash, \%transcript,
                                                'transcript', $current_gene_id, 0, $user_id, $category_id);
        $transcript_id_of{$feature_id} = $current_transcript_id;
      }

      # --- sub-feature (exon, intron, UTR): stored for every feature ---
      my %sub_element = (
        'chrom'      => $chrom,
        'biotype'    => $transcript_type,
        'status'     => $transcript_status,
        'type'       => $element_type,
        'start'      => $feature->{'start'},
        'end'        => $feature->{'end'},
        'score'      => ".",
        'strand'     => $strand,
        'phase'      => $phase,
        'parent'     => $feature->{'feature_id'},
        'created_on' => $timestamp_created,
        'updated_on' => $timestamp_updated,
      );

      #store/update entry for exons, etc.
      print_element(\%sub_element) if($VERBOSE);
      store_features($tracking_dbh, $prepare_hash, \%sub_element,
                     'subfeature', $current_gene_id, $current_transcript_id,
                     $user_id, $category_id);
    }

    # empty the processed list (this modifies the caller's $response)
    @$features = ();
  }

  return \%new_features;
}

# Parse a list of 'TYPE|value' notes into a hash ref keyed by upper-cased TYPE.
# Anything that is not an array ref yields an empty hash; a later note with
# the same type overrides an earlier one.
sub _parse_pipe_notes {
  my ($notes) = @_;

  my %value_of = ();
  return \%value_of unless ref $notes eq 'ARRAY';

  foreach my $note (@$notes) {
    next unless defined $note;
    # no LIMIT here: split then drops the empty field left by a trailing '|'
    my ($type, $value) = split(/\|/, $note);
    next unless defined $type;
    $value_of{ uc $type } = $value;
  }

  return \%value_of;
}

# Return "Key: value" strings for every 'Key=value' note whose key is one of
# @wanted_keys, in note order. Anything that is not an array ref yields an
# empty list.
sub _collect_descriptions {
  my ($notes, @wanted_keys) = @_;

  my @lines = ();
  return @lines unless ref $notes eq 'ARRAY';

  my %is_wanted = map { $_ => 1 } @wanted_keys;
  foreach my $note (@$notes) {
    next unless defined $note;
    # LIMIT 2 keeps any further '=' characters inside the value
    my ($key, $value) = split(/=/, $note, 2);
    next unless defined $key and $is_wanted{$key};
    push(@lines, $key.': '.(defined $value ? $value : ''));
  }

  return @lines;
}


1;


__END__

<FEATURE id="ENST00000328879" label="ENST00000328879">
<TYPE id="exon" category="transcription">exon</TYPE>
<START>19173272</START>
<END>19173498</END>
<SCORE>1</SCORE>
<PHASE>0</PHASE>
<ORIENTATION>-</ORIENTATION>
<NOTE>GENETYPE|ccds_gene</NOTE>
<NOTE>TRANSCRIPTTYPE|protein_coding|</NOTE>
<NOTE>LASTMOD|0</NOTE>
<TARGET id="CCDS13780.1" start="19126360" stop="19173498">GENE|19126360|19173498</TARGET>
<GROUP id="ENST00000328879">
<NOTE>CCDS|CCDS13780.1</NOTE>
<NOTE>DESCR|</NOTE>
<NOTE>GENETYPE|protein_coding</NOTE>
<LINK href="http://www.ensembl.org/Homo_sapiens/transview?transcript=ENST00000328879">show in ensembl transcript view</LINK></GROUP>
</FEATURE>
