# #!/usr/local/ensembl/bin/perl -w

# Helper script for the GENCODE tracking system, to be run nightly.
#
# Connect to the OTTER DAS server,
# fetch data, store/update in internal tracking system db

package gencode_tracking_system::sources::havana;

use strict;
use gencode_tracking_system::core;
use gencode_tracking_system::config;
use base 'Exporter';

our @EXPORT = qw( run_update );

# Split a DAS note of the form "Key=Value" into (key, value).
# Only the first "=" separates key from value, so a value that itself
# contains "=" is kept intact. A missing or empty value is returned as undef.
sub _havana_split_note {
  my ($note) = @_;
  return (undef, undef) unless defined $note;

  my ($key, $value) = split(/=/, $note, 2);
  $value = undef if defined($value) && $value eq '';
  return ($key, $value);
}

# Reduce a DAS feature type string to one of "intron", "UTR" or "exon".
# Anything that names none of the three is treated as an "exon".
sub _havana_element_type {
  my ($raw_type) = @_;

  # The capture is only read after a successful match; reading $1 after a
  # failed match would hand back the value left over from an earlier feature.
  if (defined($raw_type) && $raw_type =~ m/(intron|UTR|exon)/) {
    return $1;
  }
  return "exon";
}

# Walk a DAS response and store/update the genes and transcripts it contains.
#
# Arguments:
#   $response       - hash ref; each value that is an array ref is treated as
#                     a list of feature hash refs (other values are ignored)
#   $chrom          - chromosome name copied into every stored element
#   $tracking_dbh   - passed through unchanged to store_features()
#   $prepare_hash   - passed through unchanged to store_features()
#   $user_id        - passed through unchanged to store_features()
#   $category_id    - passed through unchanged to store_features()
#   $previous_genes - optional hash ref keyed by feature id; features whose id
#                     is present are skipped (duplicates from overlapping regions)
#
# Returns a hash ref whose keys are the feature ids seen in this call that
# were not in $previous_genes.
#
# Side effects: every feature array in $response is emptied once processed,
# and progress lines are printed to STDOUT.
sub run_update {
  my ($response, $chrom, $tracking_dbh, $prepare_hash, $user_id, $category_id, $previous_genes) = @_;

  my $current_gene_id;
  my $current_transcript_id;
  my $current_feature_id;
  my %genes            = ();   # group_type => id returned by store_features()
  my %transcripts      = ();   # feature ids whose transcript was already stored
  my %new_features     = ();   # feature ids seen in this call (returned)
  my %ignore           = ();   # feature ids that carried an error element
  my $updated          = 0;
  my $subfeatureupdate = 0;

  # notes that are copied into the transcript description
  my %transcript_note_wanted = map { $_ => 1 }
    qw( Description Note Transcriptname Transcriptalias Genename Genealias );

  # go through the responses, one per requested URL
  while (my ($url, $features) = each %$response) {

    print "\n" if $VERBOSE;

    next unless ref($features) eq "ARRAY";
    print "Received ".scalar(@$features)." features.\n" if $VERBOSE;

    # go through the features of this response
  FEATURES:
    foreach my $feature (@$features) {
      my $feature_id = $feature->{'feature_id'};

      # remove duplicates from overlapping regions
      if (defined($previous_genes) && exists $previous_genes->{$feature_id}) {
        next FEATURES;
      }
      $new_features{$feature_id} = 1;

      next FEATURES if exists $ignore{$feature_id};

      # skip error features, and everything else sharing their id
      my $raw_type = $feature->{'type'};
      if (defined($raw_type) && $raw_type =~ m/error/) {
        print "!!!Found an error element: ".$feature->{'feature_id'}."\n";
        $ignore{$feature_id} = 1;
        next FEATURES;
      }

      # skip introns
      my $element_type = _havana_element_type($raw_type);
      next FEATURES if $element_type eq "intron";

      # get notes; the list is read up to the first undefined entry
      my %notes = ();
      foreach my $note (@{ $feature->{'note'} || [] }) {
        last unless defined $note;
        my ($note_key, $note_value) = _havana_split_note($note);
        $note_value =~ s/&#39;/'/g if defined $note_value;
        $notes{ defined($note_key) ? $note_key : '' } = $note_value;
      }
      my $timestamp_created = $notes{'Created'};
      my $timestamp_updated = $notes{'Lastmod'};

      my $gene_type         = $notes{'Genetype'};
      my $gene_status       = $notes{'Genestatus'};
      my $transcript_type   = $notes{'Transcripttype'};
      my $transcript_status = $notes{'Transcriptstatus'};

      my $grouphash  = $feature->{'group'}->[0] || {};
      my $group_type = $grouphash->{'group_type'};
      my $group_key  = defined($group_type) ? $group_type : '';
      my $strand     = $feature->{'orientation'};

      # check phase: use the reported one if true, else "0" for exons, "." otherwise
      my $phase = ".";
      if ($feature->{'phase'}) {
        $phase = $feature->{'phase'};
      }
      elsif ($element_type eq "exon") {
        $phase = "0";
      }

      # build structure for exons and general items
      my %sub_element = (
        'chrom'      => $chrom,
        'biotype'    => $transcript_type,
        'status'     => $transcript_status,
        'type'       => $element_type,
        'start'      => $feature->{'start'},
        'end'        => $feature->{'end'},
        'score'      => ".",
        'strand'     => $strand,
        'phase'      => $phase,
        'parent'     => $feature->{'feature_id'},
        'created_on' => $timestamp_created,
        'updated_on' => $timestamp_updated,
      );

      # first feature of a gene: store/update the gene itself
      if (!exists($genes{$group_key})) {
        $genes{$group_key} = 1;
        $current_gene_id   = 0;
        $updated           = 0;

        # get description from group-note
        my @gene_description = ();
        foreach my $note (@{ $grouphash->{'note'} || [] }) {
          my ($gnote_key, $gnote_value) = _havana_split_note($note);
          if (defined($gnote_key) && $gnote_key eq "Description") {
            push(@gene_description, 'Description: '.(defined($gnote_value) ? $gnote_value : ''));
          }
        }
        foreach my $name_key ('Genename', 'Genealias') {
          next unless exists $notes{$name_key};
          push(@gene_description,
               $name_key.': '.(defined($notes{$name_key}) ? $notes{$name_key} : ''));
        }

        # build structure for gene
        my %gene = (
          'chrom'       => $chrom,
          'biotype'     => $gene_type,
          'status'      => $gene_status,
          'type'        => "gene",
          'start'       => $feature->{'target_start'},
          'end'         => $feature->{'target_stop'},
          'strand'      => $strand,
          'created_on'  => $timestamp_created,
          'updated_on'  => $timestamp_updated,
          'description' => join("\n", @gene_description),
          'id'          => $group_type,
          'version'     => $notes{'Geneversion'},
        );

        # store/update entry for gene
        print "NEXTGENE: ".(defined($gene{'id'}) ? $gene{'id'} : '')."\n";
        ($current_gene_id, $updated) = store_features($tracking_dbh, $prepare_hash, \%gene, 'gene',
                                                      0, 0, $user_id, $category_id);
        $genes{$group_key} = $current_gene_id;
      }

      # first feature of a transcript: store/update the transcript itself
      if (!exists($transcripts{$feature_id})) {
        $transcripts{$feature_id} = 1;
        $current_transcript_id    = 0;

        # get transcript description
        my @transcript_description = ();
        foreach my $note (@{ $feature->{'note'} || [] }) {
          my ($tnote_key, $tnote_value) = _havana_split_note($note);
          next unless defined($tnote_key) && $transcript_note_wanted{$tnote_key};
          push(@transcript_description,
               $tnote_key.': '.(defined($tnote_value) ? $tnote_value : ''));
        }

        # build structure for transcript
        my %transcript = (
          'chrom'       => $chrom,
          'biotype'     => $transcript_type,
          'status'      => $transcript_status,
          'type'        => "transcript",
          'start'       => $feature->{'target_start'},
          'end'         => $feature->{'target_stop'},
          'strand'      => $strand,
          'description' => join("\n", @transcript_description),
          'id'          => $feature->{'feature_id'},
          'parent'      => $group_type,
          'alias'       => $feature->{'target_id'},
          'created_on'  => $timestamp_created,
          'updated_on'  => $timestamp_updated,
          'version'     => $notes{'Transcriptversion'},
        );

        # store/update entry for transcript
        print "NEXTTRANS: ".(defined($transcript{'id'}) ? $transcript{'id'} : '')."\n";
        ($current_transcript_id, $updated) = store_features($tracking_dbh, $prepare_hash, \%transcript,
                                                            'transcript', $genes{$group_key}, 0,
                                                            $user_id, $category_id);

        # NOTE(review): the "updated" flag returned above is discarded here,
        # so the refresh branch below never runs and no subfeatures (exons,
        # UTRs) are ever removed or stored by this sub. It is unclear whether
        # this is a deliberate switch-off or a debugging leftover; it is kept
        # as is because enabling it would start calling remove_subfeatures().
        # Confirm the intent before deleting the next line.
        $updated = 0;

        if ($updated) {
          print "updated transcript! Refreshing subfeatures for $current_transcript_id!\n";
          remove_subfeatures($tracking_dbh, $prepare_hash, $current_transcript_id);
          $subfeatureupdate = 1;
        }
        else {
          $subfeatureupdate = 0;
        }
      }

      # store/update entry for exons, etc. (only while a transcript refresh is active)
      if ($subfeatureupdate) {
        print "storing_subfeature: ".$sub_element{'type'}.", ".$sub_element{'start'}."-".$sub_element{'end'}."\n";
        ($current_feature_id, $updated) = store_features($tracking_dbh, $prepare_hash, \%sub_element,
                                                         'subfeature', $current_gene_id, $current_transcript_id,
                                                         $user_id, $category_id);
      }
      else {
        $updated = 0;
      }
    }

    # release the processed features held in the caller's response structure
    @$features = ();
  }

  return \%new_features;
}


1;
