#!/usr/local/ensembl/bin/perl -w

# Helper script for the GENCODE tracking system, to be run nightly.
#
# Connects to the external DAS servers,
# fetches data, stores it in the internal tracking system db
# and recalculates dependent fields in the db.

#example:
#      <FEATURE id="ngf2" label="ngf2">
#        <TYPE id="exon">exon</TYPE>
#        <METHOD id="any">any</METHOD>
#        <START>1351419</START>
#        <END>1351628</END>
#        <SCORE>-</SCORE>
#        <ORIENTATION>+</ORIENTATION>
#        <PHASE>-</PHASE>
#      </FEATURE>

package gencode_tracking_system::sources::ucsc_retro;

use strict;
use gencode_tracking_system::core;
use gencode_tracking_system::config;
use base 'Exporter';

our @EXPORT = qw( run_update );

# run_update($response, $chrom, $tracking_dbh, $prepare_hash,
#            $user_id, $category_id, $previous_genes)
#
# Converts the features of a DAS response into gene / transcript /
# sub-feature records and hands them to store_features().
#
# Parameters:
#   $response       - hash ref: url => array ref of feature hash refs.
#                     Values that are not array refs are skipped.
#                     Each feature is read via the keys 'feature_id', 'type',
#                     'start', 'end', 'orientation', 'phase' and 'group'
#                     (an array ref whose first entry carries 'group_id').
#   $chrom          - chromosome name, copied into every stored record.
#   $tracking_dbh,
#   $prepare_hash,
#   $user_id,
#   $category_id    - passed through unchanged to store_features().
#   $previous_genes - optional hash ref keyed by feature id; features whose
#                     id is already present are skipped (duplicates coming
#                     from overlapping query regions).
#
# Returns a hash ref (feature_id => 1) of all features that were processed
# in this call.
#
# Side effect: every feature array in $response is emptied once it has been
# read.
#
# NOTE(review): $VERBOSE, print_element() and store_features() are not
# defined in this file; presumably they are exported by
# gencode_tracking_system::config / ::core - confirm. store_features() is
# called in list context and its first return value is used as the id of
# the stored record.
sub run_update {
  my ($response, $chrom, $tracking_dbh, $prepare_hash, $user_id, $category_id, $previous_genes) = @_;

  my $gene_type    = "ucsc_retrofinder"; #"ucsc_novel_loci";
  my $gene_status  = undef;
  my %new_features = ();

  # go through loci
  while (my ($url, $features) = each %$response) {

    next unless ref $features eq "ARRAY";

    # transcript records and their sub-elements, both keyed by group id
    my %transcripts  = ();
    my %sub_elements = ();

    print "Received ".scalar @$features." features.\n";# if $VERBOSE;

  FEATURES:
    foreach my $feature (@$features) {

      my $feature_id = $feature->{'feature_id'};

      #remove duplicates from overlapping regions
      if(defined $previous_genes and exists $previous_genes->{$feature_id}){
        next FEATURES;
      }
      $new_features{$feature_id} = 1;

      my $grouphash  = $feature->{'group'}->[0];
      my $group_type = $grouphash->{'group_id'};
      my $strand     = $feature->{'orientation'};

      #get element type; anything unrecognised is treated as an exon.
      #$1 is only read after a successful match: after a failed match it
      #may still hold the capture of an earlier, unrelated match.
      my $element_type = "exon";
      if(defined $feature->{'type'} and $feature->{'type'} =~ m/(intron|UTR|exon)/){
        $element_type = $1;
      }

      #use the reported phase if there is a (true) one, otherwise default
      #to "0" for exons and "." for everything else
      my $phase = ".";
      if($feature->{'phase'}){
        $phase = $feature->{'phase'};
      }
      elsif($element_type eq "exon"){
        $phase = "0";
      }

      my %sub_element = (
        'chrom'   => $chrom,
        'biotype' => $gene_type,
        'status'  => $gene_status,
        'type'    => $element_type,
        'start'   => $feature->{'start'},
        'end'     => $feature->{'end'},
        'score'   => ".",
        'strand'  => $strand,
        'phase'   => $phase,
        'id'      => $feature_id,
      );

      if(!exists $transcripts{ $group_type }){
        #first feature of this group: build structure for transcript
        $transcripts{ $group_type } = {
          'chrom'       => $chrom,
          'biotype'     => $gene_type,
          'status'      => $gene_status,
          'type'        => "transcript",
          'start'       => $feature->{'start'},
          'end'         => $feature->{'end'},
          'strand'      => $strand,
          'id'          => $group_type,
          'description' => $group_type,
          'parent'      => $group_type,
        };
      }
      else{
        #grow the transcript to cover this feature. Both borders are
        #checked independently, as one feature can extend both of them.
        my $transcript = $transcripts{ $group_type };
        if($feature->{'start'} < $transcript->{'start'}){
          $transcript->{'start'} = $feature->{'start'};
        }
        if($feature->{'end'} > $transcript->{'end'}){
          $transcript->{'end'} = $feature->{'end'};
        }
      }

      #save entry for exons, etc.
      push(@{ $sub_elements{$group_type} }, \%sub_element);
    }

    #the features are not needed any more, release them
    @$features = ();

    foreach my $transcript_key (keys %transcripts){

      my $transcript = $transcripts{$transcript_key};

      #clone gene from transcript
      my %gene = %$transcript;
      $gene{'type'}   = "gene";
      $gene{'id'}     = $gene{'parent'};
      $gene{'parent'} = undef;

      #store gene
      if($VERBOSE){
        print "GENE:\n";
        print_element(\%gene);
      }
      my ($current_gene_id) = store_features($tracking_dbh, $prepare_hash, \%gene, 'gene', 0, 0,
                                             $user_id, $category_id);

      #store transcript
      $transcript->{'id'} = $transcript->{'parent'};
      if($VERBOSE){
        print "TRANSCRIPT $transcript_key:\n";
        print_element($transcript);
      }
      my ($current_transcript_id) = store_features($tracking_dbh, $prepare_hash, $transcript,
                                                   'transcript', $current_gene_id, 0,
                                                   $user_id, $category_id);

      #store sub-features
      foreach my $sub_element (@{ $sub_elements{ $transcript->{'id'} } }){
        if($VERBOSE){
          print "ELEMENT:\n";
          print_element($sub_element);
        }
        store_features($tracking_dbh, $prepare_hash, $sub_element,
                       'subfeature', $current_gene_id, $current_transcript_id,
                       $user_id, $category_id);
      }
    }
  }

  return \%new_features;
}


1;
