#!/usr/local/ensembl/bin/perl -w

# Helper script for the GENCODE tracking system, to be run nightly.
#
# Connect to the external DAS servers,
# fetch data, store in internal tracking system db
# and recalculate dependent fields in db

package gencode_tracking_system::sources::ucsc_transmap;

use strict;
use gencode_tracking_system::core;
use gencode_tracking_system::config;
use base 'Exporter';

# run_update is exported by default into any package that uses this module.
our @EXPORT = qw( run_update );

# run_update($response, $chrom, $tracking_dbh, $prepare_hash, $user_id,
#            $category_id, $previous_genes)
#
# Turns the features of a response into gene / transcript / exon records and
# hands them to store_features().
#
# Parameters:
#   $response       - hash ref mapping a request URL to an array ref of
#                     feature hashes; values that are not array refs are
#                     skipped. Each processed array is emptied afterwards.
#   $chrom          - chromosome name written into every stored element
#   $tracking_dbh,
#   $prepare_hash,
#   $user_id,
#   $category_id    - passed through unchanged to store_features()
#   $previous_genes - optional hash ref keyed by "TM_<feature_id>"; features
#                     whose key exists in it are skipped (duplicates from
#                     overlapping regions)
#
# Returns a hash ref with one "TM_<feature_id>" => 1 entry for every feature
# that was not skipped.
sub run_update {
  my ($response, $chrom, $tracking_dbh, $prepare_hash, $user_id, $category_id, $previous_genes) = @_;

  my $gene_type   = "ucsc_transmap";
  my $gene_status = 'new';

  my %genes        = ();   # group_id => gene record
  my %sub_elements = ();   # group_id => array ref of exon records
  my %new_features = ();   # "TM_<feature_id>" => 1 for every feature seen here

  # go through loci
  while (my ($url, $features) = each %$response) {

    next unless ref $features eq "ARRAY";
    print "Received ".scalar @$features." features.\n" if $VERBOSE;

  FEATURES:
    foreach my $feature (@$features) {

      my $feature_key = "TM_".$feature->{'feature_id'};

      #remove duplicates from overlapping regions
      next FEATURES if defined $previous_genes and exists $previous_genes->{$feature_key};
      $new_features{$feature_key} = 1;

      # The feature notes are not parsed here, so no creation / modification
      # time is available; both timestamps are stored as undef.
      my ($timestamp_created, $timestamp_updated);

      # Only the first group of a feature is used; its id identifies the gene.
      my $group_id = $feature->{'group'}->[0]->{'group_id'};
      my $strand   = $feature->{'orientation'};

      # every feature is stored as an exon of its group
      my %sub_element = (
        'chrom'      => $chrom,
        'biotype'    => $gene_type,
        'status'     => $gene_status,
        'type'       => "exon",
        'start'      => $feature->{'start'},
        'end'        => $feature->{'end'},
        'score'      => $feature->{'score'},
        'strand'     => $strand,
        'phase'      => ".",
        'id'         => $feature_key,
        'created_on' => $timestamp_created,
        'updated_on' => $timestamp_updated,
      );

      if (!exists $genes{$group_id}) {

        # List-context capture: the description is undef when the feature id
        # does not match, instead of a stale $1 from an earlier match.
        my ($description) = $feature->{'feature_id'} =~ /^([\d\w\_]+\.[\d]+).+/;

        #build structure for gene
        $genes{$group_id} = {
          'chrom'       => $chrom,
          'biotype'     => $gene_type,
          'status'      => $gene_status,
          'type'        => "gene",
          'start'       => $feature->{'start'},
          'end'         => $feature->{'end'},
          'strand'      => $strand,
          'description' => $description,
          'created_on'  => $timestamp_created,
          'updated_on'  => $timestamp_updated,
          'id'          => $group_id,
        };
      }
      else {
        # Grow the gene to cover this feature. Both ends are checked
        # independently because one feature may extend the gene on both sides.
        my $gene = $genes{$group_id};
        if ($feature->{'start'} < $gene->{'start'}) {
          $gene->{'start'} = $feature->{'start'};
        }
        if ($feature->{'end'} > $gene->{'end'}) {
          $gene->{'end'} = $feature->{'end'};
        }
      }

      #save entry for exons, etc. (push autovivifies the array)
      push @{ $sub_elements{$group_id} }, \%sub_element;
    }

    # empty the caller's feature list once it has been consumed
    @$features = ();

    # NOTE(review): %genes is not reset between URLs, so genes collected from
    # an earlier URL are passed to store_features() again here - confirm that
    # this is intended / handled by store_features().
    foreach my $group_id (keys %genes) {
      my $gene = $genes{$group_id};

      #store gene
      if ($VERBOSE) {
        print "GENE:\n";
        print_element($gene);
      }
      my $current_gene_id = store_features($tracking_dbh, $prepare_hash, $gene, 'gene', 0, 0,
                                           $user_id, $category_id);

      #clone transcript from gene (one transcript per gene, same extent)
      my %transcript = %$gene;
      $transcript{'type'} = "transcript";

      #store transcript
      if ($VERBOSE) {
        print "TRANSCRIPT:\n";
        print_element(\%transcript);
      }
      my $current_transcript_id = store_features($tracking_dbh, $prepare_hash, \%transcript,
                                                 'transcript', $current_gene_id, 0, $user_id, $category_id);

      #store sub-features
      foreach my $sub_element (@{ $sub_elements{ $transcript{'id'} } }) {
        if ($VERBOSE) {
          print "ELEMENT:\n";
          print_element($sub_element);
        }
        store_features($tracking_dbh, $prepare_hash, $sub_element, 'subfeature',
                       $current_gene_id, $current_transcript_id, $user_id, $category_id);
      }
    }
  }

  return \%new_features;
}


1;
