#!/usr/local/ensembl/bin/perl -w

# Helper script for the GENCODE tracking system, to be run nightly.
#
# Connects to the external DAS servers,
# fetches data, stores it in the internal tracking system db
# and recalculates dependent fields in the db.

package gencode_tracking_system::sources::crg_u12;

use strict;
use gencode_tracking_system::core;
use gencode_tracking_system::config;
use base 'Exporter';

our @EXPORT    = qw( run_update );

# Parse the NOTE entries of one DAS feature into a key => value hash.
#
# Each note is expected to look like "KEY=VALUE". Only the first '=' separates
# key from value, so a value may itself contain '=' characters. Undefined
# notes and notes with an empty key are skipped.
#
# Takes the feature's 'note' entry (an array ref, or anything else, which
# yields an empty result) and returns a hash ref.
sub _parse_notes {
  my ($note_list) = @_;

  my %notes = ();
  return \%notes unless ref($note_list) eq "ARRAY";

  foreach my $note (@$note_list) {
    next unless defined $note;
    # explicit LIMIT of 2: keep everything after the first '=' as the value
    my ($key, $value) = split(/=/, $note, 2);
    next unless defined $key and length $key;
    $notes{$key} = $value;
  }

  return \%notes;
}

# Store the CRG U12 features of one chromosome in the tracking database.
#
# Every feature is stored twice through store_features(): first as a 'gene'
# (with its 'type' field set to "gene"), then as a 'transcript' that receives
# the value returned by the gene call as its fifth argument.
#
# Arguments:
#   $response       - hash ref mapping each queried URL to its result; only
#                     values that are array refs (of feature hashes) are used,
#                     anything else is silently ignored
#   $chrom          - chromosome name, copied into every stored element
#   $tracking_dbh, $prepare_hash, $user_id, $category_id
#                   - handed through unchanged to store_features()
#   $previous_genes - optional hash ref keyed by feature_id; features found
#                     in it are skipped (duplicates from overlapping regions)
#
# Returns a hash ref whose keys are the feature_ids stored by this call.
#
# Side effect: each processed feature array is emptied in place once all of
# its features have been handled.
sub run_update {
  my ($response, $chrom, $tracking_dbh, $prepare_hash, $user_id, $category_id, $previous_genes) = @_;

  my $gene_type    = "CRG_U12";
  my $gene_status  = 'new';
  my $prefix       = 'U12_';
  my %new_features = ();

  # go through loci
  while (my ($url, $features) = each %$response) {

    next unless ref($features) eq "ARRAY";
    print "Received ".scalar(@$features)." features.\n" if $VERBOSE;

  FEATURES:
    foreach my $feature (@$features) {

      my $feature_id = $feature->{'feature_id'};

      # remove duplicates from overlapping regions; checked first so that
      # no work is spent on features that are skipped anyway
      if (defined $previous_genes and exists $previous_genes->{$feature_id}) {
        next FEATURES;
      }
      $new_features{$feature_id} = 1;

      my $notes = _parse_notes($feature->{'note'});

      # the feed carries a single LASTMOD stamp, used for both dates
      my $timestamp = $notes->{'LASTMOD'};

      # first group entry, if any (read without autovivifying 'group')
      my $group = ref($feature->{'group'}) eq "ARRAY" ? $feature->{'group'}->[0] : undef;
      my $group_id = ref($group) eq "HASH" ? $group->{'group_id'} : undef;

      # missing values become empty strings: same stored text as before,
      # but without "uninitialized value" warnings
      my ($transcript_type, $transcript_status) =
        map { defined $_ ? $_ : "" } ($notes->{'TRANSCRIPTTYPE'}, $notes->{'TRANSCRIPTSTATUS'});
      $group_id = "" unless defined $group_id;

      my $description = "TRANSCRIPTTYPE:".$transcript_type
                      . "\nTRANSCRIPTSTATUS:".$transcript_status
                      . "\nHAVANA-TRANSCR-IDS:".$group_id;

      # build structure for transcript
      my %transcript = (
        'chrom'       => $chrom,
        'biotype'     => $gene_type,
        'status'      => $gene_status,
        'type'        => $feature->{'type'},
        'start'       => $feature->{'start'},
        'end'         => $feature->{'end'},
        'strand'      => $feature->{'orientation'},
        'id'          => $prefix.$feature_id,
        'description' => $description,
        'created_on'  => $timestamp,
        'updated_on'  => $timestamp,
      );

      # clone gene from transcript; the type is set before the verbose dump
      # so that the output shows the element exactly as it is stored
      my %gene = %transcript;
      $gene{'type'} = "gene";

      # store gene
      if ($VERBOSE) {
        print "GENE:\n";
        print_element(\%gene);
      }
      # NOTE(review): store_features() is deliberately called in scalar
      # context, as before. An earlier variant captured ($id, $updated) in
      # list context - confirm which value the helper yields here.
      my $current_gene_id = store_features($tracking_dbh, $prepare_hash, \%gene, 'gene', 0, 0,
                                           $user_id, $category_id);

      # store transcript
      if ($VERBOSE) {
        print "TRANSCRIPT:\n";
        print_element(\%transcript);
      }
      my $current_transcript_id = store_features($tracking_dbh, $prepare_hash, \%transcript,
                                                 'transcript', $current_gene_id, 0,
                                                 $user_id, $category_id);
    }

    # empty the caller's array in place (presumably to release memory early)
    @$features = ();
  }

  return \%new_features;
}


1;


__END__


http://genome.crg.es:9000/das/U12_Annotation/features?segment=22:10000000,25000000

<FEATURE id="OTTHUMT00000319965_1" label="OTTHUMT00000319965_1">
 <TYPE id="intron" category="ECO:0000053">intron</TYPE>
 <START>19305882</START>
 <END>19305985</END>
 <METHOD id="U12_pipeline">U12_pipeline</METHOD>
 <SCORE>0</SCORE>
 <PHASE>-</PHASE>
 <ORIENTATION>-</ORIENTATION>
 <NOTE>LASTMOD=2008-11-05T16:04:49+0100</NOTE>
 <NOTE>TRANSCRIPTTYPE=processed_transcript</NOTE>
 <NOTE>TRANSCRIPTSTATUS=UNKNOWN</NOTE>
 <GROUP id="OTTHUMT00000319965" type="U12_pipeline_prediction"/>
</FEATURE>
