#!/usr/local/ensembl/bin/perl -w

# Helper script for the GENCODE tracking system, to be run nightly.
#
# Connect to the external DAS servers,
# fetch data, store it in the internal tracking system db
# and recalculate dependent fields in the db

package gencode_tracking_system::sources::cnio_isoforms;

use strict;
use gencode_tracking_system::core;
use gencode_tracking_system::config;
use base 'Exporter';

our @EXPORT    = qw( run_update );

# run_update - store one tracking-system transcript per DAS feature column.
#
# Arguments (positional):
#   $tracking_dbh  - database handle, passed through to the core helpers
#   $prepare_hash  - passed through to the core helpers
#   $user_id       - passed through to store_features()
#   $category_id   - category used to look up the general project; also
#                    passed through to store_features()
#   $feature_cols  - array ref of feature columns; each column is an array
#                    ref of feature hashes (keys read here: feature_id,
#                    start, end, method_id, note)
#   $chromosome    - optional; any false value is stored as 0
#
# Returns the number of transcripts handed to store_features().
# Dies if no general project can be found for $category_id.
# Columns without any parsable feature_id are reported on STDERR and skipped.
sub run_update {
  my ($tracking_dbh, $prepare_hash, $user_id, $category_id, $feature_cols, $chromosome) = @_;

  my $gene_type     = "CNIO_isoform_scores";
  my $gene_status   = undef;
  my $id_prefix     = "CNIO_iso";
  my $strand        = 0;    # strand is always stored as 0 for these features
  my $feature_count = 0;

  $chromosome = 0 if !$chromosome;

  # All transcripts are attached to the category's general project
  # rather than to an individual gene.
  my $project_id = get_project_for_category($prepare_hash, $tracking_dbh, $category_id);
  die "Can't find general project for category $category_id!\n" unless $project_id;

  # go through loci
  COLUMN:
  foreach my $feature_col (@{ $feature_cols || [] }) {

    my $ott_id;
    my ($start, $end);
    my @status_notes;

    print STDERR "STARTING COL LOOP.\n" if $VERBOSE;

    foreach my $feature (@{ $feature_col || [] }) {

      # Diagnostic dump of the raw feature, only when verbose output is on.
      if ($VERBOSE) {
        print STDERR "next feature\n";
        foreach my $k (keys %$feature) {
          my $value = defined $feature->{$k} ? $feature->{$k} : '';
          print STDERR "$k=".$value."\n";
        }
      }

      # Take ID and location once per column, from the first feature whose
      # ID parses. The capture is only read after a successful match: a
      # failed match leaves $1 holding the value of an earlier match.
      if (!defined $ott_id) {
        my $id = $feature->{'feature_id'};
        if (defined $id && $id =~ /^(\w+)\:.+$/) {
          $ott_id = $1;
          $start  = $feature->{'start'};
          $end    = $feature->{'end'};
        }
      }

      my $method = defined $feature->{'method_id'} ? $feature->{'method_id'} : '';

      # Notes have the form "TYPE|value"; only TRANSCRIPTSTATUS notes are
      # kept. Iterating over a copy of the reference (or an empty list)
      # avoids creating a 'note' key on the caller's feature hash.
      foreach my $note (@{ $feature->{'note'} || [] }) {
        last unless defined $note;    # stop at the first undefined entry

        my ($note_type, $note_value) = split('\|', $note);
        next unless defined $note_type && $note_type eq 'TRANSCRIPTSTATUS';

        # The value is "<status> <label>"; missing parts become empty strings.
        my ($val_1, $val_2) = split(" ", defined $note_value ? $note_value : '');
        $val_1 = '' unless defined $val_1;
        $val_2 = '' unless defined $val_2;
        push(@status_notes, $method.": ".$val_2." = ".$val_1);
      }

    }

    # Without an ID there is nothing meaningful to store for this column.
    if (!defined $ott_id) {
      warn "Skipping feature column without a parsable feature_id\n";
      next COLUMN;
    }

    #build structure for transcript
    my %transcript = (
      'chrom'       => $chromosome,
      'biotype'     => $gene_type,
      'status'      => $gene_status,
      'type'        => "transcript",
      'start'       => $start,
      'end'         => $end,
      'strand'      => $strand,
      'id'          => $id_prefix."_".$ott_id,
      # The ID line always comes first, followed by the status notes.
      'description' => join("\n", "HAVANA-TRANSCR-IDS: ".$ott_id, @status_notes),
      'created_on'  => undef,
      'updated_on'  => undef,
    );

    #store transcript
    if ($VERBOSE) {
      print "TRANSCRIPT:\n";
      print_element(\%transcript);
    }

    # Called in list context as before; the returned values are not needed here.
    # NOTE(review): the meaning of the literal 0 argument is defined by
    # store_features() in the core module - confirm there.
    my ($transcript_id, $updated) = store_features($tracking_dbh, $prepare_hash, \%transcript,
                                                   'transcript', $project_id, 0,
                                                   $user_id, $category_id);

    $feature_count++;
  }

  return $feature_count;
}


1;


__DATA__

<SEGMENT id="OTTHUMT00000195025" start="1" stop="62" version="1.0">
   <FEATURE id="OTTHUMT00000195025:conservation_exon" label="conservation_exon">
      <TYPE id="SO:0001060" category="inferred from electronic annotation (ECO:00000067)">sequence_variant</TYPE>
      <METHOD id="CExonic">CExonic</METHOD>
      <START>1</START>
      <END>62</END>
      <SCORE>0</SCORE>
      <ORIENTATION>0</ORIENTATION>
      <PHASE>-</PHASE>
        <NOTE>TRANSCRIPTTYPE|conservation_exon</NOTE>
<NOTE>TRANSCRIPTSTATUS|UNKNOWN conservation_exon</NOTE>
