#!/usr/local/ensembl/bin/perl -w

# Helper script for the GENCODE tracking system.
#
# Read tab-delimited file, store in internal tracking system db
#
# LOAD ENTRIES FOR cDNAs FOUND IN ENSEMBL BUT MISSING IN LOUTRE
#

package gencode_tracking_system::sources::ensembl_missing_cdnas;

use strict;
use gencode_tracking_system::core;
use gencode_tracking_system::config;
use base 'Exporter';

our @EXPORT    = qw( run_update );

# Load the "missing cDNA" entries from the tab-delimited dump file and store
# each data line in the tracking system as one gene plus one transcript.
#
# Expected line layout (tab-separated):
#   num, location, internal ensembl id, havana ids (comma-separated), cDNA id
# where location is colon-separated and its 3rd..6th fields are used as
# chromosome, start, end and strand.
#
# Arguments (handed on unchanged to store_features):
#   $tracking_dbh - presumably the tracking database handle (TODO confirm)
#   $prepare_hash - presumably prepared statements for store_features
#                   (TODO confirm against gencode_tracking_system::core)
#   $user_id      - user id passed along with every stored feature
#   $category_id  - category id passed along with every stored feature
#
# Returns the number of data lines processed.
# Dies if the input file cannot be opened.
#
# NOTE(review): store_features, print_element and $VERBOSE are not defined in
# this file; they are expected to come from the core/config modules.
sub run_update {
  my ($tracking_dbh, $prepare_hash, $user_id, $category_id) = @_;

  my $file     = "/nfs/acari/fsk/2_encode/data/ensembl/unannotated_cdnas.das";
  my $features = 0;

  # three-argument open with a lexical handle: the open mode cannot be
  # influenced by the file name and no package-global bareword handle is used;
  # $! tells the operator why the open failed
  open(my $in, '<', $file) or die "cant open file $file: $!\n";

  #go through file, read and store data as genes & transcripts
  while (my $line = <$in>) {
    chomp $line;

    #ignore non-data lines (data lines start with the running number)
    next unless $line =~ /^\d/;

    # $int_ensembl_id is read only to keep the column positions explicit
    my ($num, $loc, $int_ensembl_id, $havana_ids, $cdna_id) = split("\t", $line);

    # a truncated line leaves trailing columns undefined; treat them as empty
    # so the string operations below do not emit "uninitialized" warnings
    # (the resulting stored values are the same as before)
    $loc        = '' unless defined $loc;
    $havana_ids = '' unless defined $havana_ids;
    $cdna_id    = '' unless defined $cdna_id;

    # the first two location fields ($c, $ass) are not used here
    my ($c, $ass, $chrom, $start, $end, $strand) = split(":", $loc);

    #add space after each comma to make the id list readable
    $havana_ids =~ s/,/, /g;

    #build structure for transcript
    my %transcript = (
      'chrom'       => $chrom,
      'biotype'     => "missing_cDNA",
      'status'      => undef,
      'type'        => "transcript",
      'start'       => $start,
      'end'         => $end,
      'strand'      => $strand,
      'id'          => $num."_".$cdna_id,
      'description' => "cDNA: ".$cdna_id.
                       "\nHAVANA-TRANSCR-IDS: ".$havana_ids,
    );

    #clone gene from transcript; the type is set *before* the verbose dump so
    #the "GENE:" debug output no longer reports type "transcript"
    my %gene = (%transcript, 'type' => "gene");

    #store as gene
    if ($VERBOSE) {
      print STDERR "GENE:\n";
      print_element(\%gene);
    }
    my $current_gene_id = store_features($tracking_dbh, $prepare_hash, \%gene, 'gene',
                                         0, 0, $user_id, $category_id);

    #store as transcript, attached to the gene that was just stored
    if ($VERBOSE) {
      print STDERR "TRANSCRIPT:\n";
      print_element(\%transcript);
    }
    store_features($tracking_dbh, $prepare_hash, \%transcript, 'transcript',
                   $current_gene_id, 0, $user_id, $category_id);

    $features++;
  }
  close($in);

  return $features;

}


1;
