#!/usr/local/ensembl/bin/perl -w

# Helper script for the GENCODE tracking system.
#
# Read tab-delimited file, store in internal tracking system db
#
#  LOAD ENTRIES FOR CODING OVERLAPS WITHIN LOUTRE
#

package gencode_tracking_system::sources::havana_overlaps;

use strict;
use gencode_tracking_system::core;
use gencode_tracking_system::config;
use base 'Exporter';

# Export run_update by default (via Exporter), so code that loads this
# module with "use" can call run_update() without a package prefix.
our @EXPORT    = qw( run_update );

# run_update($tracking_dbh, $prepare_hash, $user_id, $category_id)
#
# Reads the tab-delimited coding-overlap report and stores one feature of
# type 'transcript' (biotype "overlappings") per data line, attached to the
# general project of the given category.
#
# Parameters:
#   $tracking_dbh  - database handle, passed through to the core helpers
#   $prepare_hash  - passed through to the core helpers (presumably the
#                    prepared statements; confirm against core.pm)
#   $user_id       - passed through to store_features
#   $category_id   - category whose general project receives the entries
#
# Returns the number of data lines handed to store_features.
#
# Dies if the input file cannot be opened or if no general project is found
# for $category_id.
sub run_update {
  my ($tracking_dbh, $prepare_hash, $user_id, $category_id) = @_;

  my $file     = "/lustre/scratch103/ensembl/ba1/data_for_havana/v56/coding_overlap.das";
  my $features = 0;

  #open data file (3-arg open with a lexical handle; report the OS error)
  open(my $in_fh, '<', $file) or die "cant open file $file: $!\n";

  #all entries are attached to the general project of the category
  my $current_gene_id = get_project_for_category($prepare_hash, $tracking_dbh, $category_id);
  die "Can't find general project for category $category_id!\n" unless $current_gene_id;

  #go through file, read and store each data line as a transcript
  LINE: while (my $line = <$in_fh>){
    chomp $line;

    #ignore non-data lines (header, blank lines, ...)
    next LINE unless $line =~ /^chromosome/;

    #third column (gene names) is not used
    my ($chromloc, $havana_gene_ids, undef, $havana_transcript_ids, $make_non_overlapping,
        $joining_ids, $overlaps, $same_exons) = split("\t", $line);

    #split drops trailing empty columns; treat missing columns as empty
    #strings so the comparisons and concatenations below do not warn
    foreach my $column ($havana_gene_ids, $havana_transcript_ids, $make_non_overlapping,
                        $joining_ids, $overlaps, $same_exons){
      $column = '' unless defined $column;
    }

    #location looks like chromosome:NCBI36:19:10077965:10087045:1,
    #the first two parts are not used
    my (undef, undef, $chrom, $start, $end, $strand) = split(":", $chromloc);

    #make the id lists more readable in the description
    $havana_gene_ids =~ s/,/, /g;
    $joining_ids     =~ s/,/, /g;

    #the first gene id of the list names the entry; without it the id would
    #be a bare "overlap_", so such lines are reported and skipped
    my ($id) = $havana_gene_ids =~ /^([A-Z|0-9]+),.*/;
    unless (defined $id){
      warn "Can't find first gene id in '$havana_gene_ids', skipping line $. of $file\n";
      next LINE;
    }

    #unique ids only, kept in file order so that the description is the
    #same on every run (hash key order is not)
    my %seen;
    my @u_havana_transcript_ids = grep { !$seen{$_}++ } split(',', $havana_transcript_ids);

    my $description = "HAVANA-GENE-IDS: ".$havana_gene_ids."\n".
                      "HAVANA-TRANSCR-IDS: ".join(", ", @u_havana_transcript_ids)."\n".
                      "JOINING_TRANSCR: ".$joining_ids."\n".
                      "NUMBER_OF_JOINERS: ".$overlaps;
    #NOTE: the spelling "SEPERATED" is kept on purpose, changing it would
    #alter the description of every stored entry
    $description .= "\nNOTE: CAN BE SEPERATED"  if $make_non_overlapping eq "y";
    $description .= "\nNOTE: SAME_EXON_PATTERN" if $same_exons eq "y";

    #build structure for transcript
    my %transcript = (
      'chrom'       => $chrom,
      'biotype'     => "overlappings",
      'type'        => "transcript",
      'start'       => $start,
      'end'         => $end,
      'strand'      => $strand,
      'id'          => 'overlap_'.$id,
      'description' => $description,
    );

    #store as transcript
    if($VERBOSE){
      print STDERR "TRANSCRIPT:\n";
      print_element(\%transcript);
    }
    #the returned values are not needed here; the call is kept in list
    #context as before
    my ($current_transcript_id, $updated) =
      store_features($tracking_dbh, $prepare_hash, \%transcript, 'transcript',
                     $current_gene_id, 0, $user_id, $category_id);

    $features++;
  }

  close($in_fh);

  return $features;
}

1;


__DATA__

Genomic location        Stable IDs of overlapping Genes Gene names of overlapping Genes Stable IDs of overlapping Transcripts   Can make all Genes non-overlapping?     Joiners Num genes overlapping   Same exon pattern in 2 genes
chromosome:NCBI36:19:10077965:10087045:1        OTTHUMG00000150165,OTTHUMG00000150166   P2RY11,PPAN     OTTHUMT00000316664,OTTHUMT00000316663   y       (OTTHUMT00000316663)(OTTHUMT00000316664)        2       n


delete p from projects p, issues i where i.project_id=p.id and i.category_id=6;

#insert into projects set id=154413, name="overlapping_genes", description="Genes with overlapping coding regions", is_public=1, created_on="2008-09-08 16:42:47", updated_on="2008-09-08 16:42:47", identifier="overlap", status_id=1, status=1, Gchrom=0, Gstart=0, Gend=0, Gstrand=0;

update issues set project_id=154413 where category_id=6;
