
=head1 NAME

gencode_tracking_system::update

=head1 DESCRIPTION

Helper script for the GENCODE tracking system.
Checks that each gene and transcript exist and belong together.

=head1 CONTACT

Felix Kokocinski, fsk@sanger.ac.uk

=head1 COPYRIGHT

Copyright Felix Kokocinski, 2008-2009, 
supported by Wellcome Trust Sanger Institute (UK) 
and National Human Genome Research Institute (USA).




=cut

use strict;
use warnings;
use gencode_tracking_system::core;
use gencode_tracking_system::config;
use Getopt::Long;

# Driver: connects to the tracking database and to loutre, prepares the
# statements shared with execute_query() and mismatch(), then checks the
# issues of one category chromosome by chromosome.

# $sth1 is the per-chromosome issue query; execute_query() runs it.
my ($sql, $sth1);
my $tracking_dbh = connect_db($DBHOST, $DBPORT, $DBNAME, $DBUSER, $DBPASS)
  or die "cant connect to to database $DBNAME @ $DBHOST.\n";
print STDERR "Conected to $DBNAME @ $DBHOST ($WRITE).\n";

# Connection settings for the loutre database.
my $dbhost = "otterlive";
my $dbport = 3301;
my $dbname = "loutre_human";
my $dbuser = "ottro";
my $dbpass = undef;
# Direct method call instead of the ambiguous indirect-object "new Class(...)".
my $loutre_db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
                                                    -host    => $dbhost,
                                                    -user    => $dbuser,
                                                    -pass    => $dbpass,
                                                    -port    => $dbport,
                                                    -dbname  => $dbname,
                                                   ) or die "cant connect to loutre!\n";
print STDERR "Conected to $dbname @ $dbhost.\n";

#my $ta = $loutre_db->get_TranscriptAdaptor;
# Only referenced by the commented-out adaptor-based check in execute_query().
my $ga = $loutre_db->get_GeneAdaptor;

# Command line options:
#   --chromosome   restrict the run to a single chromosome (default: 1-22, X, Y)
#   --category_id  issue category to check (default: 1)
#   --limit        maximum number of issues fetched per chromosome (default: no limit)
my $inchrom     = undef;
my $category_id = 1;
my $limit       = "";

GetOptions(
           'chromosome:s'  => \$inchrom,
           'category_id:s' => \$category_id,
           'limit:s'       => \$limit,
          );

# The limit ends up inside the SQL text, so it must never be appended raw:
# "--limit 20" used to produce "... i.subject20;". Accept a plain integer
# (or the legacy "LIMIT 20" spelling) and build the clause ourselves.
my $limit_clause = "";
if(defined $limit && $limit =~ /\S/){
  if($limit =~ /^\s*(?:limit\s+)?(\d+)\s*$/i){
    $limit_clause = " LIMIT $1";
  }
  else{
    die "Invalid --limit value '$limit': expected a non-negative integer.\n";
  }
}

# File-level state used by execute_query() and mismatch().
my $k = 0;
my $old_date = '2000-01-01 12:00:00';
my $user_name = "fsk";
my $user_id = get_user_id( $user_name, $tracking_dbh );
my $notes;
my @chroms;

# loutre: gene stable id and is_current flag for a transcript stable id,
# ordered by transcript_id descending.
$sql = 'select gsi.stable_id, g.is_current '.
       'from transcript_stable_id tsi, gene_stable_id gsi, transcript t, gene g '.
       'where tsi.transcript_id=t.transcript_id and t.gene_id=gsi.gene_id and '.
       't.gene_id=g.gene_id and tsi.stable_id=? '.
       'order by t.transcript_id desc';
my $transcr_sth = $loutre_db->dbc->prepare($sql);

# tracking: project id for a given project name.
$sql = 'select p.id from projects p '.
       'where p.name = ? limit 1';
my $look_gene_sth = $tracking_dbh->prepare($sql);

# tracking: create a project with placeholder coordinates.
$sql = 'insert into projects set name=?, created_on = ?, updated_on = ?, '.
       'identifier = ?, status = 1, Gchrom = ?, Gstart = 1, Gend = 1';
my $insert_sth = $tracking_dbh->prepare($sql);

# tracking: move an issue of the given category to another project.
$sql = 'update issues i set i.project_id=? '.
       'where i.id=? and i.category_id = ?';
my $rematch_sth = $tracking_dbh->prepare($sql);

# tracking: flags attached to an issue.
$sql = 'select id, flag_name, created_date, checked_date from flags where issue_id=?';
my $flags_sth = $tracking_dbh->prepare($sql);

# tracking: issues with their projects for one category and chromosome.
# The statement text does not depend on the chromosome, so it is prepared
# once here rather than on every loop iteration.
$sql = 'select i.id, i.subject, p.id, p.name '.
       'from issues i, projects p '.
       'where i.project_id=p.id and i.category_id = ? and i.Tchrom = ? '.
       'order by p.name, i.subject'.$limit_clause.';';
$sth1 = $tracking_dbh->prepare($sql);

if($inchrom){
  push(@chroms, $inchrom);
}
else{
  @chroms = qw(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X Y);
}
foreach my $chromosome (@chroms){
  execute_query($category_id, $chromosome);
}

# Release every prepared statement before disconnecting.
$transcr_sth->finish;
$look_gene_sth->finish;
$insert_sth->finish;
$rematch_sth->finish;
$flags_sth->finish;
$sth1->finish;
disconnect_db($tracking_dbh);

####################

# Check every issue (transcript) of one category on one chromosome against
# loutre.
#
# Arguments:
#   $category_id - issue category bound to the first placeholder of $sth1
#   $chrom       - chromosome bound to the second placeholder of $sth1
#
# For each issue the gene stable ids that loutre links to the transcript
# are read from $transcr_sth:
#   * the first current gene decides the outcome: if its stable id differs
#     from the project name, MISMATCH is printed and mismatch() is called;
#   * if no current gene is seen but a non-current gene carries the project
#     name, RETIRED_TRANSCRIPT is printed together with the issue's flags
#     and, when $WRITE is set, the issue status is changed to "Removed".
#
# An earlier implementation compared via the GeneAdaptor
# (fetch_by_stable_id / fetch_by_transcript_stable_id); the direct SQL
# lookup below replaced it.
sub execute_query {
  my ($category_id, $chrom) = @_;

  $sth1->execute($category_id, $chrom);

  while (my @issue_row = $sth1->fetchrow_array) {
    my ($transcript_id, $transcript_name, $gene_id, $gene_name) = @issue_row;

    $transcr_sth->execute($transcript_name);
    print "$transcript_name, $gene_name / " if $VERBOSE;

    # Holds the stable id of a matching non-current gene, 0 otherwise.
    my $retired = 0;

    while (my ($loutre_gene, $is_current) = $transcr_sth->fetchrow_array) {
      print "$loutre_gene, $is_current\n" if $VERBOSE;

      if (!$is_current) {
        # Remember a retired gene of the same name, but keep looking for
        # a current one.
        $retired = $loutre_gene if $loutre_gene eq $gene_name;
        next;
      }

      # A current gene settles the question for this transcript.
      if ($loutre_gene ne $gene_name) {
        # gene name was changed
        print "MISMATCH\t$transcript_name\t$gene_name\t".
              "$loutre_gene\t$transcript_id\t$gene_id\n";
        mismatch($transcript_id, $loutre_gene, $chrom, $transcript_name, $gene_name);
      }
      $retired = 0;
      last;
    }

    next unless $retired;

    print "RETIRED_TRANSCRIPT\t$transcript_name\t$gene_id\n";

    # List the flags attached to the retired issue.
    $flags_sth->execute($transcript_id);
    while (my ($flag_id, $flag_name, $created_date, $checked_date) = $flags_sth->fetchrow_array) {
      $checked_date ||= "-";
      print "Flags:\t$flag_id\t$flag_name\t$created_date\t$checked_date\n";
    }

    #change_project_status($tracking_dbh, $gene_id, 0, $user_id);
    change_issue_status($tracking_dbh, $transcript_id, "Removed", $user_id) if $WRITE;
  }

  print "\n";
}


# Resolve a gene/transcript mismatch reported by execute_query().
#
# Looks up the tracking project whose name is the gene stable id that
# loutre currently reports for the transcript. If no such project exists
# it is created (only when $WRITE is set; otherwise the placeholder
# "new_id" is reported). The issue is then re-linked to that project and
# history entries are written for both changes.
#
# Arguments:
#   $transcript_id   - id of the issue to move
#   $g_id            - gene stable id reported by loutre
#   $chrom           - chromosome, stored as Gchrom on a newly created project
#   $transcript_name - issue subject, used in the history notes
#   $gene_name       - name of the project the issue is linked to now
#
# Prints CREATING / SETTINGTO progress lines to STDOUT. If creating the
# project or re-linking the issue fails, a warning is emitted and the sub
# returns without writing misleading history entries.
sub mismatch{
  my ($transcript_id, $g_id, $chrom, $transcript_name, $gene_name) = @_;

  #if($k++>5){ exit 0};

  $look_gene_sth->execute($g_id);
  my ($gene_id) = $look_gene_sth->fetchrow_array;
  # Only one row is wanted; release the handle before it is reused.
  $look_gene_sth->finish;

  if(!$gene_id){
    print "CREATING\t$g_id\t";
    if($WRITE){
      # Only ask for the new id when the insert succeeded; otherwise
      # last_insert_id could hand back an undefined or stale value and the
      # issue would be linked to the wrong project.
      my $inserted = $insert_sth->execute($g_id, $old_date, $old_date, lc($g_id), $chrom);
      $gene_id = $inserted
        ? $tracking_dbh->last_insert_id(undef, undef, undef, undef)
        : undef;
      if(!$gene_id){
        print "\n";
        warn "Could not create project $g_id (".
             ($insert_sth->errstr || "no id returned").
             "); transcript $transcript_name left on $gene_name.\n";
        return;
      }
      my $note = "Created to fix transcript $transcript_name.";
      write_history($tracking_dbh, $gene_id, "gene", $user_id, $note);
    }
    else{
      $gene_id = "new_id";
    }
  }

  print "SETTINGTO\t$gene_id\t$category_id\t$transcript_id\n";
  if($WRITE){
    # Record the move in the history only if the update itself went through.
    if(!$rematch_sth->execute($gene_id, $transcript_id, $category_id)){
      warn "Could not move transcript $transcript_name to $g_id (".
           ($rematch_sth->errstr || "unknown error").").\n";
      return;
    }
    my $note = "Moved transcript $transcript_name from locus $gene_name to $g_id.";
    write_history($tracking_dbh, $transcript_id, "transcript", $user_id, $note);
  }
}
