
=head1 NAME

gencode_tracking_system / set_priorities

=head1 DESCRIPTION

Helper script for the GENCODE tracking system, to be run nightly.
Connect to the internal tracking system db, analyze flags and set priorities and active flags.

The flag priority definitions are now stored in tmp_values table.
You can also set priorities by executing specific queries or combined flags.

=head1 CONTACT

Felix Kokocinski, fsk@sanger.ac.uk

=head1 COPYRIGHT

Copyright Felix Kokocinski, 2008-2010, 
supported by Wellcome Trust Sanger Institute (UK) 
and National Human Genome Research Institute (USA).

You may distribute this module under the same terms as perl itself, 
citing the original source.

=cut

use strict;
use warnings;
use Getopt::Long;

use gencode_tracking_system::core;
use gencode_tracking_system::config;

#Names of genomic regions annotated to a "finished" degree, we are more strict here
my $finished_chroms = '"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "13", "20", "21", "22", "X", "Y"';
#user name of "system" user
my $user_name = "trackingsystem";

my $priority_id;
my $c = 0;
my %issue_flags;
my ($tracking_dbh, $sql);
my @prepared;
#define exceptions in external file?
my $use_priority_exceptions = 0;
#exception-lists
my (%not_found, %not_vega, %non_organism, %pcr);
#optional file to read ids for exceptions from
my $exception_file;
#...or define exceptions from description field?
my $use_priority_exceptions_from_db = 1;


&GetOptions(
	    'verbose!'         => \$VERBOSE,
	    'write!'           => \$WRITE,
	    'exception-list=s' => \$exception_file,
	   );


#connect to tracking system db
$tracking_dbh = connect_db($DBHOST, $DBPORT, $DBNAME, $DBUSER, $DBPASS)
  or die "cant connect to to database $DBNAME @ $DBHOST.\n";
print "Conected to $DBNAME @ $DBHOST.\n";

#get base source name, add other variables to use more than one core annotation
my ($core_source) = keys %CORE_SERVER;

#get system user id
my $user_id = get_user_id( $user_name, $tracking_dbh );

#general prepare statements
my $prepare_hash = prepare_statements($tracking_dbh);

#get all flags and their priorities
my $flaghash = get_flag_types($tracking_dbh);
my %flags;
foreach my $flag (keys %$flaghash){
  $flags{$flag} = $flaghash->{$flag}{'priority'};
}

#read issue ids for exception lists
read_exception_lists($exception_file) if($exception_file and $use_priority_exceptions);

#custom prepares for URGENT attention
$sql = "UPDATE issues SET active_flags = ? WHERE id = ?";
my $prepare_active_flags_sth = custom_prepare($tracking_dbh, $sql);

#set some defined flag combinations
specific_flag_combinations();

#get all active flag types
my $all_flags = get_flag_types($tracking_dbh, 1);
#add to priority list if neccessary
foreach my $flag_name (keys %$all_flags){
  if(!defined $flags{$flag_name}){
    $flags{$flag_name} = $DEFAULT_PRIORITY;
  }
}

#re-set seen-flags, NOT NEEDED?!
#reset_seen_flags($tracking_dbh, 'flags');
reset_seen_flags($tracking_dbh, 'issues');

#go through all flag names, collect flags & issues
foreach my $flag_name (sort keys %flags){

  print "Looking for flag $flag_name.";

#  if(!($flag_name =~ /^splice/)){
#    print "IGNORED\n";
#    next;
#  }

  #get specific unresolved flags (from flags table)
  my $flags = get_flag($tracking_dbh, undef, $flag_name, undef, undef, 'no');

  if(!scalar @$flags){
    print "In vane.\n";
    next;
  }
  else{
    print " Found ".(scalar @$flags)." flags.\n";
  }

  #find all issues with this flag
  foreach my $flag (@$flags){

    #add the flag type to the target issue
    if(!defined $issue_flags{ $flag->{'issue_id'} }){
      $issue_flags{ $flag->{'issue_id'} } = [];

      #set seen-flag for transcript
      #set_seen_flag($prepare_hash, $flag->{'id'}, 'flag', "1");
      set_seen_flag($prepare_hash, $flag->{'issue_id'}, 'issue', "1");
    }

    push(@{$issue_flags{ $flag->{'issue_id'} }}, $flag_name);

  }

}

print "\nHave ".(scalar keys %issue_flags)." total issue-flag entries.\n\n";

#store unique flag names as "active_flags" to issues
print "Storing active flags.\n";
if($WRITE){

  foreach my $flag_issue_id (keys %issue_flags){
    my %distinct_flag_hash = ();
    foreach my $flag_name (@{ $issue_flags{ $flag_issue_id } }){
      $distinct_flag_hash{$flag_name} = 1;
    }
    my @distinct_flag_array = keys %distinct_flag_hash;
    #print "FLAGS $flag_issue_id: ".(join(", ", @distinct_flag_array))."\n" if($VERBOSE);

    set_issue_flags($tracking_dbh, $flag_issue_id, \@distinct_flag_array, $prepare_active_flags_sth);
  }

}

print "Removing old active_flags.\n";
clear_active_flags($tracking_dbh, 1);

#set combined priorities higher
$c = 0;
print "Raising combined flag issue priorities.\n";
$priority_id = $PRIORITY{'urgent'};
foreach my $combined_sth (@prepared){
  $combined_sth->execute();
  while(my $issue_id = $combined_sth->fetchrow_array){
    if($WRITE){
      set_issue_priority($tracking_dbh, $prepare_hash, $issue_id, $priority_id, $user_id);
    }
    $c++;
    #remove from rest
    delete $issue_flags{$issue_id};
  }
  print "combined: $c.\n";
  $c = 0;
  $combined_sth->finish;
}

#set other issue priority according to the different flag types
print "Setting flag-specific issue priorities.\n";
foreach my $flag_issue_id (keys %issue_flags){

  my $highest_priority = $PRIORITY{'low'};
  foreach my $flag_name ( @{ $issue_flags{ $flag_issue_id } } ){

    #check a few exceptions
    next if ($use_priority_exceptions         and priority_exceptions($flag_issue_id, $flag_name));
    next if ($use_priority_exceptions_from_db and priority_exceptions_from_db($flag_issue_id, $flag_name));

    if($PRIORITY{ $flags{$flag_name} } > $highest_priority){
      $highest_priority = $PRIORITY{ $flags{$flag_name} };
    }
  }

  set_issue_priority($tracking_dbh, $prepare_hash, $flag_issue_id, $highest_priority, $user_id);
}

#re-set priorities to "low" of issues not "seen"
print "Lowering other issue priorities.\n";
$c = 0;
$priority_id = $PRIORITY{'low'};
my $core_category_id = get_category_id( $CORE_SERVER{$core_source}->{'category'}, $tracking_dbh );
foreach my $unseen_id ( @{ get_unseen_issues($tracking_dbh, undef, $priority_id) } ){
  set_issue_priority($tracking_dbh, $prepare_hash, $unseen_id, $priority_id, $user_id);
  $c++;
}
print "Found $c.\n";


#disconnect tracking system db
$prepare_active_flags_sth->finish;
release_prepares($prepare_hash);
disconnect_db($tracking_dbh);

print "\nDONE\n";




# some hard-coded flag combinations
sub specific_flag_combinations{

  #combination of missing_exon, missing_intron, missing_cdna
  $sql = 'SELECT id FROM issues '.
         'WHERE active_flags like "%missing_exon%" AND active_flags like "%missing_intron%" '.
         'AND active_flags like "%missing_cdna%";';
  my $prepare_combined_sth = custom_prepare($tracking_dbh, $sql);
  push(@prepared, $prepare_combined_sth);

  #missing_ccds on Sanger chromosomes (and not recently updated)
  $sql = 'SELECT id FROM issues '.
         'WHERE active_flags like "%missing_ccds%" AND Tchrom in('.$finished_chroms.')';
  #my $lastdumpdate = '2010-02-01';
  #if($lastdumpdate){
  #  $sql .= ' AND updated_on < $lastdumpdate';
  #}
  $prepare_combined_sth = custom_prepare($tracking_dbh, $sql);
  push(@prepared, $prepare_combined_sth);

  #missing_ccds in annotated regions on non-Sanger chromosomes
  $sql = 'SELECT i.id FROM flags f, issues i '.
         'WHERE f.flag_name="missing_ccds" AND f.checked_date IS NULL AND i.id=f.issue_id '.
         'AND Tchrom NOT IN('.$finished_chroms.') AND description NOT LIKE "%no_havana_genes%";';
  $prepare_combined_sth = custom_prepare($tracking_dbh, $sql);
  push(@prepared, $prepare_combined_sth);

}


#read issue ids for exception lists
sub read_exception_lists{
  my ($exception_file) = @_;

  open(F1, "<$exception_file") or die "Cant read file exception_file\n";
  <F1>;
  while(<F1>){
    chomp $_;
    my ($name, $remark) = split("\t", $_);
    #get internal id
    my $object = get_data_by_name($tracking_dbh, "transcript", $name);
    if(!$object or !($object->{'id'})){
      print "Could not find $name exception.\n";
      next;
    }
    my $id = $object->{'id'};

    if($remark =~ /NF$/){
      $not_found{$id} = 1;
    }
    elsif($remark =~ /VEGA$/){
      $not_vega{$id} = 1;
    }
    elsif($remark =~ /^non.+organ.+sup/){
      $non_organism{$id} = 1;
    }
    elsif($remark =~ /(RACE|RT-PCR)/){
      $pcr{$id} = 1;
    }
    $c++;

  }
  print "Exception list: Read $c lines, found ".(scalar keys %not_found)." X-not-founds, ".(scalar keys %not_vega)." not-vegas and ".
         (scalar keys %non_organism)." non-organism from $exception_file\n" if $VERBOSE;
  close(F1);
}


#make some exceptions (based on biotype, other flags, annotation remarks, etc.) before rising the issue priority
sub priority_exceptions{
  my ($flag_issue_id, $flag_name) = @_;

  if( ($flag_name =~ /^splice_(GT_AG|GC_AG|AT_AC)_none/) and exists($non_organism{$flag_issue_id}) ){
    #print  "excpt 1\n";
    return 1;
  }
  if( ($flag_name =~ /^splice_/) and exists($not_vega{$flag_issue_id}) ){
    #print  "excpt 2\n";
    return 1;
  }
  if( ($flag_name =~ /^splice_/) and exists($pcr{$flag_issue_id}) ){
    #print  "excpt 3\n";
    return 1;
  }

  return 0;
}


#make some exceptions (based on biotype, other flags, annotation remarks, etc.) before rising the issue priority
sub priority_exceptions_from_db{
  my ($flag_issue_id, $flag_name) = @_;

  my $object = get_data_by_id($tracking_dbh, "transcript", $flag_issue_id);
  if(!$object or !($object->{'id'})){
    print "Could not find $flag_issue_id exception.\n";
    next;
  }

  if( ($flag_name =~ /^splice_(GT_AG|GC_AG|AT_AC)_none/) and ($object->{'description'} =~  /non.+organ.+sup/) ){
    #print  "excpt 1\n";
    return 1;
  }
  if( ($flag_name =~ /^splice_/) and ($object->{'description'} =~ /not.+VEGA/) ){
    #print  "excpt 2\n";
    return 1;
  }
  if( ($flag_name =~ /^splice_/) and ($object->{'description'} =~ /(RACE|RT-PCR)/) ){
    #print  "excpt 3\n";
    return 1;
  }

  return 0;
}


__END__


