
=head1 NAME

gencode_tracking_system / set_flag_from_file

=head1 DESCRIPTION

Helper script for the GENCODE tracking system.
Reads a tab-delimited file and stores each line as a flag in the tracking
system db.

Each line holds a transcript id, a flag name and, optionally, some details.

example:
Transcript_id      flag_name    any_optional_details

=head1 CONTACT

Felix Kokocinski, fsk@sanger.ac.uk

=head1 COPYRIGHT

Copyright Felix Kokocinski, 2008-2010, 
supported by Wellcome Trust Sanger Institute (UK) 
and National Human Genome Research Institute (USA).




=cut

use strict;
use warnings;
use gencode_tracking_system::core;
use gencode_tracking_system::config;
use Getopt::Long;

# ids from the input file that could (%yes) / could not (%no) be resolved,
# reported in the summary at the end of the script
my (%yes, %no);

# command line options (with their defaults) and run-time counters
my $infile;                          # -file: tab-delimited input file
my $test   = 0;                      # -test: stop once more than 10 flags were handled
my $c      = 0;                      # not referenced in the visible part of this script
my $type   = "transcript";           # passed to get_data_by_name() for each id
my $flag_count = 0;                  # number of flags set in this run
my $user_name = "trackingsystem";    # -username: looked up via get_user_id()
my $use_secondary_id = 0;            # -secondary_id: translate ids via get_secondary_id()
my $check_biotype = 0;               # -check_biotype: skip biotypes matching $avoid_types
my $has_header_row = 0;              # -has_header_row: discard the first line of the file
my $line;                            # current line of the input file

# ignore these biotypes (only applied if -check_biotype is set)
my $avoid_types = '^(pseudogene|retrotransposed|processed_pseudogene|transcribed_processed_pseudogene|transcribed_unprocessed_pseudogene|IG_pseudogene|TR_pseudogene|unitary_pseudogene|unprocessed_pseudogene|artifact|IG_gene)$';

# Stop on unknown or malformed options instead of carrying on: a mistyped
# switch (e.g. -tset for -test) would otherwise be dropped and the run
# would continue with the defaults.
GetOptions(
	    'file=s'         => \$infile,
	    'check_biotype!' => \$check_biotype,
	    'secondary_id!'  => \$use_secondary_id,
	    'username=s'     => \$user_name,
	    'test!'          => \$test,
	    'has_header_row!'=> \$has_header_row,
	   )
  or die "Usage: $0 -file <tab-delimited file> [-username <name>] ".
         "[-secondary_id] [-check_biotype] [-has_header_row] [-test]\n";

# Open the tracking system database; the script cannot do anything
# useful without a handle, so give up right away if there is none.
my $tracking_dbh = connect_db($DBHOST, $DBPORT, $DBNAME, $DBUSER, $DBPASS);
die "Cant connect to to database $DBNAME @ $DBHOST.\n" unless $tracking_dbh;

# Tell the user on STDERR which database is used and what $WRITE is set to.
print STDERR "\n---------------------------------------------\n";
print STDERR "Connected to $DBNAME @ $DBHOST. WRITE = $WRITE";
print STDERR "\n---------------------------------------------\n\n";

# Short pause, so the user can still abort after reading the banner.
sleep 5;

# Core annotation name and id (neither value is used in the visible
# part of this script).
my ($havana_source, $cid) = get_core_annotation($tracking_dbh);

# Id of the user the new flags are attributed to.
my $user_id = get_user_id( $user_name, $tracking_dbh );

# Prepared sql statements, handed to the helper functions further down.
my $prepare_hash = prepare_statements($tracking_dbh);

#open data file
#(three-argument open with a lexical handle, so that the file name can
# never be interpreted as an open mode or a pipe)
if(!defined($infile) or $infile eq ''){
  die "No input file given, use -file <tab-delimited file>.\n";
}
open(my $in_fh, '<', $infile) or die "cant open file $infile: $!\n";

#discard the first line if the file starts with a header row
$line = <$in_fh> if($has_header_row);

#go through file, read and store data as flags
LINE: while ($line = <$in_fh>){
  chomp $line;

  #nothing to do for empty lines
  next LINE if($line =~ /^\s*$/);

  #columns: id, flag name, optional details
  my ($transcript_id, $flag_name, $details) = split(/\t/, $line);

  #a flag can't be looked up or stored without a name
  if(!defined($flag_name) or $flag_name eq ''){
    warn "No flag name found in line '$line'\n";
    next LINE;
  }

  my $use_id = $transcript_id;

  #the id in the file is a secondary id: translate it first
  if($use_secondary_id){
    $use_id = get_secondary_id($transcript_id, $prepare_hash);
  }

  if(!$use_id){
    warn "Can't find id $transcript_id\n";
    $no{$transcript_id} = 1;
    next LINE;
  }
  #NOTE(review): ids are counted as found here, even if the issue
  #lookup below fails - confirm that this is intended
  $yes{$transcript_id} = 1;

  my $issue = get_data_by_name($tracking_dbh, $type, $use_id);

  if(!$issue or !$issue->{'id'}){
    warn "Cant find issue $transcript_id\n";
    next LINE;
  }

  #print STDERR "HAVE transcript for $transcript_id, ".$issue->{'id'}."\n" if($issue);

  #the details column is optional: compare and print it as an empty
  #string if missing, but hand the original value to set_flag below
  my $details_text = defined($details) ? $details : '';

  #check if a flag was set / resolved already
  my $set_flag = 1;
  my $flags = get_flag($tracking_dbh, $issue->{'id'}, $flag_name, undef,
                       undef, undef);

  #(guard against an undefined result before dereferencing it)
  if($flags and scalar @$flags){
    #see if this type of flag was set already, set seen status
    foreach my $flag (@$flags){
      my $note1 = defined($flag->{'note'}) ? $flag->{'note'} : '';
      my $note2 = $details_text;
      # ignore whitespace differences between two flags
      $note1 =~ s/\s//g; $note2 =~ s/\s//g;

      #hack for changed note: drop the last ';'-separated field of the
      #new note. Only done if the note really has that layout, as $1
      #holds no usable value after a failed match and notes of any
      #other form could then never be equal to an existing one.
      if($note2 =~ m/(.+;.+;.+);.+/){
        $note2 = $1;
      }

      if(($flag->{'flag_name'} eq $flag_name) and ($note1 eq $note2)){
        set_seen_flag($prepare_hash, $flag->{'id'}, "flag", "2");
        print " Set seen:".($flag->{'id'})."\n";
        $set_flag = 0;
      }
    }
    print STDERR "  Found existing flag $flag_name.\n" if($VERBOSE);
  }
  if($set_flag){

    if($check_biotype){
      my $biotype = get_custom_value($prepare_hash, "Transcripttype", $issue->{'id'});
      #no flags for the biotypes listed in $avoid_types
      if(defined($biotype) and $biotype =~ /$avoid_types/){
        print STDERR "Avoiding $biotype.\n";
        next LINE;
      }
    }

    #set tag / write flag
    print STDERR "  Setting flag $flag_name to ".$issue->{'name'}." / ".$issue->{'id'}." ($details_text).\n" if($VERBOSE);
    if($WRITE){ set_flag($tracking_dbh, $issue->{'id'}, $flag_name, undef,
                         $user_id, undef, $details); }

    #counted even if nothing was written because $WRITE is off
    $flag_count++;
  }
  print STDERR "\n" if($VERBOSE);

  #test mode: stop once more than 10 flags were handled
  last LINE if($test and ($flag_count>10));
}

close($in_fh);


# Final report on STDERR: number of distinct ids recorded in %yes and %no,
# followed by the number of flags counted in $flag_count.
printf STDERR "\nFound %d, missed %d\n\n", scalar(keys %yes), scalar(keys %no);

print STDERR "\n Set ", $flag_count, " flags.\n\n";
