
=head1 NAME

gencode_tracking_system : check_data.pl

=head1 DESCRIPTION

Helper script for the GENCODE tracking system, to be run nightly.
Connect to the internal tracking system db, compare data and set flags.

This script can be used to call data-specific analysis functions.
Some examples are given here.

Explicitly set the analyses to run below.

TODO: Some generalization!

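=head1 SYNOPSIS

  perl check_data.pl [--file FILE] [--analysis NAME] [--write|--nowrite] [--env ENV]

The analyses themselves are switched on by editing the variables in the
"analysis to run" section of this script.
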
=head1 CONTACT

Felix Kokocinski, fsk@sanger.ac.uk

=head1 COPYRIGHT

Copyright Felix Kokocinski, 2008-2010, 
supported by Wellcome Trust Sanger Institute (UK) 
and National Human Genome Research Institute (USA).

You may distribute this module under the same terms as perl itself, 
citing the original source.

=cut

use strict;
use warnings;
use Getopt::Long;
use gencode_tracking_system::core;
use gencode_tracking_system::config;

#file-scoped scratch variables
#NOTE(review): the $current_* variables and $das_source are not referenced
#anywhere else in this file; $analysis_to_run is only filled by GetOptions below.
my $current_gene_id;
my $current_transcript_id;
my $current_feature_id;
my $current_gene_description;
my $das_source;
my $analysis_to_run;

#base source name, add other variables to use more than one core annotation
my ($core_source) = keys %CORE_SERVER;

##################################

#analysis to run, set to "1" to activate
my $ucsc_novelloci    = 0;
my $ENSEMBL_exons     = 0;
my $ENSEMBL_introns   = 0;
my $ENSEMBL_cdnas     = 0;
my $ENSEMBL_new_cdnas = 0;
my $HAVANA_overlaps   = 0;
my $flagged_pseudos   = 0;
my $MIT_CONGO         = 0;
my $missed_ccds       = 0;
my $missed_hgnc       = 0;
my $use_file          = 0;
my $UCSC_retros       = 0;
my $CRG_U12           = 0;
my $manual_hgnc       = 0;
my $rnaseq_data       = 0;
my $review_ccds       = 0;

##################################


#looking at ensembl to cluster transcripts?
my $ensembl = 0;

#status ids
my $status_new     = $STATUS{'New'};
my $status_updated = $STATUS{'Updated'};

#manually select chromosome to look at if desired
my $only_this_chrom = undef;

#read ids from file
my $file = "";
my $flag_name = "";

#name of the tracking system account the flags are written as
my $user_name = "trackingsystem";
#comma-separated list of status ids, (re)built by the check_* functions
my $statuses  = '';

my $function;
my $flag_count = 0;


#Parse the command line:
#  --file [FILE]      id file, stored in $file
#  --analysis [NAME]  stored in $analysis_to_run (not evaluated in this file)
#  --write/--nowrite  stored in $WRITE
#  --env ENV          stored in $ENVI
#GetOptions returns false when it meets an unknown or malformed option; stop
#in that case instead of running (and possibly writing) with default settings.
GetOptions(
	    'file:s'         => \$file,
	    'analysis:s'     => \$analysis_to_run,
	    'write!'         => \$WRITE,
	    'env=s'          => \$ENVI,
	   )
  or die "Invalid command line options.\n".
         "Usage: $0 [--file FILE] [--analysis NAME] [--[no]write] [--env ENV]\n";

#let the configuration module set up the database settings
#NOTE(review): presumably this evaluates the --env value - confirm in the config module
gencode_tracking_system::config->set_databases();

#connect to tracking system db
my $tracking_dbh = connect_db($DBHOST, $DBPORT, $DBNAME, $DBUSER, $DBPASS)
  or die "cant connect to to database $DBNAME @ $DBHOST.\n";
if($VERBOSE){ print "Conected to $DBNAME @ $DBHOST.\n" }

#connect to the annotation db: the ensembl one if $ensembl is set, the havana one otherwise
my ($db_label, $db_host, $db_port, $db_name, $db_user, $db_pass) = $ensembl
  ? ("ensembl", $ENS_HOST, $ENS_PORT, $ENS_NAME, $ENS_USER, $ENS_PASS)
  : ("havana",  $HAV_HOST, $HAV_PORT, $HAV_NAME, $HAV_USER, $HAV_PASS);

my ($db, $ga, $ta);
#the adaptors are fetched from $db right away, so a failed connection has to stop here
$db = connect_ensembl($db_host, $db_port, $db_name, $db_user, $db_pass)
  or die "Could not connect to $db_label database.\n";
$ta = $db->get_TranscriptAdaptor();
$ga = $db->get_GeneAdaptor();
if($VERBOSE){ print "Conected to $db_label\n" }

#Core annotation category
my $havana_category_id = get_category_id( $CORE_SERVER{$core_source}->{'category'}, $tracking_dbh );
#same sanity check the check_* functions apply to the categories they look up
if(!$havana_category_id){
  die "Could not find category for ".$CORE_SERVER{$core_source}->{'category'}."\n";
}
#system user
my $user_id = get_user_id( $user_name, $tracking_dbh );

#handed to set_seen_flag() by the check_* functions
my $prepare_hash = prepare_statements($tracking_dbh);

##get sources
#foreach my $das_source (keys %OTHER_SERVERS) {
#  print STDERR "\n-------------------------------------\nLooking at $das_source.".
#               "\n-------------------------------------\n";
#  if(!($OTHER_SERVERS{$das_source}->{'active'})){
#    print STDERR "Not active.\n";
#    next;
#  }

#  #RUN UPDATE FUNCTIONS
#   $function   = $OTHER_SERVERS{$das_source}->{'update_function'};
#   $flag_count = $function($das_source, $OTHER_SERVERS{$das_source}->{'flag_name'}, 0);
#   print STDERR "Set $flag_count new flags.\n";
#}

#All analyses in their fixed execution order, each as [ switch, code to run ].
#The code is wrapped in a closure so that nothing is called unless its switch is set.
my @analysis_queue = (
  [ $ucsc_novelloci,    sub { check_new_locus('ucsc_novelloci', 'novel_locus', 0) } ],
  [ $ENSEMBL_exons,     sub { check_affected_transcripts('ensembl_missing_exons', 'missing_exon', 1) } ],
  [ $ENSEMBL_introns,   sub { check_affected_transcripts('ensembl_missing_introns', 'missing_intron', 1) } ],
  [ $ENSEMBL_cdnas,     sub { check_affected_transcripts('ensembl_missing_cdnas', 'missing_cdna', 1) } ],
  [ $ENSEMBL_new_cdnas, sub { check_affected_transcripts('ensembl_missing_new_cdnas', 'missing_new_cdna', 1) } ],
  [ $review_ccds,       sub { check_affected_transcripts('review_ccds', 'review_ccds', 1) } ],
  [ $HAVANA_overlaps,   sub { check_affected_transcripts('havana_overlaps', 'overlapping_cds') } ],
  [ $flagged_pseudos,   sub { check_category('yale_flaggedpseudogenes', 'flagged_pseudogene') } ],
  [ $MIT_CONGO,         sub { check_congo('mit_congo', 'novel_locus') } ],
  [ $missed_ccds,       sub { check_category('missing_ccds', 'missing_ccds') } ],
  [ $missed_hgnc,       sub { check_affected_transcripts('missing_hgnc', 'missing_hgnc') } ],
  [ $use_file,          sub { set_flags_for_ids('mit_congo', 'congo_nc', $file) } ],
  [ $UCSC_retros,       sub { check_new_locus('ucsc_retro', 'novel_retro', 0) } ],
  [ $CRG_U12,           sub { check_category('crg_u12', 'u12') } ],
  [ $manual_hgnc,       sub {
                          #this analysis always reads its ids from this fixed file
                          $file = "/nfs/acari/fsk/2_encode/data/hgnc/update.txt";
                          set_flags_for_ids2('hgnc', 'hgnc_update', $file);
                        } ],
  [ $rnaseq_data,       sub { check_new_locus('ensembl_rna_seq_loc', 'novel_RNASeq_loc', 1, "exon") } ],
);

#run every activated analysis and report how many flags it counted
foreach my $queued (@analysis_queue){
  my ($is_active, $run_analysis) = @$queued;
  next if(!$is_active);

  $flag_count = $run_analysis->();
  print STDERR "Set $flag_count new flags.\n";
}


#disconnect tracking system db
disconnect_db($tracking_dbh);

print STDERR "\nDONE\n";

###############

#Add a flag to every New/Updated issue of the category specified.
#
# $category_name - key into %OTHER_SERVERS; its 'category' entry is looked up
# $flag_name     - name of the flag to set
#
#Issues for which get_flag() already reports such a flag only get those flags
#marked as seen; all others are flagged (written only if $WRITE is set).
#Returns the number of issues counted as newly flagged.
sub check_category {
  my ($category_name, $flag_name) = @_;

  my $new_flags = 0;

  #resolved-status filter handed to get_flag(); deliberately left undefined
  my $is_resolved = undef; #'no';

  $statuses = $status_new.", ".$status_updated;

  reset_seen_flags($tracking_dbh, "flags", $flag_name);

  my $other_category_id = get_category_id( $OTHER_SERVERS{$category_name}->{'category'}, $tracking_dbh );
  die "Could not find category for ".$OTHER_SERVERS{$category_name}->{'category'}."\n"
    unless($other_category_id);

  if($VERBOSE){
    print STDERR "Looking for $category_name ($other_category_id) issues [$statuses].\n";
  }
  my ($issues) = get_issues($tracking_dbh, undef, undef, undef, undef, $other_category_id, $statuses);

  print STDERR "HAVE ".(scalar @$issues)." for $category_name\n";

  foreach my $issue (@$issues){

    if($VERBOSE){
      print STDERR "HAVE ".$issue->{'name'}." (".$issue->{'transcript_id'}."/".$issue->{'gene_id'}.") at LOCATION ".
            $issue->{'chrom'}." ".$issue->{'start'}."-".$issue->{'end'}.", ".$issue->{'strand'}."\n";
    }

    #was a flag set / resolved already?
    my $known_flags = get_flag($tracking_dbh, $issue->{'transcript_id'}, $flag_name, undef,
                               $issue->{'transcript_id'}, $is_resolved);

    if(scalar @$known_flags){
      #NOTE(review): the other checks in this file pass 1 as seen-value - confirm "2" is intended
      foreach my $known_flag (@$known_flags){
        set_seen_flag($prepare_hash, $known_flag->{'id'}, "flag", "2");
      }
      print STDERR "  Found existing flag $flag_name.\n" if($VERBOSE);
    }
    else{
      #nothing there yet: set tag / write flag
      print STDERR "  Setting flag $flag_name to ".$issue->{'transcript_id'}.".\n" if($VERBOSE);
      if($WRITE){
        #NOTE(review): unlike check_new_locus the new flag is not marked as seen here
        set_flag($tracking_dbh, $issue->{'transcript_id'}, $flag_name, undef,
                 $user_id, $issue->{'transcript_id'});
      }
      $new_flags++;
    }

    print STDERR "\n" if($VERBOSE);
  }

  analyse_seen_flags($tracking_dbh, "flags", $flag_name, $user_id);

  return $new_flags;
}


#Flag features of another category that have no overlapping Havana data.
#
# $das_source   - key into %OTHER_SERVERS; its 'category' entry is looked up
# $flag_name    - name of the flag to set
# $use_subfeats - if true, overlaps are looked up with get_subfeatures(),
#                 otherwise with get_issues()
# $sf_type      - passed on to get_subfeatures() (only used with $use_subfeats)
#
#Returns the number of issues counted as newly flagged; flags are only
#written if $WRITE is set.
sub check_new_locus {
  my ($das_source, $flag_name, $use_subfeats, $sf_type) = @_;

  my $new_flags = 0;

  reset_seen_flags($tracking_dbh, "flags", $flag_name);

  my $other_category_id = get_category_id( $OTHER_SERVERS{$das_source}->{'category'}, $tracking_dbh );
  die "Could not find category for ".$OTHER_SERVERS{$das_source}->{'category'}."\n"
    unless($other_category_id);

  $statuses = $status_new.", ".$status_updated;

  my ($issues) = get_issues($tracking_dbh, undef, undef, undef, undef, $other_category_id, $statuses);

  ISSUE:
  foreach my $issue (@$issues){

    #honour the optional chromosome restriction
    next ISSUE if($only_this_chrom and ($only_this_chrom ne $issue->{'chrom'}));

    if($VERBOSE){
      print "HAVE ".$issue->{'name'}." (".$issue->{'transcript_id'}."/".$issue->{'gene_id'}.") at LOCATION ".
            $issue->{'chrom'}." ".$issue->{'start'}."-".$issue->{'end'}.", ".$issue->{'strand'}."\n";
    }

    #look up Havana data at the position of this issue
    my ($havana_loci) = $use_subfeats
      ? get_subfeatures($tracking_dbh, $issue->{'chrom'},
                        $issue->{'start'}, $issue->{'end'},
                        $issue->{'strand'}, $havana_category_id, undef, $sf_type)
      : get_issues($tracking_dbh, $issue->{'chrom'},
                   $issue->{'start'}, $issue->{'end'},
                   $issue->{'strand'}, $havana_category_id);

    if(scalar @$havana_loci){
      #something is annotated here already, nothing to flag
      print "\t-> Found overlapping Havana loci.\n" if($VERBOSE);

      #check coordinates?

      next ISSUE;
    }

    print "\t-> No overlapping Havana loci." if($VERBOSE);

    #was a flag set / resolved already?
    my $known_flags = get_flag($tracking_dbh, $issue->{'transcript_id'}, $flag_name, undef, $issue->{'transcript_id'});

    if(scalar @$known_flags){
      print "  Found existing flag $flag_name.\n" if($VERBOSE);
      #mark flags as "seen"
      foreach my $known_flag (@$known_flags){
        set_seen_flag($prepare_hash, $known_flag->{'id'}, "flag", 1);
      }
    }
    else{
      #set tag / write flag, the fresh flag counts as "seen" as well
      print "  Setting flag $flag_name.\n" if($VERBOSE);
      if($WRITE){
        my $new_flag_id = set_flag($tracking_dbh, $issue->{'transcript_id'}, $flag_name, undef,
                                   $user_id, $issue->{'transcript_id'});
        set_seen_flag($prepare_hash, $new_flag_id, "flag", 1);
      }
      $new_flags++;
    }

    print "\n" if($VERBOSE);

    #last if($new_flags>1);
  }

  analyse_seen_flags($tracking_dbh, "flags", $flag_name, $user_id);

  return $new_flags;
}


#Flag issues of another category that do not overlap any Havana locus.
#
# $das_source - key into %OTHER_SERVERS; its 'category' entry is looked up
# $flag_name  - name of the flag to set
#
#For every New/Updated issue of that category get_issues() is asked for
#Havana-category issues at the same chromosome, coordinates and strand.
#Issues with a hit are skipped; the others get $flag_name unless get_flag()
#already reports one. All issues are processed. Flags are only written if
#$WRITE is set.
#Returns the number of issues counted as newly flagged.
sub check_congo {
  my ($das_source, $flag_name) = @_;

  my $flag_count = 0;

  #statement for the overlap lookup, handed to get_issues() below
  my @dbfields = qw(id project_id subject description status_id priority_id created_on updated_on 
		    Tchrom Tstart Tend Tstrand);
  my $sql = "SELECT ".join(", ", @dbfields)." FROM issues WHERE Tchrom = ? AND Tstart <= ? ".
            "AND Tend >= ? AND Tstrand = ? and category_id = ?";
  my $issue_select = $tracking_dbh->prepare($sql);

  my $other_category_id = get_category_id( $OTHER_SERVERS{$das_source}->{'category'}, $tracking_dbh );
  if(!$other_category_id){
    die "Could not find category for ".$OTHER_SERVERS{$das_source}->{'category'}."\n";
  }
  $statuses = $status_new.", ".$status_updated;

  my ($issues) = get_issues($tracking_dbh, undef, undef, undef, undef, $other_category_id, $statuses);

  print STDERR "There are ".(scalar @$issues)." issues\n";

  foreach my $issue (@$issues){

    #honour the optional chromosome restriction
    if($only_this_chrom and ($only_this_chrom ne $issue->{'chrom'})){
      next;
    }

    print STDERR "HAVE ".$issue->{'name'}." (".$issue->{'transcript_id'}."/".$issue->{'gene_id'}.") at LOCATION ".
          $issue->{'chrom'}." ".$issue->{'start'}."-".$issue->{'end'}.", ".$issue->{'strand'}."\n" if($VERBOSE);

    #NOTE(review): the other get_issues() calls in this file pass a status list as
    #last argument - confirm that a prepared statement is accepted in this position
    my ($havana_loci) = get_issues($tracking_dbh, $issue->{'chrom'},
				   $issue->{'start'}, $issue->{'end'},
				   $issue->{'strand'}, $havana_category_id,
				   $issue_select);
    #progress indicator, one dot per issue looked up
    print STDERR ".";

    if(scalar @$havana_loci){
      #already covered by Havana, nothing to flag
      print STDERR "\t-> Found overlapping Havana loci.\n" if($VERBOSE);

      #check coordinates?

      next;
    }

    #tab-separated record: name, chrom, (empty) transcript type, result
    print STDERR "CONGO\t".$issue->{'name'}."\t".$issue->{'chrom'}."\t"."\t-No_overlapping_Havana_loci\n" if($VERBOSE);

    #check if a flag was set / resolved already
    my $flags = get_flag($tracking_dbh, $issue->{'transcript_id'}, $flag_name, undef, $issue->{'transcript_id'});
    if(scalar @$flags){
      print STDERR "  Found existing flag $flag_name.\n" if($VERBOSE);
    }
    else{
      #set tag / write flag
      print STDERR "  Setting flag $flag_name.\n" if($VERBOSE);
      if($WRITE){ set_flag($tracking_dbh, $issue->{'transcript_id'}, $flag_name, undef,
			   $user_id, $issue->{'transcript_id'}); }
      $flag_count++;
    }
    print STDERR "\n" if($VERBOSE);
  }

  return $flag_count;
}


#Flag the havana ids named in the descriptions of issues of another category.
#
# $das_source          - key into %OTHER_SERVERS; its 'category' entry is looked up
# $flag_name           - name of the flag to set
# $cluster_transcripts - if true, ids from a HAVANA-TRANSC line are reduced to
#                        one id per gene before flagging
# $geneidgiven         - if true, the listed ids are taken as gene ids as they
#                        are; otherwise the gene is fetched via the gene adaptor
#
#The description of every New/Updated issue is searched for a line
#"JOINING_TRANSCR: id, id" or, if the description does not mention that key,
#a line whose key contains "HAVANA-TRANSC". Each listed id that
#get_data_by_name() finds as transcript is flagged, unless get_flag() already
#reports such a flag (those are marked as seen). Flags are only written if
#$WRITE is set.
#Returns the number of ids counted as newly flagged.
sub check_affected_transcripts {
  my ($das_source, $flag_name, $cluster_transcripts, $geneidgiven) = @_;

  my $flag_count = 0;
  my $type = "transcript";

  my $other_category_id = get_category_id( $OTHER_SERVERS{$das_source}->{'category'}, $tracking_dbh );
  if(!$other_category_id){
    die "Could not find category for '".$OTHER_SERVERS{$das_source}->{'category'}."'/$das_source\n";
  }
  $statuses = $status_new.", ".$status_updated;

  reset_seen_flags($tracking_dbh, "flags", $flag_name);

  my ($issues) = get_issues($tracking_dbh, undef, undef, undef, undef, $other_category_id, $statuses);

  foreach my $issue (@$issues){

    #an issue without description simply names no ids
    my $description = defined $issue->{'description'} ? $issue->{'description'} : "";

    print STDERR "HAVE ".$issue->{'name'}." (".$issue->{'transcript_id'}."/".$issue->{'gene_id'}.") at LOCATION ".
          $issue->{'chrom'}." ".$issue->{'start'}."-".$issue->{'end'}.", ".$issue->{'strand'}
	  .", ".$description."\n" if($VERBOSE);

    my @ids_to_flag = ();
    if($description =~ /:/){

      #use this description field for overlaps etc
      if($description =~ /JOINING_TRANSCR/){
	@ids_to_flag = _ids_from_description($description, qr/\AJOINING_TRANSCR\z/);
      }

      #otherwise use this field for other missings etc
      elsif($description =~ /HAVANA-TRANSC/){
	@ids_to_flag = _ids_from_description($description, qr/HAVANA-TRANSC/);
	if($VERBOSE and scalar @ids_to_flag){
	  print "Adding ".join(", ", @ids_to_flag)." for checking.\n";
	}

	if($cluster_transcripts){
	  #flag only one transcript per locus
	  @ids_to_flag = _one_id_per_gene(\@ids_to_flag, $geneidgiven);
	}
      }
    }

    foreach my $id_to_flag (@ids_to_flag){

      print STDERR "Looking for $id_to_flag\t" if($VERBOSE);

      #does transcript exist & is active?
      my $existing_transcripts = get_data_by_name($tracking_dbh, $type, $id_to_flag);
      #TODO:
      #activity-check

      if(($existing_transcripts) and (exists($existing_transcripts->{'name'}))
	 and ($existing_transcripts->{'name'} eq $id_to_flag)){

	#check if a flag was set / resolved already
	my $flags = get_flag($tracking_dbh, $existing_transcripts->{'id'}, $flag_name,
			     undef, $issue->{'transcript_id'});

	if(scalar @$flags){
	  print STDERR "  Found existing flag $flag_name.\n" if($VERBOSE);
	  #mark flags as "seen"
	  foreach my $flag (@$flags){
	    set_seen_flag($prepare_hash, $flag->{'id'}, "flag", 1);
	  }
	}
	else{
	  #set tag / write flag, the fresh flag counts as "seen" as well
	  print STDERR "  Setting flag $flag_name ($id_to_flag) from ".$issue->{'transcript_id'}." to ".
	    $existing_transcripts->{'id'}.".\n" if($VERBOSE);
	  if($WRITE){
	    my $flag_id = set_flag($tracking_dbh, $existing_transcripts->{'id'}, $flag_name,
				   undef, $user_id, $issue->{'transcript_id'});
	    set_seen_flag($prepare_hash, $flag_id, "flag", 1);
	  }
	  $flag_count++;
	}

      }
      else{
	print STDERR "\nTrouble finding transcript-id $id_to_flag!\n";
      }
      print STDERR "\n" if($VERBOSE);
    }

    #last if($flag_count>1);

  }

  analyse_seen_flags($tracking_dbh, "flags", $flag_name, $user_id);

  return $flag_count;
}

#Return the ids listed on the first "KEY: id, id, ..." line of $description
#whose key matches $key_regex; an empty list if there is no such line.
sub _ids_from_description {
  my ($description, $key_regex) = @_;

  foreach my $line ( split("\n", $description) ){
    #the limit keeps a value that itself contains a colon in one piece
    my ($key, $value) = split(':', $line, 2);
    #lines without "key:value" shape cannot name ids
    next if(!defined $key or !defined $value);
    next if($key !~ $key_regex);

    #whitespace is never part of an id; strip it here so that every later
    #lookup (gene adaptor and tracking system) sees the clean id
    my @ids = ();
    foreach my $id ( split(',', $value) ){
      $id =~ s/\s//g;
      push(@ids, $id) if(length $id);
    }
    return @ids;
  }

  return;
}

#Reduce a list of ids to the first id seen for each gene.
# $ids         - array reference with the ids to cluster
# $geneidgiven - if true, an id is used as its own gene id; otherwise the gene
#                is fetched with the file-level gene adaptor $ga
#Returns the kept ids in their original order.
sub _one_id_per_gene {
  my ($ids, $geneidgiven) = @_;

  my %distinct_gene_ids = ();
  my @kept_ids = ();

  foreach my $id (@$ids){
    #falls back to the id itself if no gene can be fetched
    my $gene_id = $id;
    if(!$geneidgiven){
      #get gene-id from ensembl
      my $gene = $ga->fetch_by_transcript_stable_id($id);
      $gene_id = $gene->stable_id if($gene);
    }
    if(!$gene_id){
      print "Unknown gene $id!\n";
      next;
    }

    print "Using gene $gene_id!\n" if($VERBOSE);
    next if(exists $distinct_gene_ids{$gene_id});
    $distinct_gene_ids{$gene_id} = 1;
    push(@kept_ids, $id);
  }

  return @kept_ids;
}


#Set flags for predefined ids read from a file.
#
# $das_source - key into %OTHER_SERVERS; its 'category' entry must exist
# $flag_name  - name of the flag to set
# $file       - tab-separated file: first column an id of the other source,
#               second column a ", "-separated list of ids to flag
#
#Every id of the second column gets one flag that refers to the first
#other-source id listed for it; further other-source ids are recorded with
#write_history(). Both are only written if $WRITE is set.
#Dies if the category is unknown or the file cannot be opened.
#Returns the number of distinct ids to flag.
sub set_flags_for_ids {
  my ($das_source, $flag_name, $file) = @_;

  my $flag_count = 0;
  #id to flag => list of other-source ids naming it
  my %h_ids = ();

  my $other_category_id = get_category_id( $OTHER_SERVERS{$das_source}->{'category'}, $tracking_dbh );
  if(!$other_category_id){
    die "Could not find category for ".$OTHER_SERVERS{$das_source}->{'category'}."\n";
  }

  #three-argument open: the mode cannot be altered by the content of $file
  open(my $fh, "<", $file) or die "Can't open $file: $!\n";
  while(my $line = <$fh>){
    print $line;
    chomp $line;
    my ($c_id, $h_ids) = split("\t", $line);
    #blank lines and lines without a second column carry no ids to flag
    if(!defined $c_id or !defined $h_ids){
      print STDERR "Skipping malformed line: $line\n" if($line =~ /\S/);
      next;
    }
    foreach my $h_id (split(", ", $h_ids)){
      print "pushing $c_id to $h_id\n";
      push(@{ $h_ids{$h_id} }, $c_id);
    }
  }
  close($fh);

  foreach my $uid (keys %h_ids){
    my ($first_id, @more_ids) = @{ $h_ids{$uid} };

    #set tag / write flag
    print STDERR "  Setting flag $flag_name on ".$uid." to ".$first_id." / ".
      (join(", ", $first_id, @more_ids)).".\n" if($VERBOSE);
    if($WRITE){
      set_flag($tracking_dbh, $uid, $flag_name, undef,
	       $user_id, $first_id);
      #the flag refers to one id only; keep the remaining ones in the history
      if(scalar @more_ids){
	my @addids = map { '#'.$_ } @more_ids;
	my $notes = "Additional \"".$flag_name."\" hits to ".$das_source." ids ".join(", ",@addids).".";
	write_history($tracking_dbh, $uid, 'transcript', $user_id, $notes);
      }
    }

    $flag_count++;
  }

  return $flag_count;
}


#Set flags for predefined ids read from a "|"-separated file.
#
# $das_source - not used by this function
# $flag_name  - name of the flag to set
# $file       - file with up to four "|"-separated columns per line; the first
#               column is handed to set_flag() as last argument, the third
#               column is the gene name to look up
#
#For every line the gene is looked up with get_data_by_name() and the first
#entry returned by get_transcripts() for it is flagged (only if $WRITE is set).
#Lines with missing columns, unknown genes or genes without transcripts are
#reported on STDERR and skipped.
#Dies if the file cannot be opened.
#Returns the number of lines counted as flagged.
sub set_flags_for_ids2 {
  my ($das_source, $flag_name, $file) = @_;

  my $flag_count = 0;

  #three-argument open: the mode cannot be altered by the content of $file
  open(my $fh, "<", $file) or die "Can't open $file: $!\n";
  while(my $line = <$fh>){
    chomp $line;
    my ($id_1, $id_2, $id_3, $id_4) = split('\|', $line);
    #split drops trailing empty columns, so any of these may be undefined
    print "->".join(", ", map { defined $_ ? $_ : "" } ($id_1, $id_2, $id_3, $id_4))."\n";
    if(!defined $id_1 or !defined $id_3){
      print STDERR "MALFORMED LINE: $line\n";
      next;
    }

    $id_1 =~ s/\s//g;
    $id_3 =~ s/\s//g;
    if($id_1 =~ /(.+)~withdrawn/){
      $id_1 = "Withdrawn:".$1;
    }

    #do not dereference the lookup results before knowing that there are any
    my $gene    = get_data_by_name($tracking_dbh, "gene", $id_3);
    my $gene_id = $gene ? $gene->{'id'} : undef;
    if(!$gene_id){
      print STDERR "GENE NOT FOUND: $line\n";
      next;
    }
    my $transcripts = get_transcripts($tracking_dbh, 1, 1, undef, $gene_id);
    my $t_id        = $transcripts ? $transcripts->[0] : undef;
    if(!$t_id){
      print STDERR "TRANSCRIPT NOT FOUND: $line\n";
      next;
    }

    #set tag / write flag
    print STDERR "  Setting flag $flag_name to $gene_id / $t_id / ".$id_1.": ".
      $id_3.".\n" if($VERBOSE);
    if($WRITE){
      set_flag($tracking_dbh, $t_id, $flag_name, undef,
	       $user_id, undef, $id_1);
    }

    $flag_count++;
  }
  close($fh);

  return $flag_count;
}


__END__


INSERT INTO `flags` VALUES (1,-1,'other_problem',0,NULL,NULL,NULL),(2,-1,'missing_exon',0,NULL,NULL,NULL),(3,-1,'missing_intron',0,NULL,NULL,NULL),(4,-1,'overlapping_cds',0,NULL,NULL,NULL),(5,-1,'different_cds',0,NULL,NULL,NULL),(6,-1,'different_splice_sites',0,NULL,NULL,NULL),(7,-1,'different_splice_start_or_end',0,NULL,NULL,NULL),(8,-1,'novel_locus',0,NULL,NULL,NULL),(9,-1,'novel_isoform',0,NULL,NULL,NULL),(10,-1,'missing_cdna',0,NULL,NULL,NULL);


delete from flags where flag_name="missing_exon" and issue_id > 0 and checked_date is null;
select count(*) from flags where flag_name="missing_exon" and issue_id > 0 and checked_date is null;
