
=head1 NAME

gencode_tracking_system::scripts::update

=head1 DESCRIPTION

Helper script for the AnnoTrack tracking system.
Test servers and run update process for every source as defined in config.

=head1 CONTACT

Felix Kokocinski, fsk@sanger.ac.uk

=head1 COPYRIGHT

Copyright Felix Kokocinski, 2008-2010, 
supported by Wellcome Trust Sanger Institute (UK) 
and National Human Genome Research Institute (USA).

You may distribute this module under the same terms as perl itself, 
citing the original source.

=cut

use strict;
use warnings;
use Getopt::Long;
use gencode_tracking_system::core;
use gencode_tracking_system::config;
use UNIVERSAL::require;
#from http://search.cpan.org/~mschwern/UNIVERSAL-require-0.11/lib/UNIVERSAL/require.pm

my $startt = time();

#base source name, add other variables to use more than one core annotation
my ($core_source) = keys %CORE_SERVER;

#only print the chromosomes & lengths
my $show_chroms = 0;
my $onlythis    = undef;
my $test        = 0;
my $update_all  = 1;

&GetOptions(
	    'show_chroms!'  => \$show_chroms,
	    'chrom:s'       => \$onlythis,
	    'test!'         => \$test,
	    'verbose!'      => \$VERBOSE,
	    'write!'        => \$WRITE,
	    'core!'         => \$UPDATE_CORE,
	    'all!'          => \$update_all,
	   );

#setting for alert mails
my $uemail  = $ALERT_EMAIL;
my $myemail = 'AnnoTrack@system.ac.uk';
my $subj    = '!!! AnnoTrack Update Problems !!!';
my $text    = '';
my $mailcount = 0;

my $update;
my $das_connection;

#connect to tracking system db
my $tracking_dbh = connect_db($DBHOST, $DBPORT, $DBNAME, $DBUSER, $DBPASS);

if(!$tracking_dbh){
  $text = "\nCant connect to to database $DBNAME @ $DBHOST.\n";
  death_alert($text, $uemail, $myemail, $subj);
}

print STDERR "\n---------------------------------------------\n".
             "         AnnoTrack data update script".
             "\n---------------------------------------------\n".
             "Connected to = $DBNAME @ $DBHOST\n".
             "write        = ".$WRITE.
             "\nshow_chroms  = $show_chroms\nupdate_core  = $UPDATE_CORE\n".
  "update_lists = $UPDATE_LISTS\nverbose      = $VERBOSE\n\n";
print STDERR "test-chrom = $onlythis\n" if($onlythis);

#wait a little to allow emergency break by user
sleep(5);

#prepare sql
my $prepare_hash = prepare_statements($tracking_dbh);

#connect to otter das server
$das_connection = connect_das( $CORE_SERVER{$core_source}->{'dns'},
			       $CORE_SERVER{$core_source}->{'proxy'} );

if(!$das_connection){
  $text = "\nConnection to DAS source $core_source failed!\n";
  death_alert($text, $uemail, $myemail, $subj, 1);
}
print "-------------------------------------\n".
      "connected to DAS server ".$CORE_SERVER{$core_source}->{'dns'}.
      ".\n-------------------------------------\n";

#get entry point list/lengths from DAS source
my $chrom_lens = get_entry_points($das_connection);

#get static version if source is down
if(!$chrom_lens or !(keys %$chrom_lens)){
  $chrom_lens = get_entry_points(undef, 1);
}

if(!$chrom_lens or !(keys %$chrom_lens)){
  $text = "\nCould not get entry points!\n";
  death_alert($text, $uemail, $myemail, $subj, 0);
}

my @ordered_chroms = sort _sortbychrnum keys %$chrom_lens;

if($show_chroms){
  #just spit out the chromosome lengths and end
  foreach my $k (@ordered_chroms){
    print "$k\t".$chrom_lens->{$k}."\n";
  }
  exit 0;
}

print "Have ".(scalar keys %$chrom_lens)." entry points.\n" if($VERBOSE);

#update core annotation
update_core() if($UPDATE_CORE);


#update other sources
if($update_all){
  foreach my $das_source (sort keys %OTHER_SERVERS) {
    print "\n-------------------------------------\nLooking at $das_source.".
          "\n-------------------------------------\n";
    $das_connection = undef;

    if(!($OTHER_SERVERS{$das_source}->{'active'})){
      print "Not active.\n";
      next;
    }

    if(defined($OTHER_SERVERS{$das_source}->{'dns'})){

      #connect to external das server
      print "contacting ".$OTHER_SERVERS{$das_source}->{'dns'}.".\n" if $VERBOSE;
      $das_connection = connect_das( $OTHER_SERVERS{$das_source}->{'dns'},
				     $OTHER_SERVERS{$das_source}->{'proxy'} );

      if(!$das_connection or !($das_connection->isa('Bio::Das::Lite'))){
	$text = "\nConnection to DAS source $das_source failed!\n";
	death_alert($text, $uemail, $myemail, $subj, 0);
	next;
      }
      if($VERBOSE){ print "connected to DAS server $das_source.\n"; }

    }

    #run update on sources
    $update = source_update($das_connection, $tracking_dbh, $prepare_hash,
			    \%OTHER_SERVERS, $das_source, $chrom_lens);

    if(!$update){
      $text = "\nCould not run update on $das_source!\n";
      death_alert($text, $uemail, $myemail, $subj, 0);
      next;
    }

  }
}


#release prepares
release_prepares($prepare_hash);

#disconnect tracking system db
disconnect_db($tracking_dbh);

print "\nDONE\n";

my $endt = time();
print "Time taken was ", ($endt - $startt), " seconds\n\n";


##################################


=head2 update_core

  Description: update the core annoation specicified in the contig file

=cut

sub update_core {

  #run update on core annotation
  #use DAS server
#  $update = source_update($das_connection, $tracking_dbh, $prepare_hash,
#			  \%CORE_SERVER, $core_source, $chrom_lens);

  #or use database connection
  #get user id
  my $user_id = get_user_id( $CORE_SERVER{$core_source}->{'user_name'}, $tracking_dbh );
  die "Can't find user ".$CORE_SERVER{$core_source}->{'user_name'} if !$user_id;

  #get category id
  my $category_id = get_category_id( $CORE_SERVER{$core_source}->{'category'}, $tracking_dbh );

  if(!$test){
    #re-set seen flags
    print "Resetting seen-flags.\n\n";
    reset_seen_flags($tracking_dbh, undef, $CORE_SERVER{$core_source}->{'category'});
  }


  {
    no strict 'refs';
    no warnings 'redefine';

    my $packname;
    if( defined($CORE_SERVER{$core_source}->{'update_function'}) ){
      $packname = "gencode_tracking_system::sources::".$CORE_SERVER{$core_source}->{'update_function'};
    }
    else{
      print STDERR "Missing core updat function!\n";
      return 0;
    }
    $packname->use;
    my $funcname = "run_update";
    eval{
      $update = ($packname . "::" . $funcname) -> ($tracking_dbh, $prepare_hash,
						   $user_id, $category_id, $chrom_lens);
    };
    if($@){
      print STDERR "ERROR IN UPDATE FUNCTION:\n".$@."\n";
      return 0;
    }
  }

  if(!$update){
    $text = "\nCould not run update on $core_source!\n";
    death_alert($text, $uemail, $myemail, $subj, 1);
  }

  if(!$test){
    #use flags to identify outdated entries
    print "Checking seen-flags.\n" if $VERBOSE;
    analyse_seen_flags($tracking_dbh, undef, $CORE_SERVER{$core_source}->{'category'}, $user_id);
  }

}


=head2 source_update

  Arg [1]    : das connection
  Arg [2]    : db handle
  Arg [3]    : hash with prepared statements
  Arg [4]    : das server (hash ref) from config
  Arg [5]    : name of das source
  Arg [6]    : hash with chromosomes and lengths
  Description: update one of the external sources via DAS
  Returntype : boolean, 1 on success

=cut


sub source_update {
  my ($das_c, $tracking_dbh, $prepare_hash, $das_server, $das_source, $chrom_lens) = @_;

  if(!$onlythis){
    #re-set seen flags
    print "Resetting seen-flags.\n";
    reset_seen_flags($tracking_dbh, undef, $das_server->{$das_source}->{'category'});
  }

  #get user id
  my $user_id = get_user_id( $das_server->{$das_source}->{'user_name'}, $tracking_dbh );
  die "Can't find user ".$das_server->{$das_source}->{'user_name'} if !$user_id;

  #get category id
  my $category_id = get_category_id( $das_server->{$das_source}->{'category'}, $tracking_dbh );
  #add new category
  if(!$category_id){
    print "Adding category ".$das_server->{$das_source}->{'category'}.".\n";
    $category_id = add_category($tracking_dbh, $das_server->{$das_source}->{'category'},
				$das_server->{$das_source}->{'description'});
  }
  if(!$category_id){ die "Can't get category ".$das_server->{$das_source}->{'category'}."\n"; }

  #get type name if required
  my $type_request = $das_server->{$das_source}->{'type'};

  if(($das_server->{$das_source}->{'by_chrom'}) == 1){
    #run update over every chromosome

    foreach my $chrom (@ordered_chroms){

      next if($onlythis and ($onlythis ne $chrom));

      if($type_request and $type_request =~ /\,/){
	#go through all types of this source
	my @types = split(',', $type_request);
	foreach my $type (@types){
	  print "Using DAS type $type\n" if($VERBOSE);
	
	  if($CHUNKS){
	    #go through chromosome chunk by chunk
	    go_through_chunks(1, $chrom_lens->{$chrom}, $chrom, $chrom_lens->{$chrom},
			      $type, $das_c, $das_source, $user_id, $category_id);
	  }
	  else{
	    #get features at once
	    get_features($das_c, $chrom, 1, $chrom_lens->{$chrom}, $type,
			 $das_source, $user_id, $category_id);
	  }
	}
      }
      else {
	#only one type
	if($CHUNKS){
	  #go through chromosome chunk by chunk
	  go_through_chunks(1, $chrom_lens->{$chrom}, $chrom, $chrom_lens->{$chrom},
			    $type_request, $das_c, $das_source, $user_id, $category_id);
	}
	else{
	  #get features at once
	  get_features($das_c, $chrom, 1, $chrom_lens->{$chrom}, $type_request,
		       $das_source, $user_id, $category_id);
	}
      }

    }

  }
  elsif( (($das_server->{$das_source}->{'by_chrom'}) == 0) && $UPDATE_LISTS){
    #run update once (list-based sources)

    print "RUNNING script for $das_source.\n" if($VERBOSE);
    print "gencode_tracking_system::sources::".$das_source."::run_update()\n";
    #call source-specific update function with features
    #load dynamically using UNIVERSAL->require
    {
      no strict 'refs';
      no warnings 'redefine';

      my $packname;
      if( defined($OTHER_SERVERS{$das_source}->{'update_function'}) ){
	$packname = "gencode_tracking_system::sources::".$OTHER_SERVERS{$das_source}->{'update_function'};
      }
      else{
	$packname = "gencode_tracking_system::sources::".$das_source;
      }
      my $funcname = "run_update";
      my $featnum = 0;

      $packname->use;

      eval{
	$featnum = ($packname . "::" . $funcname) -> ($tracking_dbh, $prepare_hash,
					   $user_id, $category_id, $das_source);
      };
      if($@){
	print "ERROR IN UPDATE FUNCTION:\n".$@."\n";
	return 0;
      }

      print "\nThere were $featnum features.\n";
    }
  }
  elsif(($das_server->{$das_source}->{'by_chrom'}) == 2) {
    #run update id based

    my $core_category_id = get_category_id( $CORE_SERVER{$core_source}->{'category'},
					      $tracking_dbh );
    if(!$core_category_id){
      die "Can't get CORE category!\n";
    }

    print "RUNNING script for $das_source.\n" if($VERBOSE);
    print "gencode_tracking_system::sources::".$das_source."::run_update()\n";

    #call source-specific update function with features
    #load dynamically using UNIVERSAL->require
    {
      no strict 'refs';
      no warnings 'redefine';
      my $packname = "gencode_tracking_system::sources::".$das_source;
      my $funcname = "run_update";
      my $featnum = 0;
      my @features_col = ();

      $packname->use;

      foreach my $chromosome (@ordered_chroms){

	my $transcript_ids = get_transcripts($tracking_dbh, 1, $core_category_id,
					     undef, undef, 'subject', $chromosome);
	print "Received ".(scalar @$transcript_ids)." transcript_ids ($chromosome).\n";

	#get all features from chunk
	print "Getting features from for CORE-IDs.\n" if($VERBOSE);
	foreach my $core_id (@$transcript_ids){
	  my $features = undef;
	  my $url;

	  print "ID: $core_id   " if($VERBOSE);

	  my $response = get_das_response($das_c, $core_id, $type_request);
	  ($url, $features) = %$response;
	  if(!$features){
	    print "-\n" if($VERBOSE);
	    next;
	  }

	  push(@features_col, $features);

	  print "+\n" if($VERBOSE);

	  #call update function with batches of X features
	  if((scalar @features_col) > 5){

	    print "calling update with ".(scalar @features_col)." features.\n" if($VERBOSE);

	    eval{
	      $featnum += ($packname . "::" . $funcname) -> ($tracking_dbh, $prepare_hash, $user_id,
							     $category_id, \@features_col, $chromosome);
	    };
	    if($@){
	      print "ERROR IN UPDATE FUNCTION:\n".$@."\n";
	      return 0;
	    }

	    @features_col = ();

	    #last; ###########################

	  }
	}
	#call the update with the remaining fatures
	if(scalar @features_col){

	  print "calling update with ".(scalar @features_col)." features.\n" if($VERBOSE);

	  eval{
	    $featnum += ($packname . "::" . $funcname) -> ($tracking_dbh, $prepare_hash,
							   $user_id, $category_id, \@features_col);
	  };
	  if($@){
	    print "ERROR IN UPDATE FUNCTION:\n".$@."\n";
	    return 0;
	  }

	  @features_col = ();
	}

      }

      print "\nThere were $featnum features.\n";
    }

  }
  else{
    print "Dont know what to do with this source!\n";
  }

  if(!$onlythis){
    #use flags to identify outdated entries
    print "Checking seen-flags.\n" if $VERBOSE;
    analyse_seen_flags($tracking_dbh, undef, $das_server->{$das_source}->{'category'}, $user_id);
  }

  return 1;
}


=head2 go_through_chunks

 Arg [1]    : chunk start
 Arg [2]    : chunk end
 Arg [3]    : chromosome name
 Arg [4]    : chromosome length
 Arg [5]    : optional type of DAS source
 Arg [6]    : das connection
 Description: loop over chromosome in fixed-sized chunks,
              fetch DAS features and call specific update function
 Returntype : none

=cut


sub go_through_chunks {
  my ($chunk_start, $chunk_end, $chromosome, $chrom_len, $type_request,
      $das_c, $das_source, $user_id, $category_id) = @_;

  print "---\n" if($VERBOSE);
  my $cf = 0;

  my ($region_start, $region_end);

  #loop through regions until all is covered
  for($region_start = $chunk_start, $region_end = $region_start + $MAXCHUNK;
      $region_start < $chunk_end;
      $region_start = $region_end + 1, $region_end += $MAXCHUNK){

    if($region_end > $chrom_len){
      $region_end = $chrom_len;
    }elsif($region_end > $chunk_end){
      $region_end = $chunk_end;
    }

    get_features($das_c, $chromosome, $region_start, $region_end, $type_request,
		 $das_source, $user_id, $category_id);

  }
}


=head2 get_features

 Arg [1]    : DAS connection
 Arg [2]    : chromosome name
 Arg [3]    : chromosome length
 Arg [4]    : optional type of DAS source
 Arg [5]    : type request
 Arg [6]    : DAS source
 Arg [7]    : user id
 Arg [8]    : category id
 Description: call the DAS function call and
              dynamically call the source-specific update function
 Returntype : none

=cut


sub get_features {
    my ($das_c, $chromosome, $region_start, $region_end, $type_request,
	$das_source, $user_id, $category_id) = @_;

    my %ids_seen;
    my $region = ":".$region_start.",".$region_end;

    #get all features from chunk
    print "Getting features from chromosome $chromosome : $region_start, $region_end.\n" if($VERBOSE);
    my $features = undef;
    $features = get_das_response($das_c, $chromosome, $region_start, $region_end, $type_request);

    #show_das_response($features) if $VERBOSE;

    #call source-specific update function with features
    #load dynamically using UNIVERSAL->require
    {
      no strict 'refs';
      no warnings 'redefine';
      my $packname = "gencode_tracking_system::sources::".$das_source;
      my $funcname = "run_update";
      my $new_ids  = undef;

      $packname->use;

      eval{
	$new_ids = ($packname . "::" . $funcname) -> ($features, $chromosome, $tracking_dbh,
						      $prepare_hash, $user_id, $category_id,
						      \%ids_seen, $das_source);
      };
      if($@){
	print "ERROR IN UPDATE FUNCTION:\n".$@."\n";
	return 0;
      }

      print "\nThere were ".(scalar keys %$new_ids)." features in ".$chromosome.$region.".\n";

      #keep track of features seen to avoid duplicates!
      %ids_seen = (%ids_seen, %$new_ids);

      $packname->nouse;

  }

}


=head2 death_alert

 Arg [1]    : message text
 Arg [2]    : to-address
 Arg [3]    : from-address
 Arg [4]    : subject
 Arg [5]    : bool to die
 Description: On some failures, send email and die if neccessary
              Limit numbers of mails sent
 Returntype : none

=cut

sub death_alert{
  my ($text, $uemail, $myemail, $subj, $die) = @_;

  print $text;
  if($mailcount++<10){
    send_mail($text, $uemail, $myemail, $subj)
      or print "\nCould not send message to $uemail!\n";
  }
  die if($die);
}


=head2 _sortbychrnum

 Arg [1]    : parameter A
 Arg [2]    : parameter B
 Description: sort function for chromosome names
              borrowed from Steve Searle

=cut

sub _sortbychrnum {

  my @awords = split /_/,$a;
  my @bwords = split /_/,$b;

  my $anum = $awords[0];
  my $bnum = $bwords[0];

  $anum =~ s/chr//;
  $bnum =~ s/chr//;

  if ($anum !~ /^[0-9]*$/) {
    if ($bnum !~ /^[0-9]*$/) {
      return $anum cmp $bnum;
    } else {
      return 1;
    }
  }
  if ($bnum !~ /^[0-9]*$/) {
    return -1;
  }

  if ($anum <=> $bnum) {
    return $anum <=> $bnum;
  } else {
    if ($#awords == 0) {
      return -1;
    } elsif ($#bwords == 0) {
      return 1;
    } else {
	return $awords[1] cmp $bwords[1];
      }
  }
}


__END__

Typical output:


---------------------------------------------
         AnnoTrack data update script
---------------------------------------------
Connected to = gencode_dev @ mcs4a
write        = 0
show_chroms  = 0
update_core  = 1
update_lists = 1
verbose      = 0

Connected to das source.
-------------------------------------
connected to DAS server http://das.sanger.ac.uk/das/otter_das.
-------------------------------------
Have 24 entry points.
Resetting seen-flags.

CHROMOSOME 1
>UPDATING 129048 (OTTHUMG00000007389): start
>UPDATING 180499 (OTTHUMT00000023220): end
>UPDATING 129362 (OTTHUMG00000009097): end
...


