
=head1 NAME

gencode_tracking_system / import_from_ensembl

=head1 DESCRIPTION

Helper script for the GENCODE tracking system
to import selected gene / transcript models from the ENSEMBL database
as entries into GenTrack.

=head1 SYNOPSIS

perl import_from_ensembl.pl [-user Felix]
                            [-category Ensembl]
                            -gene ENSG00000012048 OR
                            -transcript ENST00000309486
                            [-flag manual_selection]
                            [-note "important gene"]

=head1 CONTACT

Felix Kokocinski, fsk@sanger.ac.uk

=head1 COPYRIGHT

Copyright Felix Kokocinski, 2008-2010, 
supported by Wellcome Trust Sanger Institute (UK) 
and National Human Genome Research Institute (USA).

You may distribute this module under the same terms as perl itself, 
citing the original source.

=cut

use strict;
use warnings;
use Getopt::Long;
use gencode_tracking_system::core;
use gencode_tracking_system::config;
use Date::Format;

# Defaults for the tracking-system user and category; both can be
# overridden on the command line (-user / -category).
my $user_name = "trackingsystem";
my $category  = "Ensembl";

# Stable ids given on the command line; at least one of them is required.
my ($gene_id, $transcript_id);

# $gene / $transcript receive the fetched Ensembl objects further down;
# $flag / $details hold the optional flag name (-flag) and its note (-note).
my ($gene, $transcript, $flag, $details);

# Number of flags newly set; incremented by set_flag_on_transcript().
my $flag_count = 0;

# GetOptions reports unknown / malformed options on STDERR itself and
# returns false; stop here instead of silently ignoring a mistyped option.
GetOptions(
  'gene=s'       => \$gene_id,
  'transcript=s' => \$transcript_id,
  'user=s'       => \$user_name,
  'category=s'   => \$category,
  'flag=s'       => \$flag,
  'note=s'       => \$details,
) or exit 1;

# Nothing to import without an id: report it and exit with a failure
# status so that calling scripts can detect the error.
if(!($gene_id or $transcript_id)){
  print STDERR "\nGene or transcript id missing.\n\n";
  exit 1;
}

#connect to tracking system db
my $tracking_dbh = connect_db($DBHOST, $DBPORT, $DBNAME, $DBUSER, $DBPASS)
  or die "cant connect to to database $DBNAME @ $DBHOST.\n";
#the handle is guaranteed to be true here because of the "or die" above
if($VERBOSE){ print "Conected to $DBNAME @ $DBHOST.\n" }

#passed on to store_features() and set_seen_flag() by the subs below
my $prepare_hash = prepare_statements($tracking_dbh);

#connect to ensembl db;
#fail here with a clear message rather than on the first adaptor call
my $ens_db = connect_ensembl($ENS_HOST, $ENS_PORT, $ENS_NAME, $ENS_USER, $ENS_PASS)
  or die "Can't connect to ensembl database $ENS_NAME @ $ENS_HOST.\n";
my $ens_ta = $ens_db->get_TranscriptAdaptor();
my $ens_ga = $ens_db->get_GeneAdaptor();
if($VERBOSE){ print "Conected to ensembl\n" }

#get category
my $e_category_id = get_category_id( $category, $tracking_dbh );
#get user
my $user_id = get_user_id( $user_name, $tracking_dbh );

#both ids are handed to store_features() later on, so abort if EITHER
#lookup failed (not only when both failed) and signal the failure
#through a non-zero exit status
unless($e_category_id and $user_id){
  print STDERR "Can't find user or category.\n";
  exit 1;
}

# Import either a whole gene (with all of its transcripts and their exons)
# or a single transcript (with its parent gene and its exons).
if($gene_id){
  #stop with a readable message if the stable id is unknown,
  #instead of failing later with a method call on an undefined value
  $gene = $ens_ga->fetch_by_stable_id($gene_id)
    or die "Can't find gene $gene_id in the ensembl database.\n";

  #insert gene
  #NOTE(review): the gene's own stable id is passed as parent name here,
  #while the transcript branch below passes "" - confirm which is intended
  my $g_id = insert_object($gene, 'gene', 0, $gene_id);

  #insert all transcripts of this gene
  foreach my $gene_transcript (@{$gene->get_all_Transcripts}){
    my $t_id = insert_object($gene_transcript, 'transcript', $g_id, $gene->stable_id);

    if($flag){
      set_flag_on_transcript($t_id, $flag, $details);
    }

    #insert all exons of this transcript
    foreach my $exon (@{$gene_transcript->get_all_Exons}){
      insert_object($exon, 'exon', $t_id, $gene_transcript->stable_id, $g_id);
    }
  }

}
else{
  $transcript = $ens_ta->fetch_by_stable_id($transcript_id)
    or die "Can't find transcript $transcript_id in the ensembl database.\n";
  $gene = $ens_ga->fetch_by_transcript_stable_id($transcript_id)
    or die "Can't find the gene of transcript $transcript_id in the ensembl database.\n";

  #insert parent gene of the transcript
  #(tracking-system ids get their own names so that they no longer
  # shadow the command-line variables $gene_id / $transcript_id)
  my $g_id = insert_object($gene, 'gene', 0, "");

  #insert transcript
  my $t_id = insert_object($transcript, 'transcript', $g_id, $gene->stable_id);

  if($flag){
    set_flag_on_transcript($t_id, $flag, $details);
  }

  #insert all exons of this transcript
  foreach my $exon (@{$transcript->get_all_Exons}){
    insert_object($exon, 'exon', $t_id, $transcript->stable_id, $g_id);
  }
}

#report how many flags were newly set (only when flagging was requested)
if($flag){
  print "\nSet $flag_count flags.\n";
}


# Attach the flag $flag_name to the tracking-system entry $id, unless
# get_flag() reports that such a flag exists already.
#
# Arguments: $id        - id of the entry to flag
#            $flag_name - name of the flag
#            $details   - optional note, passed on to set_flag()
#
# Uses the file-level $tracking_dbh, $prepare_hash, $user_id and
# increments the file-level counter $flag_count.
# The return value is not meaningful; callers use it in void context.
sub set_flag_on_transcript{
  my ($id, $flag_name, $details) = @_;

  #look for flags of this name that were set / resolved before
  my $existing_flags = get_flag($tracking_dbh, $id, $flag_name, undef, undef, undef);

  if(@$existing_flags){
    #flag is known already: only update the seen status,
    #once per flag that was found
    foreach my $existing_flag (@$existing_flags){
      set_seen_flag($prepare_hash, $id, "flag", "2");
      print " Set seen:".$id."\n";
    }
    if($VERBOSE){ print STDERR "  Found existing flag $flag_name.\n" }
    return;
  }

  #no such flag yet: write it (only if writing is enabled)
  if($VERBOSE){ print STDERR "  Setting flag $flag_name to ".$id.".\n" }
  if($WRITE){
    set_flag($tracking_dbh, $id, $flag_name, undef,
             $user_id, undef, $details);
  }

  #counted even when $WRITE is off
  $flag_count++;
}

# Build the feature hash for one Ensembl object and hand it to
# store_features().
#
# Arguments: $object         - the Ensembl gene / transcript / exon object
#            $type           - 'gene', 'transcript' or 'exon'
#            $parent_id      - tracking-system id of the parent entry
#            $parent_name    - name stored under the 'parent' key
#            $greatparent_id - only used for exons; for all other types
#                              $parent_id is passed in its place
#
# Returns the id reported by store_features().
# Uses the file-level $tracking_dbh, $prepare_hash, $user_id and
# $e_category_id.
sub insert_object{
  my ($object, $type, $parent_id, $parent_name, $greatparent_id) = @_;

  my $time_format = "%Y-%m-%dT%H:%M:%S%z";
  my $is_exon     = ($type eq "exon");

  #exons are stored as "subfeature"; the hash itself keeps the original type
  my $store_type   = $is_exon ? "subfeature"    : $type;
  my $top_level_id = $is_exon ? $greatparent_id : $parent_id;

  my %feature;
  my @notes;

  #formatted timestamps
  $feature{'created_on'} = time2str($time_format, $object->created_date);
  $feature{'updated_on'} = time2str($time_format, $object->modified_date);

  #location and identity
  $feature{'chrom'}      = $object->seq_region_name;
  $feature{'type'}       = $type;
  $feature{'start'}      = $object->seq_region_start;
  $feature{'end'}        = $object->seq_region_end;
  $feature{'strand'}     = check_strand($object);
  $feature{'id'}         = $object->stable_id;
  $feature{'version'}    = '1';
  $feature{'parent'}     = $parent_name;

  if($is_exon){
    $feature{'phase'}    = check_phase($object);
  }
  else{
    $feature{'biotype'}  = $object->biotype;
    $feature{'status'}   = $object->status;

    #collect the free-text lines for the description field
    my $description = $object->description;
    if($description){
      push(@notes, 'Description: '.$description);
    }
    my $xref = $object->display_xref;
    if($xref){
      push(@notes, ucfirst($type).'name: '.$xref->display_id);
    }
  }
  $feature{'description'} = join("\n", @notes);

  #store to db; only the first returned value (the id) is used
  my ($stored_id) = store_features($tracking_dbh, $prepare_hash, \%feature, $store_type,
                                   $top_level_id, $parent_id, $user_id, $e_category_id);
  print STDERR "Stored $store_type with ID $stored_id.\n";

  return $stored_id;
}


# Translate the strand of a feature into a one-character symbol.
#
# Argument: $feat - any object providing a strand() method
# Returns:  "+" for a strand of 1, +1 or +
#           "-" for a strand of -1 or -
#           "." for everything else, including an undefined strand
#
# The accessor is read only once, and an undefined strand is handled
# explicitly so that it does not trigger "uninitialized value" warnings.
sub check_strand{
  my $feat = shift;

  my $raw_strand = $feat->strand;
  return "." unless(defined $raw_strand);

  if($raw_strand eq "1" or $raw_strand eq "+1" or $raw_strand eq "+"){
    return "+";
  }
  if($raw_strand eq "-1" or $raw_strand eq "-"){
    return "-";
  }

  return ".";
}


# Translate the phase of a feature for storage.
#
# Argument: $feat - any object providing a phase() method
# Returns:  '-' if the phase is undefined or -1,
#           2 for a phase of 1 and 1 for a phase of 2,
#           the phase unchanged otherwise (e.g. 0).
#
# NOTE(review): swapping 1 and 2 presumably converts between the phase
# and frame conventions - confirm against the consumer of this value.
sub check_phase{
  my $feat = shift;

  my $raw_phase = $feat->phase;

  #no usable phase
  return '-' unless(defined $raw_phase and $raw_phase != -1);

  #swap 1 <-> 2, everything else passes through
  return 2 if($raw_phase == 1);
  return 1 if($raw_phase == 2);

  return $raw_phase;
}



__END__


