
=head1 NAME

gencode_tracking_system / set_relations

=head1 DESCRIPTION

Helper script for the GENCODE tracking system, to be run nightly.

Connect to the internal tracking system db, compare flagged issues
and insert relation for overlapping genomic regions.

=head1 CONTACT

Felix Kokocinski, fsk@sanger.ac.uk

=head1 COPYRIGHT

Copyright Felix Kokocinski, 2008-2009, 
supported by Wellcome Trust Sanger Institute (UK) 
and National Human Genome Research Institute (USA).




=cut

use strict;
use warnings;
use gencode_tracking_system::core;
use gencode_tracking_system::config;

my $current_gene_id;
my $current_transcript_id;
my $current_feature_id;
my $current_gene_description;
my $das_source;

my $user_name = "trackingsystem";
my $statuses  = '';

my $flags;
my $relation_count = 0;
my $flagcount = 0;
my @flags;
my $loc = 0;
my $veryverbose = 0;

#connect to tracking system db
my $tracking_dbh = connect_db($DBHOST, $DBPORT, $DBNAME, $DBUSER, $DBPASS)
  or die "cant connect to to database $DBNAME @ $DBHOST.\n";
if($VERBOSE){ print "Conected to $DBNAME @ $DBHOST.\n" }

#system user
my $user_id = get_user_id( $user_name, $tracking_dbh );

#get ALL flags
#my $resolved = "no";
#my $all_flags = get_flag($dbh, undef, undef, undef, undef, $resolved);

my $sql = "select distinct Tchrom from issues;";
my $chroms_sth = $tracking_dbh->prepare($sql);
$chroms_sth->execute();
$sql = "select distinct i.id, i.Tchrom, i.Tstart, i.Tend from issues i, flags f ".
       "where i.id=f.issue_id and f.checked_date is NULL and i.Tchrom=?";
my $all_flags_sth = $tracking_dbh->prepare($sql);

#delete existing relations
$sql = "truncate issue_relations;";
my $delete_sth = $tracking_dbh->prepare($sql);
$delete_sth->execute();
$delete_sth->finish();

while( my $chrom = ($chroms_sth->fetchrow_array)){
  $all_flags_sth->execute($chrom);
  @flags = ();
  my $relationcount = 0;

  while(my ($issue_id, $issue_chrom, $issue_start, $issue_end) = $all_flags_sth->fetchrow_array){
    my %flag = (
		'issue_id'      => $issue_id,
		'start'         => $issue_start,
		'end'           => $issue_end,
		'seen'          => 0,
	       );

    push( @flags, \%flag);
    $flagcount++;
  }

  print "\nFound $flagcount flags on chromosome $chrom ";# if($VERBOSE);
  my $clustered_flags = cluster_things(\@flags);

  print "   ...in ".(scalar @$clustered_flags)." locations\n";# if($VERBOSE);

  foreach my $location (@$clustered_flags){
    my @relations = ();
    $loc++;
    print $loc.":  ".$location->{start}."-".$location->{end}.": ".scalar @{ $location->{features} }."\n" if($veryverbose);
    if((scalar @{ $location->{features} }) >1){

      foreach my $loc_feature ( @{ $location->{features} } ){
	print "\t".$loc_feature->{issue_id}."\n" if($veryverbose);
	push(@relations, $loc_feature->{issue_id});
      }
      @relations = sort @relations;
      for(my $r=0; $r < ((scalar @relations) + 1); $r++){
	my $dbid1 = $relations[$r];
	for(my $o=$r+1; $o < (scalar @relations); $o++){
	  my $dbid2 = $relations[$o];
	  print "\tSetting relation between $dbid1 & $dbid2\n" if($veryverbose);
	  issue_relation($tracking_dbh, $dbid1, $dbid2);
	  $relationcount++;
	}
      }

    }
    #last if($loc>50);
  }
  print "  ...set $relationcount relations"; #if($VERBOSE);
}

$chroms_sth->finish();
$chroms_sth->finish();


sub cluster_things {
  my ($features, $dont_save_features) = @_;

  my @clusters      = ();
  my @finalclusters = ();

 FEATURE:
  foreach my $feature (sort  { $a->{start} == $b->{start} ?  ( $b->{end} <=> $a->{end} ) : ( $a->{start} <=> $b->{start} ) } @$features) {
  CLUSTER:
    foreach my $cluster (@clusters) {
      if (($feature->{end} > $cluster->{start} and $feature->{start} < $cluster->{end})) {
	#add to existing cluster
	if($feature->{start} < $cluster->{start}){
	  $cluster->{start} = $feature->{start};
	}
	if($feature->{end} > $cluster->{end}){
	  $cluster->{end} = $feature->{end};
	}
	unless($dont_save_features){
	  push (@{$cluster->{features}}, $feature);
	}
	next FEATURE;
      }
    }

    #create new cluster
    my %newcluster;
    $newcluster{start}    = $feature->{start};
    $newcluster{end}      = $feature->{end};
    unless($dont_save_features){
      $newcluster{features} = [$feature];
    }
    push(@clusters, \%newcluster);
  }

  #join overlapping clusters
  @clusters = sort {$a->{start} <=> $b->{start}} @clusters;

  for(my $i = 0; $i < (scalar @clusters); $i++) {
    if(exists($clusters[$i+1]) and ($clusters[$i]->{end} > $clusters[$i+1]->{start})){
      $clusters[$i]->{end} = $clusters[$i+1]->{end};
      unless($dont_save_features){
	push (@{$clusters[$i]->{features}}, @{$clusters[$i+1]->{features}});
      }
      push(@finalclusters, $clusters[$i]);
      $i++;
      next;
    }
    push(@finalclusters, $clusters[$i]);
  }

  return(\@finalclusters);
}
