#!/usr/bin/perl -wT
# GO reference collection

use strict;
use HTML::Template;
use CGI::Carp qw(fatalsToBrowser); 
use HTML::Entities;
use Data::Dumper;

# Everything this script reads lives under the site's doc directory.
my $dir_path = '../html/';
my $doc_dir  = $dir_path . 'doc/';

# The page template; unknown template parameters are tolerated.
my $template = HTML::Template->new(
	filename          => 'refs.tmpl',
	path              => [ $doc_dir ],
	die_on_bad_params => 0,
);

# Read and parse the reference collection.
my $refs   = $doc_dir . "GO.references";
my $parsed = parseFile($refs);

# If any external accessions were seen, consult the xref abbreviations file
# as well so that they can be turned into links.
$parsed = get_xref_abbs($doc_dir . "GO.xrf_abbs", $parsed)
	if $parsed->{ext_accs};

# Hand the references to the template, ordered by their ID.
my @records = sort { $a->{id} cmp $b->{id} } values %{$parsed->{by_id}};
#print STDOUT "" . Dumper(\@records);
#die "";
$template->param(refLoop => \@records);

# The obligatory Content-Type header, followed by the rendered page.
print "Content-Type: text/html\n\n";
print $template->output;

exit;

#########################################################

# parseFile($path)
#
# Reads the file at $path as a series of records separated by blank lines.
# Within a record, lines starting with '!' and lines without any non-space
# character are skipped; every other line is split on its first ": " into a
# tag and a value. Lines that do not yield both a tag and a value are ignored.
#
# A record is kept only if it has all of the tags go_ref_id, title, authors,
# year and abstract.
#
# Returns a hash ref (undef if no record was kept) that may contain:
#   by_id              - id => hash ref for that reference (id, the HTML-encoded
#                        title/authors/year/abstract/comment/is_obsolete
#                        values, citation* and external_accession_str)
#   ext_accs           - lower-cased prefix of each external accession => count
#   external_accession - id => array ref of the raw external_accession values
#
# Dies if the file cannot be opened.
sub parseFile {
	my ($path) = @_;

	# Records are separated by an empty line. Localise the separator so the
	# rest of the program keeps reading line by line.
	local $/ = "\n\n";

	open(my $fh, '<', $path) or die "Can't open file $path: $!";

	# tags a record must have to be kept
	my @mandatory = qw(go_ref_id title authors year abstract);
	# tags whose first value is copied (HTML-encoded) into the item;
	# anything else, e.g. alt_id, is ignored
	my @copied_fields = qw(title authors year abstract comment is_obsolete);

	my $return_data;
	RECORD:
	while (my $record = <$fh>)
	{	my $info = {};
		foreach my $line (split("\n", $record))
		{	next if $line =~ /^\!/;
			next if $line !~ /\S/;
			my ($tag, $value) = split(": ", $line, 2);
			# a line without ": " has no value at all; skip it quietly
			next unless defined $value && $tag =~ /\S/ && $value =~ /\S/;
			push @{$info->{$tag}}, $value;
		}

		# make sure we have all the mandatory stuff
		next RECORD if grep { !$info->{$_} } @mandatory;

		my $item = { id => $info->{go_ref_id}[0] };

		foreach my $key (@copied_fields)
		{	$item->{$key} = encode_entities($info->{$key}[0])
				if $info->{$key};
		}

		# the citation is only recorded when it carries a PubMed ID
		if ($info->{citation} && $info->{citation}[0] =~ /PMID:(\d+)/)
		{	my $pmid = $1;
			$item->{citation_pmid} = $pmid;
			$item->{citation_url} = "http://www.ncbi.nlm.nih.gov/pubmed/$pmid";
			# NOTE(review): unlike the fields above, the citation text is
			# stored without HTML encoding - confirm the template escapes it.
			$item->{citation} = $info->{citation}[0];
		}

		if (my $accessions = $info->{external_accession})
		{	# remember which database prefixes occur anywhere in the file
			foreach my $accession (@$accessions)
			{	$return_data->{ext_accs}{ lc $1 }++
					if $accession =~ /(.+?):.+/;
			}

			$item->{external_accession_str} = encode_entities( join("; ", sort @$accessions) );
			$return_data->{external_accession}{ $item->{id} } = $accessions;
		}

		$return_data->{by_id}{ $item->{id} } = $item;
	}
	close $fh;
	return $return_data;
}

# get_xref_abbs($file, $data)
#
# Tries to turn the external accessions collected by parseFile into links.
#
#   $file - path of the xref abbreviations file: records separated by blank
#           lines, each with an "abbreviation: ..." line and possibly a
#           "url_syntax: ..." line containing the placeholder [example_id]
#   $data - hash ref as returned by parseFile
#
# For every abbreviation that occurs (case-insensitively) in
# $data->{ext_accs} and has a usable url_syntax, the text before and after
# the placeholder is stored in $data->{url_syntax}{<lower-cased abbreviation>}
# as {pre} and {post}. If at least one such syntax was found, each item's
# external_accession_str is rebuilt from the raw accessions in
# $data->{external_accession}: accessions with a known syntax become
# <a rel="external"> links, the others stay plain text. All text placed in
# the string is HTML-encoded.
#
# Returns $data. If the file cannot be opened, a warning is issued and $data
# is returned unchanged.
sub get_xref_abbs {
	my ($file, $data) = @_;

	my $fh;
	unless (open($fh, '<', $file))
	{	warn "Could not open $file: $!";
		return $data;
	}

	# go through the file and see if we can find URLs for any of those xrefs
	{	# records are separated by an empty line; keep the change local
		local $/ = "\n\n";
		while (my $record = <$fh>)
		{	next unless $record =~ /^abbreviation: (.+?)$/sm;
			my $db = lc $1;
			next unless exists $data->{ext_accs}{$db};
			## check we have the url syntax
			next unless $record =~ /^url_syntax: (\S+?)\[example_id\](\S*?)$/sm;
			my ($pre, $post) = ($1, $2);
			$data->{url_syntax}{$db}{pre} = $pre;
			$data->{url_syntax}{$db}{post} = $post if length $post;
		}
	}
	close $fh;

	# nothing to link to: leave the strings built by parseFile alone
	return $data unless %{ $data->{url_syntax} || {} };

	# now map the xrefs on to the external_accessions
	foreach my $id (keys %{$data->{by_id}})
	{	my $item = $data->{by_id}{$id};
		next unless $item->{external_accession_str};

		my @parts;
		foreach my $xref (sort { lc $a cmp lc $b } @{$data->{external_accession}{$id}})
		{	# the string goes into the page as markup, so encode everything
			# that is data rather than a tag
			my $label = encode_entities($xref);
			my ($db, $acc) = $xref =~ /(.+?):(.+)/;
			my $syntax = defined $db ? $data->{url_syntax}{ lc $db } : undef;
			if ($syntax)
			{	my $post = defined $syntax->{post} ? $syntax->{post} : "";
				my $url = $syntax->{pre} . $acc . $post;
				push @parts, '<a rel="external" href="' . encode_entities($url) . '">' . $label . '</a>';
			}
			else
			{	push @parts, $label;
			}
		}
		$item->{external_accession_str} = join("; ", @parts);
	}
	return $data;
}