#!/usr/bin/perl -wT
# GO reference collection
#
# CGI script that parses the GO.references flat file, optionally decorates
# each reference's external accessions with links derived from GO.xrf_abbs,
# and renders the result through the refs.tmpl HTML::Template template.

use strict;
use HTML::Template;
use CGI::Carp qw(fatalsToBrowser);
use HTML::Entities;
use Data::Dumper;

my $dir_path = '../html/';

my $template = HTML::Template->new(
    filename          => 'refs.tmpl',
    die_on_bad_params => 0,
    path              => [ $dir_path . 'doc/' ],
);

my $refs = $dir_path . "doc/GO.references";

# read and parse the database file
my $parsed = parseFile($refs);

if ($parsed->{ext_accs}) {
    # see if we can get the xref_abbs file, too, and get links for the xrefs
    my $xrefs_file = $dir_path . "doc/GO.xrf_abbs";
    $parsed = get_xref_abbs($xrefs_file, $parsed);
}

# An empty references file yields no by_id entry; fall back to an empty hash
# so the template simply receives an empty loop.
my $sorted_data =
    [ sort { $a->{id} cmp $b->{id} } values %{ $parsed->{by_id} || {} } ];

#print STDOUT "" . Dumper($sorted_data);
#die "";

$template->param(refLoop => $sorted_data);

# send the obligatory Content-Type
print "Content-Type: text/html\n\n";

# print the template
print $template->output;

exit;

#########################################################

# parseFile($file)
#
# Reads $file as blank-line-separated stanzas of "tag: value" lines and
# returns a hash ref with these keys (each present only when populated):
#   by_id              => { go_ref_id => item hash for the template }
#   ext_accs           => { lower-cased db prefix => occurrence count }
#   external_accession => { go_ref_id => array ref of raw accession strings }
# Stanzas lacking go_ref_id, title, authors, year or abstract are skipped.
# Dies if the file cannot be opened.
sub parseFile {
    my ($file) = @_;

    # Records are separated by a blank line; localise so the change does not
    # leak into the rest of the program.
    local $/ = "\n\n";

    open(my $fh, '<', $file) or die "Can't open file $file: $!";

    my $return_data = {};

    STANZA:
    while (my $stanza = <$fh>) {
        my %info;
        foreach my $line (split("\n", $stanza)) {
            next if $line =~ /^\!/;      # comment line
            next if $line !~ /\S/;       # blank line
            my ($tag, $value) = split(": ", $line, 2);
            # A line without a ": " separator has no value; ignore it rather
            # than matching against undef.
            next unless defined $tag && defined $value;
            if ($tag =~ /\S/ && $value =~ /\S/) {
                push @{ $info{$tag} }, $value;
            }
        }

        next STANZA unless $info{go_ref_id};

        # make sure we have all the mandatory stuff
        next STANZA
            unless $info{title} && $info{authors} && $info{year} && $info{abstract};

        # Only the first go_ref_id is used; alt_ids are ignored.
        my $item = { id => $info{go_ref_id}[0] };

        # Single-valued fields copied into the item HTML-escaped.
        my @escaped_fields = qw(title authors year abstract comment is_obsolete);
        foreach my $key (@escaped_fields) {
            $item->{$key} = encode_entities($info{$key}[0]) if $info{$key};
        }

        # The citation is only kept when it carries a PubMed ID.
        if ($info{citation} && $info{citation}[0] =~ /PMID:(\d+)/) {
            my $pmid = $1;
            $item->{citation_pmid} = $pmid;
            $item->{citation_url}  = "http://www.ncbi.nlm.nih.gov/pubmed/$pmid";
            $item->{citation}      = $info{citation}[0];
        }

        if (my $accessions = $info{external_accession}) {
            # Tally the database prefixes so get_xref_abbs knows which
            # abbreviations are worth looking up.
            foreach my $accession (@$accessions) {
                if ($accession =~ /(.+?):.+/) {
                    $return_data->{ext_accs}{ lc $1 }++;
                }
            }
            $item->{external_accession_str} =
                encode_entities(join("; ", sort @$accessions));
            $return_data->{external_accession}{ $item->{id} } = $accessions;
        }

        $return_data->{by_id}{ $item->{id} } = $item;
    }
    close $fh;

    return $return_data;
}

# get_xref_abbs($file, $data)
#
# Scans the xref abbreviation file $file for stanzas whose abbreviation is one
# of the prefixes in $data->{ext_accs}, records each url_syntax as
# $data->{url_syntax}{db}{pre,post}, and rewrites every item's
# external_accession_str so that accessions with a known URL syntax are
# links. Returns $data (modified in place). If the file cannot be opened a
# warning is emitted and $data is returned unchanged.
sub get_xref_abbs {
    my ($file, $data) = @_;

    local $/ = "\n\n";

    my $fh;
    unless (open($fh, '<', $file)) {
        warn "Could not open $file: $!";
        return $data;
    }

    # go through the file and see if we can find URLs for any of those xrefs
    while (my $stanza = <$fh>) {
        next unless $stanza =~ /^abbreviation: (.+?)$/sm;
        my $db = lc $1;
        next unless exists $data->{ext_accs}{$db};

        ## check we have the url syntax
        if ($stanza =~ /^url_syntax: (\S+?)\[example_id\](\S*?)$/sm) {
            $data->{url_syntax}{$db}{pre} = $1;
            $data->{url_syntax}{$db}{post} = $2 if defined $2 && length $2;
        }
    }
    close $fh;

    return $data unless $data->{url_syntax} && %{ $data->{url_syntax} };

    # now map the xrefs on to the external_accessions
    foreach my $id (keys %{ $data->{by_id} }) {
        my $item = $data->{by_id}{$id};
        next unless $item->{external_accession_str};
        $item->{external_accession_str} = join("; ",
            map  { _xref_html($_, $data->{url_syntax}) }
            sort { lc $a cmp lc $b } @{ $data->{external_accession}{$id} });
    }

    return $data;
}

# _xref_html($xref, $url_syntax)
#
# Returns the HTML for one "DB:accession" string: the escaped text, wrapped in
# a link when $url_syntax has an entry for the lower-cased DB prefix.
#
# NOTE(review): the original code concatenated two empty strings around the
# xref, so the intended link markup is not visible in the file. The anchor
# below is rebuilt as pre . accession . post from the stored url_syntax parts;
# confirm the markup against what the template/page expects.
sub _xref_html {
    my ($xref, $url_syntax) = @_;

    my $label = encode_entities($xref);

    if ($xref =~ /(.+?):(.+)/) {
        my ($db, $accession) = (lc $1, $2);
        if (my $syntax = $url_syntax->{$db}) {
            my $post = defined $syntax->{post} ? $syntax->{post} : '';
            my $url  = $syntax->{pre} . $accession . $post;
            return '<a href="' . encode_entities($url) . '">' . $label . '</a>';
        }
    }

    return $label;
}