#!/usr/bin/perl -w
# Read in a Reactome2GO mapping file, remove the current Reactome xrefs from the
# GO ontology file and replace them with xrefs built from the new mappings.
# Works!

use strict;
use Data::Dumper;

## Verbosity is on by default; set GO_VERBOSE to 0 (or empty) to silence
## progress messages.
my $verbose = defined $ENV{GO_VERBOSE} ? $ENV{GO_VERBOSE} : 1;
## Parsed mapping data, filled in while the mapping file is read.
my $data;

## Cleaned (ASCII-only, sorted, de-duplicated) copy of the mapping file
my $mapping = 'go/external2go/reactome2go-temp';
## Ontology file to update; the result is written to "$ontology.new"
my $ontology = 'go/ontology/editors/gene_ontology_write.obo';
## Database prefix of the xrefs that are replaced
my $dbname = 'Reactome';
## Input mapping file: first command line argument, defaulting to $mapping
my $infile = shift @ARGV;
if (! $infile)
{	## NOTE(review): the old message named go/external2go/reactome2go, but the
	## file actually used has always been $mapping -- confirm which is intended.
	warn "No input file found. Using $mapping";
	$infile = $mapping;
}
if (! -e $infile)
{	die "Could not find input file $infile!";
}

## Clean the input and save it as $mapping: keep only tab, newline and
## printable ASCII bytes, drop duplicate lines and sort the rest.
## This is done in Perl rather than via "tr | sort -u" in a shell because
##  - the input is read completely before $mapping is opened for writing, so
##    the file is not truncated when $infile and $mapping are the same file;
##  - failures are reported (the shell command's output was redirected, so its
##    captured status string was always empty);
##  - $infile is never interpolated into a shell command line.
## Lines are sorted byte-wise, independent of the current locale.
{	open(my $raw_fh, '<', $infile) or die "Could not open $infile: $!\n";
	binmode($raw_fh);
	my $content = do { local $/; <$raw_fh> };
	close($raw_fh);
	$content = '' unless defined $content;

	## delete every byte that is not tab, newline or printable ASCII
	$content =~ tr/\x09\x0A\x20-\x7E//cd;

	my %seen;
	my @clean_lines = sort grep { ! $seen{$_}++ } split(/\n/, $content);

	open(my $clean_fh, '>', $mapping) or die "Could not open $mapping: $!\n";
	binmode($clean_fh);
	foreach my $clean_line (@clean_lines)
	{	print {$clean_fh} "$clean_line\n" or die "Could not write to $mapping: $!\n";
	}
	## buffered write errors only show up when the handle is closed
	close($clean_fh) or die "Could not write to $mapping: $!\n";
}

## Parse the cleaned mapping file into $data:
##   header       - the comment lines (those starting with '!')
##   map          - GO ID => { Reactome ID => Reactome name }
##   all_reac_ids - number of parsed lines per Reactome ID
##   all_go_ids   - number of parsed lines per GO ID
open(my $map_fh, '<', $mapping) or die "Could not open $mapping: $!\n";
MAPLINE:
while (my $line = <$map_fh>)
{	## header / comment lines are stored as they are
	if ($line =~ /^\!/)
	{	push @{$data->{header}}, $line;
		next MAPLINE;
	}

	## skip lines without any word characters
	next MAPLINE unless $line =~ /\w/;

	my $txt = $line;
	## report, then strip, anything outside the ASCII range
	if ($txt =~ /[^\x00-\x7F]/)
	{	warn "Non-ASCII characters detected and removed:\n$txt";
	}
	$txt =~ s/[^\x00-\x7F]//g;

	## collapse runs of spaces and remove spaces in front of commas
	$txt =~ s/ {2,}/ /g;
	$txt =~ s/ +,/,/g;

	## expected format:  <dbname>:<id> <name> > GO:<term name> ; GO:<digits>
	my ($reac_id, $reac_name, $go_id) =
		($txt =~ /^$dbname:(\S+) (.+)(?<= \> )GO:.+ ; (GO:\d+)/);
	if (! defined $go_id)
	{	warn "Line was not parsed:\n$txt";
		next MAPLINE;
	}

	## the captured name still carries the trailing ' > ' separator
	$reac_name =~ s/(.+)\s*\>\s*$/$1/;
	$reac_name =~ s/\s+$//g;

	$data->{'map'}{$go_id}{$reac_id} = $reac_name;
	$data->{all_reac_ids}{$reac_id}++;
	$data->{all_go_ids}{$go_id}++;
}
close($map_fh);

my $mapped_go_count = scalar( keys %{$data->{map}} );
my $mapped_reac_count = scalar( keys %{$data->{all_reac_ids}} );
warn "Found " . $mapped_go_count . " GO terms and " . $mapped_reac_count . " Reactome IDs";

if ($verbose)
{	warn "Finished reading Reactome mapping";
}

## Read the ontology file one stanza at a time, replace the $dbname xrefs of
## every [Term] stanza and write the result to "$ontology.new".
## For each non-obsolete term, the mappings stored in $data->{map} for its id
## and alt_ids are added as xref lines; every GO ID and Reactome ID that gets
## used is removed from $data->{all_go_ids} / $data->{all_reac_ids}, so that
## whatever is left afterwards was not written to any term.
{	## stanzas are separated by blank lines, so read a paragraph at a time;
	## local() keeps the change to the record separator inside this block
	local $/ = "\n\n";
	open(my $obo_in, '<', $ontology) or die "Could not open $ontology: $!\n";
	open(my $obo_out, '>', "$ontology.new") or die "Could not open $ontology.new: $!\n";

	while (my $stanza = <$obo_in>)
	{	if ($stanza =~ /\[Term\]/)
		{	## delete all existing refs for the db. The pattern is anchored to
			## the start of a line so that only whole "xref: <dbname>:" lines
			## are removed, never text in the middle of another tag.
			$stanza =~ s/^xref: \Q$dbname\E:[^\n]*(?:\n|\z)//gim;

			unless ($stanza =~ /obsolete: true/i)
			{	## collect the new xrefs for the term's id and all its alt_ids
				my %xrefs;
				while ($stanza =~ /^(?:alt_)?id: (GO:\d{7})/mg)
				{	my $id = $1;
					my $names_by_reac_id = $data->{'map'}{$id};
					next unless $names_by_reac_id;
					foreach my $reac_id (keys %$names_by_reac_id)
					{	$xrefs{$reac_id} = $names_by_reac_id->{$reac_id};
						## bookkeeping
						delete $data->{all_reac_ids}{$reac_id};
					}
					## bookkeeping
					delete $data->{all_go_ids}{$id};
				}

				if (%xrefs)
				{	my $xref_lines = join("\n", map { "xref: $dbname:$_ \"" . $xrefs{$_} . '"' } sort keys %xrefs);
					## Insert in front of the first is_a / relationship TAG.
					## Matching the tag at the start of a line (rather than the
					## bare word anywhere) keeps the xrefs out of names and
					## definitions that merely contain "is_a" or "relationship".
					unless ($stanza =~ s/^((?:is_a|relationship):)/$xref_lines\n$1/m)
					{	## no such tag (e.g. a root term): append the xrefs to
						## the end of the stanza instead of dropping them
						$stanza =~ s/(\n*)\z/\n$xref_lines$1/;
					}
				}
			}
		}
		print {$obo_out} $stanza or die "Could not write to $ontology.new: $!\n";
	}

	close($obo_in);
	## buffered write errors only show up when the handle is closed
	close($obo_out) or die "Could not write to $ontology.new: $!\n";
}

## report the mappings that could not be added to any non-obsolete term
warn "Have " . scalar(keys %{$data->{all_go_ids}}) . " GO IDs and " . scalar(keys %{$data->{all_reac_ids}}) . " Reactome IDs left!";
if (scalar (keys %{$data->{all_go_ids}}) != 0)
{	print STDERR "GO IDs: " . Dumper($data->{all_go_ids});
}
if (scalar(keys %{$data->{all_reac_ids}}))
{	print STDERR "Reactome IDs: " . Dumper($data->{all_reac_ids});
}

warn "Finished writing ontology file. Please check $ontology.new";

exit(0);
