#!/usr/bin/perl -w

use strict;
# Command-line state.  Only $diffrel is read by the code further down in this
# file; the other values are recorded here but not consulted in the visible code.
my $idspace;
my $verbose;
my $filter_dangling = 1; # default
my $rel;
my $diffrel = "other_isa";

# Consume leading options (arguments starting with '-').  The @ARGV test
# stops the loop cleanly once the arguments run out, instead of matching a
# pattern against an undefined $ARGV[0].
while (@ARGV && $ARGV[0] =~ /^\-/) {
    my $opt = shift @ARGV;
    if ($opt eq '-h' || $opt eq '--help') {
        print usage();
        exit 0;
    }
    elsif ($opt eq '--rel') {
        $rel = shift @ARGV;
    }
    elsif ($opt eq '--idspace') {
        $idspace = shift @ARGV;
    }
    elsif ($opt eq '--verbose' || $opt eq '-v') {
        $verbose = 1;
    }
    elsif ($opt eq '-') {
        # a lone '-' is accepted and ignored
    }
    else {
        die "Unknown option: $opt\n";
    }
}

# ----------------------------------------
# load files
# ----------------------------------------

my $f1 = shift @ARGV;
my $f2 = shift @ARGV;

# Both input files are mandatory: fail early with the usage text rather than
# letting an undefined file name reach the substitutions and readfile() below.
die "Two input files are required\n\n" . usage()
    unless defined $f1 && defined $f2;

# Derive a short display name for each file: strip everything up to the last
# '/' and then remove the first ".obo" occurrence.
(my $f1_core_name =  $f1 ) =~ s/.+\///;
(my $f2_core_name =  $f2 ) =~ s/.+\///;
$f1_core_name =~ s/\.obo//;
$f2_core_name =~ s/\.obo//;

# $h1/$h2 are array refs of header lines, $b1/$b2 hash refs of stanzas by id.
my ($h1,$b1) = readfile($f1); # goche
my ($h2,$b2) = readfile($f2); # chebi

# ----------------------------------------
# headers
# ----------------------------------------

# Echo every non-blank header line of the first file unchanged.
for my $header_line (@$h1) {
    next if $header_line =~ /^\s*$/;
    print "$header_line\n";
}

# Declare the two subsets used to tag terms that occur in only one input.
print "subsetdef: unique_term1 \"unique to $f1_core_name\"\n",
      "subsetdef: unique_term2 \"unique to $f2_core_name\"\n";

# From the second file's header keep only the lines that start with
# synonymtypedef or subsetdef, then close the header with an empty line.
print "$_\n" for grep { /^(synonymtypedef|subsetdef)/ } @$h2;
print "\n";

# ----------------------------------------
# write merged file
# ----------------------------------------


# 1.  When a term appears only in the chebi_lite.obo file and has an is_a
#     relationship to another term that is also only in chebi_lite.obo, the
#     relationship between these two terms is is_a.  That makes the
#     relationship label the same as a goche is_a relationship, which makes it
#     difficult for us, using the GraphViz viewer, to tell one situation from
#     the other.  Would it be possible to label these relationships (between
#     two CHEBI-only terms) as chebi_isa?  We do not want to review these and
#     could then ignore them.

## Build a membership map over every term id found in $b1 and $b2.
## Each id accumulates 1 if it is a key of %$b1 and 10 if it is a key of
## %$b2, so the value is 1 (first file only), 10 (second file only) or
## 11 (both files).
my $all;
$all->{$_} += 1  for keys %$b1;
$all->{$_} += 10 for keys %$b2;

# Emit every stanza of file 1.  Directly after a stanza's "id:" line, either
#   - add the is_a links that file 2 holds for the same id, rewritten as
#     "relationship:" lines, and drop that id from %$b2, or
#   - tag the term with "subset: unique_term1" when file 2 does not have it.
# Iterate over the keys only (a bare %$b1 would also yield the values) and
# sort them so that the output order is reproducible between runs.
foreach my $id (sort keys %$b1) {
    my $v1 = $b1->{$id};
    my $v2 = $b2->{$id};
    foreach my $line (@$v1) {
        print "$line\n";
        next unless $line =~ /^id:/;

        if (!$v2) {
            print "subset: unique_term1\n";
            next;
        }

        foreach my $line2 (@$v2) {
            next unless $line2 =~ /^is_a:\s*(\S+)(.*)/;
            # Copy the captures before any further pattern match can reset them.
            my ($parent, $rest) = ($1, $2);
            # A value of exactly 10 means the parent occurs in file 2 only.
            # The parent may be absent from both files, so guard against undef.
            my $membership = $all->{$parent};
            if (defined $membership && $membership == 10) {
                print "relationship: chebi_isa $parent $rest\n";
            }
            else {
                print "relationship: $diffrel $parent $rest\n";
            }
        }
        # Handled here, so it must not be printed again as a file-2-only term.
        delete $b2->{$id};
    }
}

# Print any remaining entries from file 2, tagging each with
# "subset: unique_term2" directly after its "id:" line.
# Iterate over the keys only (a bare %$b2 would also yield the values) and
# sort them so that the output order is reproducible between runs.
foreach my $id (sort keys %$b2) {
    my $v2 = $b2->{$id};
    foreach my $line (@$v2) {
        print "$line\n";
        if ($line =~ /^id:/) {
            print "subset: unique_term2\n";
        }
    }
}

# Declare the relationship types used in the "relationship:" lines above:
# first the configurable diff relation, then the fixed chebi_isa.
for my $typedef_id ($diffrel, 'chebi_isa') {
    print "\n[Typedef]\nid: $typedef_id\nname: $typedef_id\n";
}
print "\n";

exit 0;

# ----------------------------------------
# utils
# ----------------------------------------

# Read a file and split it into header lines and id-keyed stanzas.
#
# Args:
#   $fn - path of the file to read
# Returns a two-element list:
#   \@headers - every line seen before the first line that starts with '['
#   \%bh      - maps the first token after "id:" to an array ref holding the
#               lines of the stanza that contains it (from the '[' line
#               onwards, including any blank lines that follow)
# A stanza without an "id:" line is not stored; if the same id occurs again,
# the later stanza replaces the earlier one.
# Dies when no file name is given or the file cannot be opened.
sub readfile {
    my $fn = shift;
    die "readfile: no file name given\n" unless defined $fn && length $fn;

    my $in_header = 1;
    my $block = [];
    my %bh = ();
    my @headers = ();

    # Three-argument open with a lexical handle: the file name is taken
    # literally (no mode characters or whitespace stripping) and the handle
    # cannot collide with a global bareword.
    open(my $fh, '<', $fn) or die "Cannot open $fn: $!\n";

    while (my $line = <$fh>) {
        chomp $line;
        if ($line =~ /^\[/) {
            # A stanza opener ends the header and starts a fresh block.
            $in_header = 0;
            $block = [];
        }
        if ($in_header) {
            push(@headers, $line);
            next;
        }
        if ($line =~ /^id:\s*(\S+)/) {
            # Register the current block under its id; lines pushed later
            # are still seen through the same array reference.
            $bh{$1} = $block;
        }
        push(@$block, $line);
    }
    close($fh);
    return (\@headers,\%bh);
}

# Return the last '/'-separated component of $0, i.e. the script's name
# without any leading directory part.
sub scriptname {
    my @path_parts = split(m{/}, $0);
    return $path_parts[-1];
}

# Build the help text printed for -h/--help.
# Returns the message as a single string, with the script's base name (as
# reported by scriptname()) interpolated into the synopsis and the example.
sub usage {
    my $sn = scriptname();

    # The synopsis names both input files, since the script reads two.
    return <<EOM;
$sn [--rel DIFF-RELATION] FILE1 FILE2

Example:

$sn --rel is_a goche.obo chebi.obo > visualize-goche-chebi-diffs.obo

diffs relationships between two files and produces an obo file that
shows the differences in a way that can be visualized in oboedit

TODO:

currently only isa links are diffed

EOM
}
