#!/usr/bin/perl

#
# rebuild the GO slim flat (.go) files from their OBO files if needed
#

use strict;
use Time::Local;

# External converter, log file and the directory holding the GO slim files.
use constant OBO2FLAT => "/share/go/bin/oboedit/current/obo2flat";
use constant LOGFILE => "/share/go/logs/build_goslim.log";
use constant GOSLIMDIR => "/share/ftp/pub/go/GO_slims";

# Flat files maintained in GOSLIMDIR.  The rebuild step derives the name of
# the OBO source for each one by replacing ".go" with ".obo".
my @obofiles = ( "goslim_generic.go", "goslim_goa.go", "goslim_plant.go", "goslim_yeast.go");

# Last-modify time of each flat file, keyed by file name.
my %filetimes = ();

# Append to the log.  $! is included so a failure says why (permissions,
# missing directory, full disk, ...).
open (LOG, ">>" . LOGFILE) || die "Cannot open log file, " . LOGFILE . ": $!\n";

#
# Make LOG the default handle for print and switch on autoflush for it.
# This alone does not capture the output of child commands; that happens
# later, when STDOUT and STDERR are re-opened onto LOG before the rebuild.
#
select (LOG); $| = 1;

print "---------------\n";
print scalar localtime, "\n";
print "---------------\n\n";

# Record the "last modify time" (index 9 of the list returned by stat()) of
# every flat file.  stat() returns an empty list for a file that cannot be
# examined, which leaves the entry undefined.
foreach my $file (@obofiles) {
    $filetimes{$file} = ( stat(GOSLIMDIR . "/$file") )[9];
}

# Every flat file is compared against the modify time of this one OBO file.
# NOTE(review): the other .obo files are never examined here, although the
# rebuild converts each of them -- confirm that is intended.
my $obotime = ( stat(GOSLIMDIR . "/goslim_generic.obo") )[9];

# Checked once, up front, rather than on every pass through the loop below.
if (!defined $obotime) {
    die "Required file, " . GOSLIMDIR . "/goslim_generic.obo, is missing!\n";
}

# Becomes true as soon as ANY flat file is found to be older than the OBO
# file and then stays true.  It is deliberately never reset inside the loop:
# otherwise only the last file in @obofiles would decide the outcome.
my $rebuild = 0;

foreach my $file (@obofiles) {
    my $filetime = $filetimes{$file};

    if (!defined $filetime) {
        die "Required file, $file, is missing!\n";
    }

    if ($filetime > $obotime) {
        print "NEWER\t$file than OBO $filetime vs $obotime\n";
        # a flat file written by an earlier run of this script is newer than
        # the OBO file, so this branch is also taken when nothing changed
        warn "Someone updated a GO Slim flat file, $file\n";
    } else {
        print "OLDER\t$file than OBO $filetime vs $obotime\n";
        # the OBO file is at least as new as the flat file, i.e. it has been
        # updated by a curator or editor since the flat file was written
        $rebuild = 1;
    }
}

#
# if the OBO file is newer than one of the flat files,
# the flat files need to be rebuilt from the OBO files.
# (the definition file is not checked by this script.)
#

if ($rebuild) {
    print "\nRebuild of flat files starting.\n";

    # YYYYMMDD stamp in local time, used as the CVS commit message.
    my @now = localtime;
    my $dstr = sprintf("%04d%02d%02d", $now[5] + 1900, $now[4] + 1, $now[3]);

    # Re-open STDOUT and STDERR onto the log so that everything written by
    # the child commands below (obo2flat, cvs) ends up in the log file.
    open (STDOUT, ">&LOG") || die "Cannot dup LOG filehander for STDOUT: $!\n";
    open (STDERR, ">&LOG") || die "Cannot dup LOG filehander for STDERR: $!\n";

    # Run a shell command and return (wait status, description of outcome).
    # $! is only meaningful when system() returns -1 (command could not be
    # started); otherwise the exit code / signal is taken from the status.
    my $run = sub {
        my ($command) = @_;
        my $status = system($command);
        my $outcome;
        if ($status == -1) {
            $outcome = "could not start command: $!";
        } elsif ($status & 127) {
            $outcome = "killed by signal " . ($status & 127);
        } else {
            $outcome = "exit code " . ($status >> 8);
        }
        return ($status, $outcome);
    };

    foreach my $file ( @obofiles ) {
        # name of the OBO file the flat file is rebuilt from
        my $obosrc = $file;
        $obosrc =~ s/\.go/\.obo/;

        # temporary outputs of obo2flat; cleanflats() reads the p, c and f
        # files back using the same naming scheme
        my @tmpfiles = map { "/tmp/$_.$obosrc.go" } qw(p c f d);

        my $build_cmd = OBO2FLAT . " --gopresets @tmpfiles " . GOSLIMDIR . "/$obosrc < /dev/null";

        my ($build_status, $build_outcome) = $run->($build_cmd);

        print "OBO2FLAT returned status = $build_status\n";

        if ($build_status) { die "FATAL: obo2flat execution failed: $build_outcome\n"; }

        cleanflats($file);

        # commit updated flat file
        my $commit_cmd = "/usr/bin/csh -c 'unlimit; /tools/gnu/bin/cvs -d /share/go/cvs commit -m $dstr " . GOSLIMDIR . "/$file'";

        my ($commit_status, $commit_outcome) = $run->($commit_cmd);

        if ($commit_status) { die "FATAL: cvs commit failed: $commit_outcome\n"; }

        # best-effort removal of the temporary files; a file that is already
        # gone is not an error
        unlink @tmpfiles;

    }

} else {
    print "Update exiting, flat files are up-to-date\n";
}

print "\n";

close LOG;

exit 0;

#
# don't need all the header
#
# cleanflats($outfile)
#
# Assemble GOSLIMDIR/$outfile from the temporary files
# /tmp/p.<obo>.go, /tmp/c.<obo>.go and /tmp/f.<obo>.go, where <obo> is
# $outfile with ".go" replaced by ".obo":
#   - the p file is copied unchanged (it supplies the only header kept);
#   - from the c and f files, lines starting with "!" or "$Gene_Ontology"
#     are dropped and every other line is copied.
#
# Dies if a temporary file cannot be read or the output cannot be written,
# so that a truncated flat file is never left for the caller to commit.
# Returns 1 on success.
#
sub cleanflats {
    my ($outfile) = @_;

    # must produce the same name the caller used for the temporary files
    my $obofile = $outfile;
    $obofile =~ s/\.go/\.obo/;

    my $target = GOSLIMDIR . "/$outfile";
    my @parts  = qw(p c f);

    # Open every input BEFORE the output: opening the output truncates the
    # existing flat file, which must not happen if an input is missing.
    my %input_for;
    foreach my $part (@parts) {
        my $path = "/tmp/$part.$obofile.go";
        open(my $in, '<', $path) or die "Cannot read $path: $!\n";
        $input_for{$part} = $in;
    }

    open(my $out, '>', $target) or die "Cannot write $target: $!\n";

    foreach my $part (@parts) {
        my $in = $input_for{$part};

        while (my $line = <$in>) {
            if ($part ne 'p') {
                # skip the repeated header/comment lines and the root line
                next if $line =~ /^\!/ || $line =~ /^\$Gene_Ontology/;

                # guarantee exactly one trailing newline, even on a final
                # line that lacks one
                chomp $line;
                $line .= "\n";
            }
            print {$out} $line or die "Cannot write $target: $!\n";
        }

        close($in);
    }

    # buffered write errors only surface at close
    close($out) or die "Cannot finish writing $target: $!\n";

    return 1;
}
