#!/usr/bin/perl

# A hack to count the number of gene products by the evidence type
# associated with them.  Input is the uncompressed gene association file
# on STDIN.  Output, on STDOUT, is a tab delimited list of evidence counts.
# This can be pasted into Excel to make a pretty graph.

# first written in February 2007 by JM Cherry

use strict;

# Best evidence code seen so far for each gene product, one hash per
# aspect letter found in column 9 of the input (C, F, P).  Keys are the
# column 2 values; values are column 7 evidence codes.
my %geneC = ();
my %geneF = ();
my %geneP = ();

# Preference tier of every evidence code this script recognises; a lower
# number is preferred.  Codes that share a tier are interchangeable, so
# whichever one was seen first for a gene is kept.
my %tier_of_evidence = (
    ( map { ( $_ => 1 ) } qw(IMP IDA IPI IGI IEP) ),
    ISS => 2,
    ( map { ( $_ => 3 ) } qw(IC NR ND NAS TAS) ),
    ( map { ( $_ => 4 ) } qw(IGC RCA) ),
    IEA => 5,
);

# Dispatch from the aspect letter to the hash that accumulates it.
my %best_for_aspect = ( P => \%geneP, F => \%geneF, C => \%geneC );

# keep_best_evidence( \%best_for, $gene, $code )
#
# Records $code as the evidence for $gene in %best_for when the gene has
# no entry yet, or when $code belongs to a strictly better tier than the
# code currently held (a held code that is not in the tier table is
# replaced by any recognised code).
#
# Returns 1 when at least one of the held/new codes is recognised, and 0
# when neither is, so the caller can report the row.
sub keep_best_evidence {
    my ( $best_for, $gene, $code ) = @_;

    # The first annotation seen for a gene is stored unconditionally,
    # even if its code is not in the tier table.
    $best_for->{$gene} = $code unless defined $best_for->{$gene};

    my $held_tier = $tier_of_evidence{ $best_for->{$gene} };
    my $new_tier  = $tier_of_evidence{$code};

    if ( defined $new_tier ) {

        # Ties keep the held code.  RCA is a tier-4 code here; the
        # previous hand-written ladder tested 'IGC' twice, which let a
        # later IEA row overwrite a held RCA.
        if ( !defined $held_tier || $new_tier < $held_tier ) {
            $best_for->{$gene} = $code;
        }
        return 1;
    }

    return defined $held_tier ? 1 : 0;
}

while ( my $line = <> ) {
    chomp $line;

    my @c = split /\t/, $line;

    # Disabled filters, kept for reference:
#    next if ( $c[4] eq 'GO:0008150' || $c[4] eq 'GO:0003674' || $c[4] eq 'GO:0005575' );
#    next if ( $c[6] eq 'NR' );
#    print "ND\n" if ( $c[6] eq 'ND');

    my ( $gene, $code, $aspect ) = @c[ 1, 6, 8 ];

    # Lines with fewer than nine columns, or with an aspect other than
    # P/F/C, contribute nothing.
    next unless defined $aspect;
    my $best_for = $best_for_aspect{$aspect};
    next unless defined $best_for;

    next if keep_best_evidence( $best_for, $gene, $code );

    # Neither the held nor the new code is recognised.  The P report has
    # always echoed the whole input line while F and C print
    # "gene - code"; both formats are kept as they were.
    my $detail = $aspect eq 'P' ? $line : "$gene - $code";
    print "** $aspect ** shouldn't get here $detail\n";
}

# Output column (0-based) for every evidence code that is counted.  A
# gene whose stored code is not listed here is left out of the totals.
my %column_of_code = (
    ( map { ( $_ => 0 ) } qw(IMP IDA IPI IGI IEP) ),
    ISS => 1,
    ( map { ( $_ => 2 ) } qw(IC NR ND NAS TAS) ),
    ( map { ( $_ => 3 ) } qw(IGC RCA) ),
    IEA => 4,
);

# One output row per aspect, in the order P, F, C.  Each row holds five
# tab-separated counts of genes, one per evidence group.
foreach my $code_for_gene ( \%geneP, \%geneF, \%geneC ) {
    my @tally = (0) x 5;

    foreach my $code ( values %{$code_for_gene} ) {
        my $column = $column_of_code{$code};
        $tally[$column]++ if defined $column;
    }

    print join( "\t", @tally ), "\n";
}

exit;
