#!/usr/bin/perl -w

use strict;

use Test::More qw(no_plan);

use lib "/Users/gwg/go/scratch/tools/";

use Data::Dumper;
$Data::Dumper::Sortkeys = 1;

use_ok( 'GO::GeneralPurposeParser' );

my $parser = GO::GeneralPurposeParser->new({ return_mode => 'SUCCESS_HASH' });         # create an object

is( defined $parser, 1, 'object creation' );
isa_ok( $parser, 'GO::GeneralPurposeParser');

my $installation_dir = '/Users/gwg/go/scratch/tools/';  # alter as necessary

my $results;
my $data;
my $should_be;

my $shift_and_return = sub { my $x = shift; return $x; };

=comment
Testing plan:

* test file opening / reading / parsing

* test the different types of parser

* test 
=cut

=cut

### FILE PARSING TESTS
my $file_list = [ 'test_data/pretend_file.txt', 'test_data/wrong_permissions.txt', 'test_data/empty_file.txt', 'test_data/test_file.txt' ];

$_ = $installation_dir . $_ foreach @$file_list;

## this file does not exist
$results = $parser->parse_from_file({ file => $file_list->[0] });
isn't( $results->{success}, 1, 'parse_from_file: nonexistent file');


## this file has the wrong permissions
$results = $parser->parse_from_file({ file => $file_list->[1] });
isn't( $results->{success}, 1, 'parse_from_file: wrong file permissions');


## this file is empty
$results = $parser->parse_from_file({ file => $file_list->[2] });
isn't( $results->{success}, 1, 'parse_from_file: empty file');


## this file is OK
$results = $parser->parse_from_file({ file => $file_list->[3] });
is( $results->{success}, 1, 'parse_from_file: valid test file');

## the file contents look like this:
## name	age	alias
## Joe	12	Darth Vader
## Nancy	15	Cleopatra, Queen Of The Nile
$should_be = [ 
'name	age	alias',
'Joe	12	Darth Vader',
'Nancy	15	Cleopatra, Queen Of The Nile',
];

is_deeply( $results->{results}, $should_be, 'parse_from_file: valid file, line break separated');

print STDERR "results: ".Dumper($results->{results});

## change the record separator to an invalid format
$results = $parser->parse_from_file({ file => $file_list->[3], file_separator => [ 'hello mum' ], });
isn't( $results->{success}, 1, 'parse_from_file: invalid separator format');

## change the separator to something that's not present
$results = $parser->parse_from_file({ file => $file_list->[3], file_separator => 'blob', });
$should_be = [ 
'name	age	alias
Joe	12	Darth Vader
Nancy	15	Cleopatra, Queen Of The Nile
', ];
print STDERR "results: ".Dumper($results->{results});
is_deeply( $results->{results}, $should_be, 'parse_from_file: separator not present');

## change the record separator and check it is still OK
$results = $parser->parse_from_file({ file => $file_list->[3], file_separator => '	', });
$should_be = [ 
'name', 'age', 'alias
Joe', '12', 'Darth Vader
Nancy', '15', 'Cleopatra, Queen Of The Nile
',
];
print STDERR "results: ".Dumper($results->{results});
is_deeply( $results->{results}, $should_be, 'parse_from_file: valid file, tab separated');

$parser->reset_parser();



### ARRAY PARSING TESTS

my $parser_list = 
[	{ parser => { type => 'delimited', delimiter => '	' }}, # delimited
	{ parser => { type => 'regexp', regexp => qr/(.+?)	(.*)/ }}, # regexp
	{ parser => { type => 'tag_value', tag_val_sep => '	' }}, # tag-val
	{ parser => { type => 'subroutine', subr => sub { my $x = shift; my @arr = split(/	/, $x, 2); return [ @arr ] }, }, }, # subroutine
];

## check that an empty data array throws an error
my $accum_success = 0;

foreach (@$parser_list)
{	$results = $parser->parse_array(undef, $_);
	$accum_success++ if $results->{success} && $results->{success} == 1;
	$parser->reset_parser();
}
is( $accum_success, 0, 'parse_array: no data');

# array exists but contents are undefined
$accum_success = 0;
foreach (@$parser_list)
{	$results = $parser->parse_array([ undef, '', undef, 0, '' ], $_);
	$accum_success++ if $results->{success} && $results->{success} == 1;
	$parser->reset_parser();
}
is( $accum_success, 0, 'parse_array: array exists but contents are undefined');


# data has the wrong ref type
$accum_success = 0;
foreach (@$parser_list)
{	$results = $parser->parse_array({ 'x' => 1, 'y' => 0 }, $_);
	$accum_success++ if $results->{success} && $results->{success} == 1;
	$parser->reset_parser();
}
is( $accum_success, 0, 'parse_array: wrong ref type');


$accum_success = 0;
# a valid array - hurrah! :D
foreach (@$parser_list)
{	$results = $parser->parse_array([ 'Pete	Tong', 'Jenny	Spinning', 'Data	Frenzy'], $_);
	$accum_success++ if $results->{success} && $results->{success} == 1;
	$parser->reset_parser();
}
is( $accum_success, 4, 'parse_array: valid array');

# valid array, but without useful data
# each parser is using a tab to split up the data
# the results that we SHOULD get:
# tab-delimited: success, even though there are no tabs present, because
#                data is parsed using split() - the first value is kept
# regexp: failure - no match for (.+?)\t(.*)
# tag-value: success, because is based on split again
# subroutine: this sub is based on split, so will be successful
$accum_success = 0;
foreach (@$parser_list)
{	$results = $parser->parse_array([ 1, 2, 3 ], $_ );
	$accum_success++ if $results->{success} && $results->{success} == 1;
	$parser->reset_parser();
}
is( $accum_success, 3, 'parse_array: valid array');


## Let's check out those parsers now.

# invalid parser
$results = $parser->parse_array([ 1, 2, 3], { parser => { type => 'made up parser' }});
isn't( $results->{success}, 1, 'parser tests: invalid parser name');
#print STDERR "\nerror_list: ".Dumper($results->{msg_list})."\n\n";
$parser->reset_parser();

# invalid parser, from the inner_parser subroutine in parse_from_file
$results = $parser->parse_from_file({ file => $file_list->[3], inner_parser => { parser => { type => 'read_file' }} });
isn't( $results->{success}, 1, 'parser tests: invalid parser in parse_from_file\'s inner_parser');
$parser->reset_parser();


### PARSER-SPECIFIC TESTS

$data = [ "name, age, alias", "Joe, 12, Darth Vader", "Nancy, 15, Cleopatra, Queen Of The Nile" ];


### DELIMITED PARSER

# delimited parser, delimiter is ''
$results = $parser->parse_array($data, {
	parser => { type => 'delimited', delimiter => '' },
});
isn't( $results->{success}, 1, 'delimiter is length 0');


# delimited parser, delimiter is undefined
$results = $parser->parse_array($data, {
	parser => { type => 'delimited', delimiter => undef },
});
### this uses the default... should we have an error instead?
isn't( $results->{success}, 1, 'delimiter is undefined');

# delimited parser, delimiter is in the wrong format
$results = $parser->parse_array($data, {
	parser => { type => 'delimited', delimiter => [ 'hello mum' ] },
});
isn't( $results->{success}, 1, 'delimiter is in the wrong format');


# delimited parser, delimiter has all manner of funny characters in it
$results = $parser->parse_array([ "here's ( some ( data ", "here ( is ( some ( more"], {
	parser => { type => 'delimited', delimiter => '(' },
});
is_deeply( $results->{results}, [["here's", 'some', 'data '], ['here','is','some','more']], 'delimiter has all manner of funny characters in it');


# delimited parser, delimiter has all manner of funny characters in it, is_regexp = 1
$results = $parser->parse_array([ "here's ( some ( data ", "here ( is ( some ( more "], {
	parser => { type => 'delimited', delimiter => '(', is_regexp => 1 },
});
isn't( $results->{success}, 1, 'delimiter is an invalid regexp');


$should_be = [['name','age','alias'],['Joe','12','Darth Vader'],['Nancy','15','Cleopatra','Queen Of The Nile']];


# delimited, no field_list specified
$results = $parser->parse_array($data, {
	parser => { type => 'delimited', delimiter => ', ' },
});
is_deeply( $results->{results}, $should_be, "delimited, no field_list specified");

$parser->reset_parser();


# same, but with a regexp
$results = $parser->parse_array($data, {
	parser => { type => 'delimited', delimiter => qr/, / },
});
is_deeply( $results->{results}, $should_be, "delimited, no field_list specified");

$parser->reset_parser();


# slightly different data, same results
$results = $parser->parse_array([ "name - age: alias", "Joe - 12: Darth Vader", "Nancy - 15: Cleopatra, Queen Of The Nile" ], {
	parser => { type => 'delimited', delimiter => qr/[,:-] / },
});
is_deeply( $results->{results}, $should_be, "delimited, no field_list specified");

$parser->reset_parser();


# slightly different data, same results
$results = $parser->parse_array([ "name - age: alias", "Joe - 12: Darth Vader", "Nancy - 15: Cleopatra, Queen Of The Nile" ], {
	parser => { type => 'delimited', delimiter => '[,:-] ', is_regexp => 1 },
});
is_deeply( $results->{results}, $should_be, "delimited, no field_list specified");

$parser->reset_parser();


# delimited, has_header_row specified
$results = $parser->parse_array($data, {
	parser => { type => 'delimited', delimiter => ', ', has_header_row => 1, },
});

$should_be = [ { name => 'Joe', age => 12, alias => 'Darth Vader' },
{ name => 'Nancy', age => 15, alias => 'Cleopatra, Queen Of The Nile' }];

is_deeply( $results->{results}, $should_be, "delimited, has_header_row specified");


# delimited, field_list specified
$results = $parser->parse_array($data, {
	parser => { type => 'delimited', delimiter => ', ', field_list => [ 'name', 'age', 'alias' ], },
});

$should_be = [ { name => 'name', age => 'age', alias => 'alias' },
{ name => 'Joe', age => 12, alias => 'Darth Vader' },
{ name => 'Nancy', age => 15, alias => 'Cleopatra, Queen Of The Nile' }];

is_deeply( $results->{results}, $should_be, "delimited, field_list specified");


# delimited, field_list specified, return as an array of tag-value pairs
$results = $parser->parse_array($data, {
	parser => { type => 'delimited', delimiter => ', ', has_header_row => 1, },
});

$should_be = [ #{ name => 'name', age => 'age', alias => 'alias' },
{ name => 'Joe', age => 12, alias => 'Darth Vader' },
{ name => 'Nancy', age => 15, alias => 'Cleopatra, Queen Of The Nile' }];

is_deeply( $results->{results}, $should_be, "delimited, field_list specified");


# create objects from our data
$results = $parser->parse_array($data, {
	parser => { type => 'delimited', delimiter => ', ', has_header_row => 1, },
	return_object => 'TestPerson',
});

## we should get two TestPerson objects
is( ref($results->{results}) eq 'ARRAY' && scalar @{$results->{results}} == 2 && ref($results->{results}[0]) eq 'TestPerson' && ref($results->{results}[1]) eq 'TestPerson' && $results->{success} == 1, 1, 'testing object creation with the delimited parser');

print STDERR "return object: ".Dumper($results)."\n";
$parser->reset_parser;


### REGEXP PARSER

$data = [ "Joe, aged 12, played Darth Vader", "Nancy, aged 15, played Cleopatra, Queen Of The Nile" ];


# regexp parser, no arguments
$results = $parser->parse_array($data, {
	parser => { type => 'regexp' },
});
isn't( $results->{success}, 1, 'regexp parser, no arguments');


# invalid parser arguments
$results = $parser->parse_from_file({ file => $file_list->[3], inner_parser => { parser => { type => 'regexp' }, }, });
isn't( $results->{success}, 1, 'regexp parser, no arguments, parse_from_file' );
#print STDERR "\nerror_list: ".Dumper($results->{msg_list})."\n\n";
$parser->reset_parser();


# invalid regexp supplied
$results = $parser->parse_from_file({ file => $file_list->[3], inner_parser => { parser => { type => 'regexp', regexp => undef }, }, });
isn't( $results->{success}, 1, 'parser tests: undef regexp' );


# invalid regexp supplied
$results = $parser->parse_from_file({ file => $file_list->[3], inner_parser => { parser => { type => 'regexp', regexp => [ qr/(my text) is (here)/ ] }, }, });
isn't( $results->{success}, 1, 'parser tests: invalid regexp format' );


# invalid regexp supplied
$results = $parser->parse_from_file({ file => $file_list->[3], inner_parser => { parser => { type => 'regexp', regexp => '(' }, }, });
isn't( $results->{success}, 1, 'parser tests: invalid regexp supplied' );
$parser->reset_parser();


# regexp parser, dodgy regexp supplied
$results = $parser->parse_array($data, {
	parser => { type => 'regexp', regexp => '^.*?, aged \d+, played .+$' },
});
isn't( $results->{success}, 1, 'regexp parser, dodgy regexp supplied');


# regexp parser, no field list, no non-space characters in data
$results = $parser->parse_array([ "", "     		", "	  

", "   " ], {
	parser => { type => 'regexp', regexp => '^(.*?), aged (\d+), played (.+)$' },
});
isn't( $results->{success}, 1, 'regexp parser, no non-space characters in data');


# regexp parser, no field list, string regexp
$results = $parser->parse_array($data, {
	parser => { type => 'regexp', regexp => '^(.*?), aged (\d+), played (.+)$' },
});

$should_be = [ ['Joe', 12, 'Darth Vader'], ['Nancy', 15, 'Cleopatra, Queen Of The Nile'] ];
is_deeply( $results->{results}, $should_be, 'regexp parser, no field list');


# regexp parser, no field list, Regexp supplied
$results = $parser->parse_array($data, {
	parser => { type => 'regexp', regexp => qr/^(.*?), aged (\d+), played (.+)$/ },
});
is_deeply( $results->{results}, $should_be, 'regexp parser, no field list');


# regexp parser, field list specified
$results = $parser->parse_array($data, {
	parser => { type => 'regexp', regexp => '^(.*?), aged (\d+), played (.+)$', field_list => ['name', 'age', 'alias'], },
});
$should_be = [ { name => 'Joe', age => 12, alias => 'Darth Vader' },
{ name => 'Nancy', age => 15, alias => 'Cleopatra, Queen Of The Nile' }];
is_deeply( $results->{results}, $should_be, 'regexp parser, field list specified');


# regexp parser, mismatched mapping supplied
$results = $parser->parse_array($data, {
	parser => { type => 'regexp', regexp => '^(.*?), aged (\d+), played ([a-zA-Z ]+),? ?(\S.+)?$', field_list => ['name', 'age', 'alias', 'subtitle'] },
});

$should_be = [ { name => 'Joe', age => 12, alias => 'Darth Vader' },
{ name => 'Nancy', age => 15, alias => 'Cleopatra', subtitle => 'Queen Of The Nile' }];
is_deeply( $results->{results}, $should_be, 'regexp parser, mismatched mapping supplied');
$parser->reset_parser;


# regexp parser, mismatched mapping supplied
$results = $parser->parse_array($data, {
	parser => { type => 'regexp', regexp => '^(.*?), aged (\d+), played ([a-zA-Z ]+),? ?(\S.+)?$', field_list => ['name', 'age'] },
});

$should_be = [ { name => 'Joe', age => 12 }, { name => 'Nancy', age => 15 } ];
is_deeply( $results->{results}, $should_be, 'regexp parser, mismatched mapping supplied');

print STDERR "errors: ".Dumper($parser->get_all_errors);

$parser->reset_parser;


### TAG-VALUE PARSER

# tag value parser, tag_val_sep is undefined
$results = $parser->parse_array($data, {
	parser => { type => 'tag_value', tag_val_sep => '' },
});
### this uses the default... should we have an error instead?
isn't( $results->{success}, 1, 'tag value parser, tag_val_sep undefined');
if (! $results->{success} )
{	print STDERR "no success! ".Dumper( $results );
}


# tag value parser, tag_val_sep is in the wrong format
$results = $parser->parse_array($data, {
	parser => { type => 'tag_value', tag_val_sep => [ 'hello mum' ] },
});
isn't( $results->{success}, 1, 'tag_val_sep is in the wrong format');


# tag value parser, tag_val_sep has all manner of funny characters in it
$results = $parser->parse_array([ "here's ( some ( data ", "here ( is ( some ( more"], {
	parser => { type => 'tag_value', tag_val_sep => '(' },
});
is_deeply( $results->{results}, [{ "here's" => 'some ( data ' },{ 'here','is ( some ( more' }], 'tag_val_sep has all manner of funny characters in it');


# tag value parser, tag_val_sep has all manner of funny characters in it, is_regexp = 1
$results = $parser->parse_array([ "here's ( some ( data ", "here ( is ( some ( more "], {
	parser => { type => 'tag_value', tag_val_sep => '(', is_regexp => 1 },
});
isn't( $results->{success}, 1, 'tag_val_sep is an invalid regexp');


# tag-value parser, no non-whitespace data
$results = $parser->parse_array([ "", "     		", "	  

", "   " ], {
	parser => { type => 'tag_value' },
});
is( $results->{results}, undef, 'tag-value parser, no data at all' );


# tag-value parser, separator only
$data = [ "+", "	+

", "  +    " ];
$results = $parser->parse_array($data, {
	parser => { type => 'tag_value', tag_val_sep => '+' },
});
is( $results->{results}, undef, 'tag-value parser, separator plus whitespace only' );


$data = [ "joe - 12", "nancy - 15", "tracy - 22" ];

# tag-value parser, no values found
$results = $parser->parse_array($data, {
	parser => { type => 'tag_value' },
});
$should_be = [ { "joe - 12" => undef }, { "nancy - 15" => undef }, { "tracy - 22" => undef } ];
is_deeply( $results->{results}, $should_be, 'tag-value parser, default TVS, no values found' );


# tag-value parser, good data
$results = $parser->parse_array($data, {
	parser => { type => 'tag_value', tag_val_sep => '-' },
});
$should_be = [ { "joe" => "12" }, { "nancy" => "15" }, { "tracy" => "22" } ];
is_deeply( $results->{results}, $should_be, 'tag-value parser, good data' );


# tag-value parser, no tag
$data = [ "+joe - 12", "+nancy - 15", "+tracy - 22" ];
$results = $parser->parse_array($data, {
	parser => { type => 'tag_value', tag_val_sep => '+' },
});
$should_be = [ { '' => "joe - 12" }, { '' => "nancy - 15" }, { '' => "tracy - 22" } ];
is_deeply( $results->{results}, $should_be, 'tag-value parser, no tag' );


$data = [ "Joe: aged 12, played Darth Vader", "Nancy, aged 15, played Cleopatra, Queen Of The Nile" ];
$should_be = [{ 'Joe' => 'aged 12, played Darth Vader' },{ 'Nancy' => 'aged 15, played Cleopatra, Queen Of The Nile' }];


# tag value, range of separators
$results = $parser->parse_array($data, {
	parser => { type => 'tag_value', tag_val_sep => '[,:] ', is_regexp => 1 },
});
is_deeply( $results->{results}, $should_be, "tag value, range of separators, good data");

$parser->reset_parser();


# same, but with a regexp
$results = $parser->parse_array($data, {
	parser => { type => 'tag_value', tag_val_sep => qr/,|: / },
});
is_deeply( $results->{results}, $should_be, "tag value, good data, regexp");
$parser->reset_parser();


## what are we going to do with capturing mappings?!
# same, but with a regexp
$results = $parser->parse_array($data, {
	parser => { type => 'tag_value', tag_val_sep => qr/.+[:,] / },
});
is_deeply( $results->{results}, [{ '' => 'played Darth Vader' }, { '' => 'Queen Of The Nile' }], "tag value, dodgy regexp things going on");

print STDERR "results: ".Dumper($results->{results});

### SUBROUTINE PARSER

# no subroutine
$results = $parser->parse_array($data, {
	parser => { type => 'subroutine', subr => '' },
} );
isn't( $results->{success}, 1, 'Subroutine parser, no sub specified' );
$parser->reset_parser();
	
	
# Subroutine parser, wrong subroutine format
$results = $parser->parse_array($data, {
	parser => { type => 'subroutine', subr => [ sub { my $x = shift; return $x } ], },
} );
isn't( $results->{success}, 1, 'Subroutine parser, wrong subroutine format' );
$parser->reset_parser();


# Subroutine parser, wrong subroutine format
$results = $parser->parse_array($data, {
	parser => { type => 'subroutine', subr => qr/this is a regexp/ },
} );
isn't( $results->{success}, 1, 'Subroutine parser, wrong subroutine format' );
$parser->reset_parser();


# Subroutine parser, subr returns an error
$results = $parser->parse_array($data, {
	parser => { type => 'subroutine',
		subr => sub {
			my $value = shift;
			foreach (@$value)
			{	$_++;
			}
			return $value;
		},
	},
} );
isn't( $results->{success}, 1, 'Subroutine parser, subr returns an error' );
$parser->reset_parser();


$data = [ "Joe: aged 12, played Darth Vader", "Nancy: aged 15, played Cleopatra, Queen Of The Nile" ];
$should_be = [{ 'Joe' => 'aged 12, played Darth Vader' },{ 'Nancy' => 'aged 15, played Cleopatra, Queen Of The Nile' }];

# Subroutine parser, subr is okey-dokey!
$results = $parser->parse_array($data, {
	parser => { type => 'subroutine',
		subr => sub {
			my $input = shift;
			my $x = index $input, ':';
			my $tag = substr $input, 0, $x;
			my $val = substr $input, ++$x;
			$val =~ s/^\s*//;
			return { $tag => $val };
		},
	},
} );
is_deeply( $results->{results}, $should_be, 'Subroutine parser, subroutine specified' );
$parser->reset_parser();


## some more tests for the subroutine parser?

### OK, we've tested the parsers (hopefully!). Now let's check out some of the
### other exciting features and functions of the GPP.
### these include:
### filter
### prepare
### process
### return_object
### post_process
### save
=cut
$parser->reset_parser();

print STDERR "\n\n\n\n\n";

$data = "name: Nancy
age: 15
occupation: Cleopatra, Queen Of The Nile

name: Robin
age: 12
occupation: Batman's right hand man-at-arms
";

$results = $parser->parse_text($data,
	{	# first split: \n\n
		parser => { type => 'delimited', delimiter => "\n\n" },
		inner_parser => {
			parser => { type => 'delimited', delimiter => "\n" },
		},
	});

is_deeply( $results->{results}, [ 
[	"name: Nancy",
	"age: 15",
	"occupation: Cleopatra, Queen Of The Nile", ],
[	"name: Robin",
	"age: 12",
	"occupation: Batman's right hand man-at-arms" ], ], "Doubly nested parsers");

print_results();

print STDERR "\n\n\n\n\n";

## and an even more exciting example!

$results = $parser->parse_text($data,
	{	# first split: \n\n
		parser => { type => 'delimited', delimiter => "\n\n" },
		inner_parser => {
			parser => { type => 'delimited', delimiter => "\n" },
			inner_parser => {
				parser => { type => 'tag_value', tag_val_sep => ':' },
			},
		},
	});

is_deeply( $results->{results}, [ 
[	{ "name" => "Nancy" },
	{ "age" => "15" },
	{ "occupation" => "Cleopatra, Queen Of The Nile" }, ],
[	{ "name" => "Robin" },
	{ "age" => "12" },
	{ "occupation" => "Batman's right hand man-at-arms" } ], ], "Three nested parsers");
$parser->reset_parser();



## it's not particularly helpful having an array of hashes in the results, so
## let's merge them into a single hash instead.
$should_be = [
{ "name" => "Nancy", "age" => "15", "occupation" => "Cleopatra, Queen Of The Nile" },
{ "name" => "Robin", "age" => "12", "occupation" => "Batman's right hand man-at-arms" }, ];

$results = $parser->parse_text($data,
	{	# first split: \n\n
		parser => { type => 'delimited', delimiter => "\n\n" },
		inner_parser => {
			parser => { type => 'delimited', delimiter => "\n" },
			inner_parser => {
				parser => { type => 'tag_value', tag_val_sep => ':' },
				save_as => 'hash_merge',
			},
		},
	});

is_deeply( $results->{results}, $should_be, "Three nested parsers, hash merge saved");
$parser->reset_parser();


## now let's try the same data, but use the multi_field_tag_value alias
$results = $parser->parse_text($data,
	{	# first split: \n\n
		parser => { type => 'delimited', delimiter => "\n\n" },
		inner_parser => {
			parser => 'multi_field_tag_value',
			tag_val_sep => ":",
			field_sep => "\n",
		},
	});

is_deeply( $results->{results}, $should_be, "MFTV parser");

# now the same thing, but getting the data from a file instead... tricky! ;)

## now let's try the same data, but use the multi_field_tag_value alias
$results = $parser->parse_from_file({
		# first split: \n\n
		file => $installation_dir . 'test_data/test_file_2.txt',
		file_separator => "\n\n",
		inner_parser => {
			parser => 'multi_field_tag_value',
			tag_val_sep => ":",
			field_sep => "\n",
		},
	});

is_deeply( $results->{results}, $should_be, "MFTV parser, reading from a file");



=cut

## let's try some prepare subroutines

$results = $parser->parse_array([ "Nancy, 15, Cleopatra, Queen Of The Nile" ],
	{	parser => { type => 'delimited', delimiter => ', ' },
		prepare => sub { my $x = shift; return lc $x },
	},
);
is_deeply( $results->{results}, [  [ "nancy", "15", "cleopatra", "queen of the nile" ]  ], 'Preparation step added to parser' );


$parser->set_verbosity('superverbose');

## a multi-field tag-value file
$results = $parser->parse_text(

	# get the parser ready for action
	my $parser_args = {
		file => $ref_db,
		file_separator => "\n\n",
		inner_parser => {
			parser => 'multi_field_tag_value',
			return_record_as_object => 'GO::Object::Reference',
			check_input => 1,
			filter_field => qr/^[^!]/,
		},
	};


=cut

### START GO::Boolean checks ###
undef $results;
use_ok( 'GO::Boolean' , "Using GO::Boolean");

## GO::Boolean has two main methods, check_boolean_query and run_boolean_query
my $queries = [
undef,
'hello',
1,
[ 'hello', 'mum' ],
{},
{ 'OR' => {} },
{ 'OR' => { FN => 'hello mum' } },
{ 'OR' => [ { FN => qr/hello mum/ } ] },
{ 'OR' => { FN => sub { my $x = shift; return $x; } } },
{ 'OR' => [ { FN => sub { my $x = shift; return $x; } }, { FN => sub {}, }, { PARAM => sub {}, } ] },
];

## let's create some boolean queries and get GO::Boolean to check the structure is up to scratch
my $success;
foreach (@$queries)
{	$results = GO::Boolean::check_boolean_query($_);
	if (defined $results)
	{	$success++;
		print STDERR "Query validation successful!\nQuery: ".Dumper($_);
		undef $results;
	}
}

is($success, undef, "Incorrectly structured queries");

undef $success;
# redefine queries with some good valid Qs
$queries = [
{ 'OR' => [ { FN => sub { my $x = shift; return $x; } } ] },

];

foreach (@$queries)
{	$results = GO::Boolean::check_boolean_query($_);
	if (defined $results)
	{	$success++;
		undef $results;
	}
	else
	{	print STDERR "Query validation unsuccessful!\nQuery: ".Dumper($_);
	}
}

cmp_ok($success, '==', scalar @$queries, "Correctly structured queries");

### END GO::Boolean CHECKS

$parser->reset_parser;

## set up a tag-value parser
## a multi-field tag-value file

$results = $parser->parse_from_file({
		# first split: \n\n
		file => $installation_dir . 'test_data/GO.references.txt',
		file_separator => "\n\n",
		inner_parser => {
			parser => 'multi_field_tag_value',
			tag_val_sep => ":",
			field_sep => "\n",
			if_then_record => {
				# search for records which have lines that DON'T start with !
				'if' => qr/^[^!]/m, #$regexp,
			},
		},
	});

is_deeply($results->{results}, [
    {
      'author' => 'GO curators',
      'comment' => 'This reference will normally be replaced upon publ',
      'go_ref_id' => 'GO_REF:0000001',
      'title' => 'GO Consortium unpublished data',
      'year' => '1998 - present'
    },
    {
      'abstract' => 'Transitive assignment of GO terms based on InterPr',
      'alt_id' => [
        'GO_REF:0000007',
        'GO_REF:0000014',
        'GO_REF:0000016',
        'GO_REF:0000017'
      ],
      'author' => 'DDB, FB, MGI, GOA, ZFIN curators',
      'comment' => 'Formerly GOA:interpro. Note that GO annotations ba',
      'external_accession' => [
        'MGI:2152098',
        'J:72247',
        'ZFIN:ZDB-PUB-020724-1',
        'FB:FBrf0174215',
        'DDB_REF:10157',
        'SGD_REF:S000124036'
      ],
      'go_ref_id' => 'GO_REF:0000002',
      'title' => 'Gene Ontology annotation through association of In',
      'year' => '2001'
    },
    {
      'abstract' => 'Transitive assignment using Enzyme Commission iden',
      'alt_id' => 'GO_REF:0000005',
      'author' => 'GOA curators, MGI curators',
      'citation' => 'Genomics 74:121-128',
      'citation_pmid' => '11374909',
      'comment' => 'Formerly GOA:spec.',
      'external_accession' => [
        'MGI:2152096',
        'J:72245',
        'ZFIN:ZDB-PUB-031118-3',
        'SGD_REF:S000124037',
        'PMID:11374909'
      ],
      'go_ref_id' => 'GO_REF:0000003',
      'title' => 'Gene Ontology annotation based on Enzyme Commissio',
      'year' => '2001'
    }
  ], "Parser with if filter");


undef $results;
$parser->reset_parser();

$results = $parser->parse_from_file({
		# first split: \n\n
		file => $installation_dir . 'test_data/GO.references.txt',
		file_separator => "\n\n",
		inner_parser => {
			parser => 'multi_field_tag_value',
			tag_val_sep => ":",
			field_sep => "\n",
			if_then_record => {
				# search for records which have lines that start with !
				# remove the exclamation mark
				'if' => qr/^!/m, #$regexp,
				'then' => sub { 
					my $line = shift; 
					$line =~ s/^!\s*//gm; 
					return $line; 
				},
			},
		},
	});

$should_be = [ {
'version' => 'Revision: 1.37 $',
'date' => 'Date: 2008/03/06 11:58:14 $',
'Gene Ontology Reference Collection' => undef,
'data fields for this file' => '' }, 
{
'go_ref_id' => '[mandatory; cardinality 1; GO_REF:nnnnnnn]',
'alt_id' => '[not mandatory; cardinality 0,1,>1; GO_REF:nnnnnnn]',
'title' => '[mandatory; cardinality 1; free text]',
}, 
{
'If a database maintains its own internal reference collection, and' => undef,
'has a record that is equivalent to a GO_REF entry, the database\'s' => undef,
'internal ID should be included as an external_accession for the' => undef,
'corresponding GO_REF.' => undef,
'This data is available as a web page at' => undef,
'http' => '//www.geneontology.org/cgi-bin/references.cgi',
}, ];

is_deeply($results->{results}, $should_be, "Adding a 'then' subroutine for 'if' filters");

undef $results;
$parser->reset_parser();

## now with a string instead of a regexp
$results = $parser->parse_from_file({
		# first split: \n\n
		file => $installation_dir . 'test_data/GO.references.txt',
		file_separator => "\n\n",
		inner_parser => {
			parser => 'multi_field_tag_value',
			tag_val_sep => ":",
			field_sep => "\n",
			if_then_record => {
				# search for records which have lines that start with !
				# remove the exclamation mark
				'if' => '^!', #$regexp,
				'then' => sub { 
					my $line = shift; 
					$line =~ s/^!\s*//gm; 
					return $line; 
				},
			},
		},
	});

is_deeply($results->{results}, $should_be, "Same as before, using a string instead of a regexp");

## use a subroutine
$results = $parser->parse_from_file({
		# first split: \n\n
		file => $installation_dir . 'test_data/GO.references.txt',
		file_separator => "\n\n",
		inner_parser => {
			parser => 'multi_field_tag_value',
			tag_val_sep => ":",
			field_sep => "\n",
			if_then_record => {
				# search for records which have lines that start with !
				# remove the exclamation mark
				'if' => sub { my $line = shift; return 1 if $line =~ /^!/m; return undef; }, 
				'then' => sub { 
					my $line = shift; 
					$line =~ s/^!\s*//gm; 
					return $line; 
				},
			},
		},
	});

is_deeply($results->{results}, $should_be, "Same as before, but using a subroutine for the 'if' filter");

undef $results;
$parser->reset_parser();

$results = $parser->parse_from_file({
		# first split: \n\n
		file => $installation_dir . 'test_data/GO.references.txt',
		file_separator => "\n\n",
		inner_parser => {
			parser => 'multi_field_tag_value',
			tag_val_sep => ":",
			field_sep => "\n",
			if_then_record => {
				# count the number of exclamation marks in a record
				'if' => sub { my $line = shift; my $copy = $line;
					my $count = ($copy =~ s/!//gm);
					$count = 'zero' unless $count =~ /\d/;
					return $count; }, #$regexp,
				'then' => {
					8 => sub { my $line = shift; return undef; },
					3 => sub { my $line = shift; return uc $line; },
					6 => sub { my $line = shift; #print STDERR "line: ".Dumper($line)."\n";  
						$line =~ s/^!\s*//gm;
						return $line; },
					'' => sub { my $line = shift; 
				#	print STDERR "Line: ".Dumper($line); 
					$line =~ s/^[^a].*$//gm; 
				#	print STDERR "Line now: ".Dumper($line)."\n\n";
					return $line; },
#						return $line =~ s/^!\s*//gm; },
				},
			},
		},
	});

$should_be = [ {
'version' => 'Revision: 1.37 $',
'date' => 'Date: 2008/03/06 11:58:14 $',
'Gene Ontology Reference Collection' => undef,
'data fields for this file' => '' }, 
{
'!  GO_REF_ID' => '[MANDATORY; CARDINALITY 1; GO_REF:NNNNNNN]',
'!  ALT_ID' => '[NOT MANDATORY; CARDINALITY 0,1,>1; GO_REF:NNNNNNN]',
'!  TITLE' => '[MANDATORY; CARDINALITY 1; FREE TEXT]',
}, 
{	'author' => 'GO curators', },
{	'alt_id' => [ 'GO_REF:0000007', 'GO_REF:0000014', 'GO_REF:0000016', 'GO_REF:0000017' ],
	'author' => 'DDB, FB, MGI, GOA, ZFIN curators',
	'abstract' => 'Transitive assignment of GO terms based on InterPr',
},
{	'alt_id' => 'GO_REF:0000005',
	'author' => 'GOA curators, MGI curators',
	'abstract' => 'Transitive assignment using Enzyme Commission iden',
} ];

is_deeply($results->{results}, $should_be, "Using a dispatch hash instead of a subroutine");

undef $results;
$parser->reset_parser();

## now let's try some preparing, processing and post-processing

$results = $parser->parse_from_file({
		# first split: \n\n
		file => $installation_dir . 'test_data/GO.references.txt',
		file_separator => "\n\n",
		inner_parser => {
			parser => { type => 'delimited', delimiter => "\n" },
			inner_parser => {
				if_then => [{
					'if' => sub { my $line = shift;
						if ($line =~ /^!/)
						{	if ($line =~ /(Revision: |Date: )/)
							{	return 1;
							}
						}
						else
						{	return 2;
						}
					},
					'then' => {
						1 => sub { my $x = shift; $x =~ s/^!\s*//; $x =~ s/:.*?:/:/; $x =~ s/\s*\$//g; return $x; },
						2 => sub { my $x = shift; return $x; },
					},
					'when' => 'prepare',
				}],
				parser => { type => 'tag_value', tag_val_sep => ':' },
				save_as => 'hash_merge',
			},
#			return_object => 'GO::Object::Reference',
			if_then => 
			{	'if' => sub {
					my $h = shift;
					if ($h->{go_ref_id})
					{	return 1;
					}
					else
					{	return 2;
					}
				},
				'then' => {
					1 => {
						inner_parser => {
							parser => { type => 'subroutine',
								subr => sub { my $x = shift;
									return $x;
								},
							},
							return_object => 'GO::Object::Reference',
						},
					},
					2 => sub { my $x = shift;
						return $x;
					},
				},
				'when' => 'post_process',
			},
		},
	});

#print_results();

undef $results;
$parser->reset_parser();

$results = $parser->parse_from_file({
		# first split: \n\n
		file => $installation_dir . 'test_data/GO.references.txt',
		file_separator => "\n\n",
		inner_parser => {
			parser => 'mftv',
			if_then_field => {
				'if' => sub { my $line = shift;
					if ($line =~ /^!/)
					{	if ($line =~ /(Revision: |Date: )/)
						{	return 1;
						}
					}
					else
					{	return 2;
					}
				},
				'then' => {
					1 => sub { my $x = shift; $x =~ s/^!\s*//; $x =~ s/:.*?:/:/; $x =~ s/\s*\$//g; return $x; },
					2 => sub { my $x = shift; return $x; },
				},
				'when' => 'prepare',
			},
			if_then_record => 
			{	'if' => sub {
					my $h = shift;
					if ($h->{go_ref_id})
					{	return 1;
					}
					else
					{	return 2;
					}
				},
				'then' => {
					1 => {
						inner_parser => {
							parser => { type => 'dummy' },
							return_object => 'GO::Object::Reference',
						},
					},
					2 => sub { my $x = shift;
						return $x;
					},
				},
				'when' => 'post_process',
			},
		},
	});

#print_results();


#$parser->set_verbosity('superverbose');

$parser->reset_parser();
undef $results;
# let's check out what will happen if we use 'process'
$results = $parser->parse_from_file({
	file => $installation_dir . 'test_data/test_file.txt',
	file_separator => "\n",
	inner_parser => {
		parser => { type => 'delimited', 
			delimiter => qr/\t/,
		},
		process => sub { my $l = shift; 
			return uc $l;
		},
	},
});
is_deeply($results->{results}, [ ['NAME', 'AGE', 'ALIAS'], ['JOE', 12, 'DARTH VADER'], ['NANCY', '15', 'CLEOPATRA, QUEEN OF THE NILE'] ], "Process subr working on an array");
#print_results();


$parser->reset_parser();
undef $results;
# let's check out what will happen if we use 'process'
$results = $parser->parse_from_file({
	file => $installation_dir . 'test_data/test_file.txt',
	file_separator => "\n",
	inner_parser => {
		parser => { type => 'delimited', 
			has_header_row => 1,
			delimiter => qr/\t/,
		},
		process => sub { my $l = shift; 
			my $new_h;
			while ( my ($k, $v) = each (%$l) )
			{	$new_h->{uc $k} = uc $v;
			}
			return $new_h; 
		},
	},
});

is_deeply($results->{results}, [ { 'AGE' => '12', 'ALIAS' => 'DARTH VADER', 'NAME' => 'JOE' }, { 'AGE' => '15', 'ALIAS' => 'CLEOPATRA, QUEEN OF THE NILE', 'NAME' => 'NANCY' } ], "Process subr working on a hash");

## ok, time to do something useful! 

## let's do some simple parser nesting
$parser->set_verbosity('superverbose');

$parser->reset_parser();
undef $results;
$results = $parser->parse_from_file({
	file => '/Users/gwg/obo_file.obo',
	file_separator => "\n\n",
	inner_parser => {
		parser => 'mftv',
		post_process_record => sub {
			my $rec = shift;
			if (exists $rec->{'[Term]'})
			{	return { 'Term' => $rec };
			}
			elsif (exists $rec->{'[Typedef]'})
			{	return { 'Typedef' => $rec };
			}
			return { header => $rec };
		},
	},
	save_as => 'hash_merge',
});

print_results();



sub print_results {
#	my $res = shift;
	if ($results->{results})
	{	print STDERR "results: ".Dumper($results->{results})."\n";
	}
	else
	{	if (exists $results->{success})
		{	print STDERR "success: ".Dumper($results->{success})."\n";
		}
		if (exists $results->{error_list})
		{	print STDERR "error_list: ".Dumper($results->{error_list})."\n";
		}
	}
	print STDERR "\n";
}


exit(0);


