package GO::Boolean;

=head1 SYNOPSIS

package GO::Boolean;

Some nice little Boolean subroutines for general use

=cut

use strict;
use lib 'go/scratch/tools';
use GO::MsgLite;
use Data::Dumper;
$Data::Dumper::Indent = 1;
$Data::Dumper::Sortkeys = 1;

=head2 check_boolean_query

Checks the structure of a boolean query, just in case...

The query should have the form

$query = 
{
  OP1 => 
  [ { PARAM => x, FN => y, ARGS => z },
    SUB_Q => {
      OP2 =>
      [ { PARAM => a, FN => b, ARGS => c, SAVE_RESULT => 1 },
        { FN => d }, ],
    },
  ],
}

OP can be 'AND', 'OR' or 'NOT'
each subquery has the form:
- FN: the test to perform; should be a function reference;
      can be empty if we're constructing a subquery
- ARGS: the arguments for the test; arrayref; optional
- PARAM: object parameter to test; optional
- SAVE_RESULT: keep the result of FN

OR

- sub_q (a subquery)


input:  boolean query
output: undefined if the query failed the test, with a message to STDERR explaining
        the problem
        the query if it was in the correct format
        (with any little problems corrected, should any have been found)

=cut

sub check_boolean_query {
	my $query = shift;
	
	if (! defined $query)
	{	return fail_query($query, "Query is undefined!");
	}

	# check the format is correct
	if (! ref $query || ref $query ne 'HASH')
	{	return fail_query($query, "Query must be a HASH ref, not " . ( ref $query || 'SCALAR' ) );
	}
	
	# check for keys and values
	if (! %$query || ! keys %$query || ! values %$query)
	{	return fail_query($query, "Nothing defined in query hash!");
	}
	
	# check we have one of the required keys
	if (! $query->{AND} && ! $query->{OR} && ! $query->{NOT})
	{	return fail_query($query, "Could not find a valid boolean operator in query");
	}
	
	# check we don't have too many of the required keys!
	if ( (grep { $_ =~ /^(AND|NOT|OR)$/ } keys %$query) > 1)
	{	return fail_query($query, "Too many operators in query");
	}
	
	# check that we have something appropriate in the function list
	foreach my $op qw(AND NOT OR)
	{	next unless $query->{$op};
		# we have a key, but nothing in the value!
		if (! defined $query->{$op})
		{	return fail_query($query, "No operations defined in query");
		}
		# check that the operations are in the correct format
		if (! ref $query->{$op})
		{	# a scalar... we can't really do anything with this, sadly
			return fail_query($query, "$op query value must be an array, not SCALAR");
		}
		elsif (ref $query->{$op} ne 'ARRAY')
		{	return fail_query($query, "$op query value must be an array, not ". ref $query->{$op} );
		}
		elsif (! @{$query->{$op}})
		{	# there's nothing in the list
			return fail_query($query, "$op query set is empty");
		}
		
		# OK, we have an array. Now let's check the array contents are OK
		# the array should contain hashrefs, either to a subquery, or to the test
		# that should be performed
		foreach my $q (@{$query->{$op}})
		{	if (! ref $q || ref $q ne 'HASH')
			{	return fail_query($query, "query set function must be a HASH ref, not ". ( ref $q || 'SCALAR' ) );
			}
			
			if (! %$q || ! keys %$q || ! values %$q)
			{	return fail_query($query, "Nothing defined in subquery hash!");
			}

			# see if there is a subquery present or not
			if ($q->{SUB_Q})
			{	# see how many other keys we have...
				if (scalar keys %$q > 1)
				{	printerr("warning: other keys present in $op hash!\n".Dumper($q));
				}
				# send this off for further validation
				my $new_q = check_boolean_query($q->{SUB_Q});
				return undef if ! $new_q;
				$q->{SUB_Q} = $new_q;
			}
			elsif ($q->{FN}) # this is the only param we *actually* need
			{	# check that it is of the correct type - it should be a fn ref
				if (! ref $q->{FN} || ref $q->{FN} ne 'CODE')
				{	return fail_query($query, "$op query must be a CODE ref, not ". ( ref $q->{FN} || 'SCALAR' ) );
				}
				
				# if there are arguments present, check they're in the correct form
				# if not, just put 'em into an array and hope for the best!
				if ($q->{ARGS})
				{	if (! ref $q->{ARGS} || ref $q->{ARGS} ne 'ARRAY')
					{	$q->{ARGS} = [ $q->{ARGS} ];
					}
				}
				
				# check the PARAMS argument is of the correct type
				# but what is the correct type?!?!
			}
			else
			{	# no recognised hash keys!
				return fail_query($query, "Could not find an appropriate hash key in $op query; keys are ".join(", ", keys %$q));
			}
		}
	}

	## Holy crap, I think we actually passed all the tests!!
	return $query;
}

sub fail_query {
	my $query = shift;
	my $str = shift;
#	debugme("$str\nQuery: ".Dumper($query)."\n");
	print STDERR "$str\nQuery: ".Dumper($query)."\n";
#	debugme("$str\nQuery: ".Dumper($query)."\n");
	return undef;
}


=head2 run_boolean_query

Run a boolean query on a test set

input:  query, in the form
      {
        OP1 => 
        [ { PARAM => x, FN => y, ARGS => z },
          SUB_Q => {
            OP2 =>
            [ { PARAM => a, FN => b, ARGS => c } ],
          },
        ],
      }
        OP can be 'AND', 'OR' or 'NOT'
        each subquery has the form:
        - test (the test to perform; empty if we're constructing a subquery)
        - args (the arguments for the test)
        - param (parameter to test; optional)
        
        OR
        
        - sub_q (a subquery)
        
        hash containing the objects

output: hash containing the objects that have passed the test

=cut

sub run_boolean_query {
	my ($q, $test_h) = @_;

	#	ensure that we are dealing with a hash
	if (ref($test_h) eq 'ARRAY')
	{	# create a hash out of the array for ease of merging, etc.
		my $temp;
		my $acc = 1;
		$acc++ and $temp->{$acc} = $_ foreach @$test_h;
		$test_h = $temp;
	}
	elsif (! ref($test_h) )
	{	my $temp = $test_h;
		$test_h->{test} = $temp;
	}

	my $current_h; # store our results here

	# 1) deal with mandatory subqueries
	if ($q->{AND})
	{	#debugme("Found an 'AND' subquery");
		$current_h = $test_h;
		foreach my $subQ ( @{$q->{AND}} )
		{	# update current_h to be the items that passed the test
			$current_h =  __do_test($subQ, $current_h) or return;

			#	we got no results for our mandatory query. Crap!
			return if ! $current_h || ! keys %$current_h;
		}
#		return unless $current_h;
	}

	# 2) deal with non-mandatory subqueries 
	if ($q->{OR})
	{	#debugme("Found an 'OR' subquery");
	
		#	if there is no AND criterion, we are basically starting from scratch
		#	if there are AND criteria, something suspicious is going on!
		if ($q->{AND})
		{	debugme("Possible problem... both AND and OR subqueries are present!");
		}
	
		foreach my $subQ (@{$q->{OR}})
		{	my $results =  __do_test($subQ, $test_h) or next;

#			debugme->("results: ".Dumper($results));
			#	if there are results, add them to the current list
		#	if ($results)
			if ($current_h)
			{	%$current_h = (%$results, %$current_h);
			}
			else
			{	%$current_h = %$results;
			}
		}

#		debugme->("current_h: ".Dumper($current_h));
		return if ! $current_h || ! keys %$current_h;
	}

	# 3) deal with negative subqueries (remove corresponding results)

	if ($q->{NOT})
	{	#debugme("Found an 'NOT' subquery");
		foreach my $subQ (@{$q->{NOT}}) {
		my $results =  __do_test($subQ, $current_h); # or next;
		# remove this result from the current results list
		delete $current_h->{$_} foreach keys %$results;
		last if !keys %$current_h;
		}
	
		if (keys %$current_h)
		{	debugme->((scalar keys %$current_h) . " results");
		}
		else
		{	debugme->("No results found.");
		}
	}
	return $current_h;
}

=head2 __do_test

Performs a test upon a data set

input:  subquery, hash of objects to be tested
output: hash containing the items that passed the test

=cut

sub __do_test { # returns a hash containing the items that passed the test
#	startme();
	my ($subQ, $test_h) = @_;
	# return if there is no test set
	return if !$test_h || !keys %$test_h;

	# if no test is specified, we're dealing with a subquery. Iterate further.
	return run_boolean_query($subQ->{SUB_Q}, $test_h) if !$subQ->{FN};

	# otherwise, get the info and do the test
	my $passed_h;
	my $args = $subQ->{ARGS};# || [];
	my $save_result = $subQ->{SAVE_RESULT};  # boolean

	while (my ($k, $v) = each %$test_h)
	{	my $test_set;
		if ($subQ->{PARAM})
		{	$test_set = $v->$subQ->{PARAM};
		}
		else
		{	$test_set = $v;
		}
		debugme->("No test set found") && next unless $test_set;

		if (!ref($test_set))
		{	#debugme->("\$_ = $test_set; \$test->(\$_, \$value) = ".Dumper( &$test( @$args, $test_set) ));

#			debugme->("Starting the test; test_set is $test_set; args is $args");

			my $result = &{$subQ->{FN}}( (@$args, $test_set) );
			if ($result)
			{	if ($save_result) # whether or not we want to keep the result or
				                  # preserve the original value
				{	$passed_h->{$k} = $result;
				}
				else
				{	# keep the old value intact
					$passed_h->{$k} = $v;
				}
			}
		}
		else
		{	foreach (@$test_set)
			{	my $result = &{$subQ->{FN}}( (@$args, $_) );
				#	if the result is positive, put it into the passed hash
				if ($result)
				{	if ($save_result)
					{	# we want to save the result of the fn
						$passed_h->{$k} = $result;
					}
					else
					{	# keep the old value intact
						$passed_h->{$k} = $v;
					}
					last;
				}
			}
		}
	}
	return $passed_h;
}

sub printerr {
	GO::MsgLite->printerr(@_);
}

sub debugme {
	GO::MsgLite->debugme(@_);
}

1;