package GO::Boolean; =head1 SYNOPSIS package GO::Boolean; Some nice little Boolean subroutines for general use =cut use strict; use lib 'go/scratch/tools'; use GO::MsgLite; use Data::Dumper; $Data::Dumper::Indent = 1; $Data::Dumper::Sortkeys = 1; =head2 check_boolean_query Checks the structure of a boolean query, just in case... The query should have the form $query = { OP1 => [ { PARAM => x, FN => y, ARGS => z }, SUB_Q => { OP2 => [ { PARAM => a, FN => b, ARGS => c, SAVE_RESULT => 1 }, { FN => d }, ], }, ], } OP can be 'AND', 'OR' or 'NOT' each subquery has the form: - FN: the test to perform; should be a function reference; can be empty if we're constructing a subquery - ARGS: the arguments for the test; arrayref; optional - PARAM: object parameter to test; optional - SAVE_RESULT: keep the result of FN OR - sub_q (a subquery) input: boolean query output: undefined if the query failed the test, with a message to STDERR explaining the problem the query if it was in the correct format (with any little problems corrected, should any have been found) =cut sub check_boolean_query { my $query = shift; if (! defined $query) { return fail_query($query, "Query is undefined!"); } # check the format is correct if (! ref $query || ref $query ne 'HASH') { return fail_query($query, "Query must be a HASH ref, not " . ( ref $query || 'SCALAR' ) ); } # check for keys and values if (! %$query || ! keys %$query || ! values %$query) { return fail_query($query, "Nothing defined in query hash!"); } # check we have one of the required keys if (! $query->{AND} && ! $query->{OR} && ! $query->{NOT}) { return fail_query($query, "Could not find a valid boolean operator in query"); } # check we don't have too many of the required keys! if ( (grep { $_ =~ /^(AND|NOT|OR)$/ } keys %$query) > 1) { return fail_query($query, "Too many operators in query"); } # check that we have something appropriate in the function list foreach my $op qw(AND NOT OR) { next unless $query->{$op}; # we have a key, but nothing in the value! if (! defined $query->{$op}) { return fail_query($query, "No operations defined in query"); } # check that the operations are in the correct format if (! ref $query->{$op}) { # a scalar... we can't really do anything with this, sadly return fail_query($query, "$op query value must be an array, not SCALAR"); } elsif (ref $query->{$op} ne 'ARRAY') { return fail_query($query, "$op query value must be an array, not ". ref $query->{$op} ); } elsif (! @{$query->{$op}}) { # there's nothing in the list return fail_query($query, "$op query set is empty"); } # OK, we have an array. Now let's check the array contents are OK # the array should contain hashrefs, either to a subquery, or to the test # that should be performed foreach my $q (@{$query->{$op}}) { if (! ref $q || ref $q ne 'HASH') { return fail_query($query, "query set function must be a HASH ref, not ". ( ref $q || 'SCALAR' ) ); } if (! %$q || ! keys %$q || ! values %$q) { return fail_query($query, "Nothing defined in subquery hash!"); } # see if there is a subquery present or not if ($q->{SUB_Q}) { # see how many other keys we have... if (scalar keys %$q > 1) { printerr("warning: other keys present in $op hash!\n".Dumper($q)); } # send this off for further validation my $new_q = check_boolean_query($q->{SUB_Q}); return undef if ! $new_q; $q->{SUB_Q} = $new_q; } elsif ($q->{FN}) # this is the only param we *actually* need { # check that it is of the correct type - it should be a fn ref if (! ref $q->{FN} || ref $q->{FN} ne 'CODE') { return fail_query($query, "$op query must be a CODE ref, not ". ( ref $q->{FN} || 'SCALAR' ) ); } # if there are arguments present, check they're in the correct form # if not, just put 'em into an array and hope for the best! if ($q->{ARGS}) { if (! ref $q->{ARGS} || ref $q->{ARGS} ne 'ARRAY') { $q->{ARGS} = [ $q->{ARGS} ]; } } # check the PARAMS argument is of the correct type # but what is the correct type?!?! } else { # no recognised hash keys! return fail_query($query, "Could not find an appropriate hash key in $op query; keys are ".join(", ", keys %$q)); } } } ## Holy crap, I think we actually passed all the tests!! return $query; } sub fail_query { my $query = shift; my $str = shift; # debugme("$str\nQuery: ".Dumper($query)."\n"); print STDERR "$str\nQuery: ".Dumper($query)."\n"; # debugme("$str\nQuery: ".Dumper($query)."\n"); return undef; } =head2 run_boolean_query Run a boolean query on a test set input: query, in the form { OP1 => [ { PARAM => x, FN => y, ARGS => z }, SUB_Q => { OP2 => [ { PARAM => a, FN => b, ARGS => c } ], }, ], } OP can be 'AND', 'OR' or 'NOT' each subquery has the form: - test (the test to perform; empty if we're constructing a subquery) - args (the arguments for the test) - param (parameter to test; optional) OR - sub_q (a subquery) hash containing the objects output: hash containing the objects that have passed the test =cut sub run_boolean_query { my ($q, $test_h) = @_; # ensure that we are dealing with a hash if (ref($test_h) eq 'ARRAY') { # create a hash out of the array for ease of merging, etc. my $temp; my $acc = 1; $acc++ and $temp->{$acc} = $_ foreach @$test_h; $test_h = $temp; } elsif (! ref($test_h) ) { my $temp = $test_h; $test_h->{test} = $temp; } my $current_h; # store our results here # 1) deal with mandatory subqueries if ($q->{AND}) { #debugme("Found an 'AND' subquery"); $current_h = $test_h; foreach my $subQ ( @{$q->{AND}} ) { # update current_h to be the items that passed the test $current_h = __do_test($subQ, $current_h) or return; # we got no results for our mandatory query. Crap! return if ! $current_h || ! keys %$current_h; } # return unless $current_h; } # 2) deal with non-mandatory subqueries if ($q->{OR}) { #debugme("Found an 'OR' subquery"); # if there is no AND criterion, we are basically starting from scratch # if there are AND criteria, something suspicious is going on! if ($q->{AND}) { debugme("Possible problem... both AND and OR subqueries are present!"); } foreach my $subQ (@{$q->{OR}}) { my $results = __do_test($subQ, $test_h) or next; # debugme->("results: ".Dumper($results)); # if there are results, add them to the current list # if ($results) if ($current_h) { %$current_h = (%$results, %$current_h); } else { %$current_h = %$results; } } # debugme->("current_h: ".Dumper($current_h)); return if ! $current_h || ! keys %$current_h; } # 3) deal with negative subqueries (remove corresponding results) if ($q->{NOT}) { #debugme("Found an 'NOT' subquery"); foreach my $subQ (@{$q->{NOT}}) { my $results = __do_test($subQ, $current_h); # or next; # remove this result from the current results list delete $current_h->{$_} foreach keys %$results; last if !keys %$current_h; } if (keys %$current_h) { debugme->((scalar keys %$current_h) . " results"); } else { debugme->("No results found."); } } return $current_h; } =head2 __do_test Performs a test upon a data set input: subquery, hash of objects to be tested output: hash containing the items that passed the test =cut sub __do_test { # returns a hash containing the items that passed the test # startme(); my ($subQ, $test_h) = @_; # return if there is no test set return if !$test_h || !keys %$test_h; # if no test is specified, we're dealing with a subquery. Iterate further. return run_boolean_query($subQ->{SUB_Q}, $test_h) if !$subQ->{FN}; # otherwise, get the info and do the test my $passed_h; my $args = $subQ->{ARGS};# || []; my $save_result = $subQ->{SAVE_RESULT}; # boolean while (my ($k, $v) = each %$test_h) { my $test_set; if ($subQ->{PARAM}) { $test_set = $v->$subQ->{PARAM}; } else { $test_set = $v; } debugme->("No test set found") && next unless $test_set; if (!ref($test_set)) { #debugme->("\$_ = $test_set; \$test->(\$_, \$value) = ".Dumper( &$test( @$args, $test_set) )); # debugme->("Starting the test; test_set is $test_set; args is $args"); my $result = &{$subQ->{FN}}( (@$args, $test_set) ); if ($result) { if ($save_result) # whether or not we want to keep the result or # preserve the original value { $passed_h->{$k} = $result; } else { # keep the old value intact $passed_h->{$k} = $v; } } } else { foreach (@$test_set) { my $result = &{$subQ->{FN}}( (@$args, $_) ); # if the result is positive, put it into the passed hash if ($result) { if ($save_result) { # we want to save the result of the fn $passed_h->{$k} = $result; } else { # keep the old value intact $passed_h->{$k} = $v; } last; } } } } return $passed_h; } sub printerr { GO::MsgLite->printerr(@_); } sub debugme { GO::MsgLite->debugme(@_); } 1;