=head1 SYNOPSIS package GO::Utilities; Generally useful bits and pieces for AmiGO. =cut package GO::Utilities; use strict; use lib 'go/scratch/tools'; use Exporter; use Template; use GO::TestSet; use GO::Boolean; use HTML::Entities; #use Template 2.19; #use Template::Constants qw( :all ); use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION); @ISA = ('Exporter'); #@EXPORT = qw(get_environment_param get_valid_list add_value_to_list remove_value_from_list); @EXPORT_OK = qw(get_results_chunk get_n_chunks get_valid_list add_value_to_list remove_value_from_list get_environment_param get_external_environmental_param sort_list group_data_by_param turn_into_text get_matching_objects); # process_page_template %EXPORT_TAGS = ( all => [ qw( get_results_chunk get_n_chunks get_valid_list add_value_to_list remove_value_from_list get_environment_param get_external_environmental_param sort_list group_data_by_param turn_into_text get_matching_objects ) ],# process_page_template std => [ qw(get_results_chunk get_n_chunks sort_list get_valid_list get_environment_param get_external_environmental_param ) ],# process_page_template ); use Data::Dumper; $Data::Dumper::Indent = 1; $Data::Dumper::Sortkeys = 1; =head2 get_results_chunk Arguments - list, chunk_n - chunk number; defaults to 1 chunk_size - how many items in a chunk; defaults to $ENV{AMIGO_PAGE_SIZE} chunk_by - what constitutes an 'item'; defaults to a list item returns - hash->{n_chunks} - total number of chunks, hash->{subset} - chunk X of the input =cut sub get_results_chunk { my $fullset = shift; my $chunk_args = shift; return unless $fullset && @$fullset; my $chunk_by = $chunk_args->{chunk_by} || undef; my $chunk_n = $chunk_args->{chunk_n} || 1; my $chunk_size = $chunk_args->{chunk_size} || get_environment_param('page_size'); # DEBUG # print STDERR "list size: ".(scalar @$fullset)."; n = $chunk_n, size = $chunk_size, by = " . ($chunk_by || 'undef') ."\n"; # we want everything. No changes required. return { n_chunks => 1, subset => $fullset } if $chunk_size eq 'all'; # first let's see how many chunks we have # we may need to transform the results slightly to be able to work this out my $total_items; # the number of items in the list my $results_to_chunk; # another list to store results in for easy chunking my $subset; # we'll store the results here my $temp_h; # a nice little hash for storage purposes when transforming the list my $subr; if ($chunk_by) # we know how to transform the results { #$subr = $transforms->{$chunk_by} if $transforms->{$chunk_by}; $subr = __get_me($chunk_by); } if ($subr) { my $last = ""; foreach (@$fullset) { my $item = $subr->($_); push @{$temp_h->{$item}}, $_; if ($item ne $last) { push @$results_to_chunk, $item; } $last = $item; } } else # we'll assume that the list item is what needs to be counted { $results_to_chunk = $fullset; } $total_items = scalar @$results_to_chunk; # do the calculation for the number of chunks my $n_chunks = get_n_chunks($total_items, $chunk_size); # now let's do the chunking itself. my ($from, $to) = ($chunk_size * ($chunk_n - 1), $chunk_size * $chunk_n - 1); if ($from > $total_items) # our chunk number is beyond the range of viable chunks. Return the last page of results instead { $chunk_n = $n_chunks; ($from, $to) = ($chunk_size * ($n_chunks - 1), $total_items - 1); } if ($to >= $total_items) { $to = $total_items - 1; } @$subset = @$results_to_chunk[$from..$to]; # transform the results back, if necessary if ($subr) { my @temp; foreach (@$subset) { push @temp, @{$temp_h->{$_}}; } $subset = [ @temp ]; } return { n_chunks => $n_chunks, subset => $subset, chunk_n => $chunk_n }; } =head2 get_n_chunks Arguments - no of results, chunk size (optional) returns - number of chunks required to show the results =cut sub get_n_chunks { my $total_items = shift; my $chunk_size = shift || get_environment_param('page_size'); my $n_chunks = 1; # DEBUG # print STDERR "n items = $total_items; chunk size: $chunk_size\n"; return $n_chunks if $chunk_size eq "all"; # do the calculation for the number of chunks $n_chunks = int($total_items / $chunk_size); $n_chunks++ if ($total_items % $chunk_size); return $n_chunks; } =head2 get_environment_param Get an AmiGO environment variable (from config.pl) =cut sub get_environment_param { my $var = shift; if (defined($ENV{uc("AMIGO_$var")})) { return $ENV{uc("AMIGO_$var")}; } return; } =head2 get_external_environmental_param Get an environment variable from the outside (like CGI) =cut sub get_external_environmental_param { my $var = shift; my $retval = ""; if (defined($ENV{uc("$var")})) { $retval = $ENV{uc("$var")}; } return $retval; } =head2 set_message Set messages / errors Arguments - message hash, message class: fatal, warning or info message type: e.g. 'no_valid_query', 'no_results' what it affects (optional) Returns - new improved message hash sub set_message { my $error = shift; my $class = shift; my $type = shift; my $affects = shift || []; if (!ref($affects)) { $affects = [ $affects ]; } push @{$error->{$class}{$type}}, @$affects; return $error; } # not used (at the moment) sub get_message { my $error = shift; my $class = shift || undef; if ($class && $error->{$class}) { return $error->{$class}; } return $error; } =cut =head2 get_valid_list Takes a listref and checks for duplicates / undef entries, etc. Returns a ref to the valid items in the list =cut sub get_valid_list { my $list = shift; return unless $list; return [$list] if !ref($list); # check for / remove dups and blank entries my %hash; my @newlist; foreach (@$list) { if (defined $_) { push @newlist, $_ unless $hash{$_}; $hash{$_}++; } } return [ @newlist ] || undef; } =head2 add_value_to_list Adds a value to a list, ensuring that there are no duplicates =cut sub add_value_to_list { my $list = shift; my $value = shift; return $list if !$value; return [ $value ] if (!$list || !@$list); my %hash; my @newlist; foreach (@$list) { if (defined $_) { push @newlist, $_ unless $hash{$_}; $hash{$_}++; } } push @newlist, $value unless $hash{$value}; return [ @newlist ] || undef; } =head2 remove_value_from_list Removes a value from a list =cut sub remove_value_from_list { my $list = shift; my $value = shift; return $list if !$value; return if (!$list || !@$list); my %hash; my @newlist; $hash{$value} = 1; foreach (@$list) { if (defined $_) { push @newlist, $_ unless $hash{$_}; $hash{$_}++; } } return [ @newlist ] || undef; } =head2 sort_list Input: self, arg_h consisting of list => [ list to be sorted ] crit => [ list of sort criteria ] Output: sorted list This works by creating a list of functions or transformations to be performed upon the list, the results of which are sorted. =cut sub sort_list { my $args = shift; # DEBUG # print STDERR "arguments: ".Dumper($args); my ($list, $sort_crit) = ($args->{list}, $args->{crit}); # return unless we have a list of greater than one in length return if !$list || !@$list; return $list if (scalar @$list == 1 || !$sort_crit); # make sure it's an array $sort_crit = [$sort_crit] unless (ref($sort_crit) eq 'ARRAY'); # return if it's an empty list return $list if scalar @$sort_crit == 0; # build up our transformation subroutine my $fn_list = [ map { &_return_fn($_) } @$sort_crit ]; # DEBUG # print STDERR "function list: ".Dumper($fn_list)."\n"; my @sorted_list; my %refs; my $acc = 0; @sorted_list = map { $refs{(split("\0", $_))[-1]} } sort map { my $val = $_; $acc++; $refs{ sprintf("%08d", $acc) } = $val; join("\0", (map { $_->($val) } @$fn_list), sprintf("%08d", $acc), ); } @$list; # DEBUG # print STDERR "sorted list: " . Dumper(\@sorted_list)."\n"; return \@sorted_list; } =head2 _return_fn Input: the sort criterion or bit of data in an encoded form Output: a subroutine to get that bit of data =cut sub _return_fn { my $fn = shift; # print STDERR "fn = ".Dumper($fn)); return if !$fn; # return if there's no function # sub { # my $item = shift; # my $func; if (!ref($fn)) { # assume this is the function we need to call # $func = return sub { my $item = shift; # # DEBUG # print STDERR "ref of item: ".Dumper( ref($item) )."\n"; # if ($item->can($fn)) # { return lc($item->$fn); # } return lc($item->$fn) || lc($item->{$fn}) || undef; }; } elsif (ref($fn) eq 'CODE') { return $fn; } =cut this for now elsif (ref($fn) eq 'ARRAY') { my $fn_arr; foreach (@$fn) { my $sub = _return_fn($_); if (!$sub) { # DEBUG print STDERR "Problem with _return_fn arg $_; returning undef\n"; return undef; } push @$fn_arr, $sub; } # convert the current fn_arr into a function in its own right! return sub { my $item = shift; foreach (@$fn_arr) { my $result = &$_($item); return undef if ! $result; # set item to be the result of the function $item = $result; } return $item; }; } =cut elsif (ref($fn) eq 'HASH') { while ( my ($k, $v) = each %$fn ) { if ($k eq 'in_list') { # looking for $item in list v # $func = return sub { my $item = shift; return 1 if grep { lc($item) eq lc($_) } @$v; return 0; }; } elsif ($k eq 'has_fn') { # getting a certain fn value # $func = return sub { my $item = shift; return lc($item->$v) || undef; }; } elsif ($k eq 'has_hash_key') { # getting a hash key # $func = return sub { my $item = shift; return lc($item->{$v}) || undef; }; } elsif ($k eq 'do_fn') { #$v->{fn} is the function we want to perform #$v->{obj} is what we want to perform it on return sub { my $item = shift; # check if $v->{obj} is a value or if it's another nested thing if (ref($v->{obj}) eq 'HASH') { # replace $v->{obj} with the value $v->{obj} = &_return_fn($v->{obj}, $item); } # my $first_item = $v->{obj}->($item); # my $second_item = $v->{fn}->( $v->{obj}->($item) ); # # DEBUG # print STDERR "first item: ".Dumper($first_item); # print STDERR "second item: ".Dumper($second_item); return $v->{fn}->( $v->{obj}->($item) ); }; } } } else { print STDERR "Cannot understand _return_fn argument $fn"; return undef; } # }; } =head2 group_data_by_param group data by a parameter of the data input: self, array of objects, parameter to group by output: hash with the parameter value (or 'NONE') as the key and an array of matching objects as the values =cut sub group_data_by_param { my $self = shift; my ($obj_arr, $group_by) = @_; my ($test, $param, $args) = ( $group_by->{FN}, $group_by->{PARAM}, $group_by->{ARGS} ); print STDERR "test: ".Dumper($test)."param: ".Dumper($param)."args: ".Dumper($args); my $group_h; # where we're going to store the results my $sub; # the subroutine to perform upon each member of the list # use the appropriate subroutine, according to what we have in the spec my $spec = $self->get_spec; # this needs to be changed! my $p_type = $spec->{$param}{type} || 'unknown'; # sort out our subroutine # this is what we're doing to each member of the list if ($test && $args) # we have a test, so group by the results of the test { $p_type = 'scalar'; # this will return either true or false, i.e. a scalar $sub = sub { my $entry = shift; if ( &$test->( $args, $entry->$param ) ) { return 'PASS'; } else { return 'FAIL'; } }; print STDERR "test and value are present\n"; } elsif ($test) # the 'test' is actually a transform of some sort { $sub = sub { my $entry = shift; return &$test->( $entry->$param ); }; print STDERR "test is present\n"; } else # no value, a value but no test, or just the parameter { # group entries by this parameter $sub = sub { my $entry = shift; return $entry->$param; }; print STDERR "test not found: $param\n"; } # we will treat the values produced above in different ways, according to # whether they're scalar or arrays my $grouping_subroutine = { 'scalar' => sub { my $entry = shift; # param is a single value my $val = $sub->($entry) || 'NONE'; push @{$group_h->{ $val }}, $entry; }, 'array' => sub { my $entry = shift; # param is a list of values my $val_list = $sub->($entry) || [ 'NONE' ]; # put the entry in every category under which it fits push @{$group_h->{ $_ }}, $entry foreach @$val_list; }, 'unknown' => sub { my $entry = shift; print STDERR "performing sub unknown!\n"; my $val = $sub->($entry) || [ 'NONE' ]; # convert param into a list $val = [ $val ] if ref($val) ne 'ARRAY'; # put the entry in every category under which it fits push @{$group_h->{ $_ }}, $entry foreach @$val; }, }; print STDERR "p_type = ".$p_type."\n"; # do the sub on each entry of the list foreach my $entry (@$obj_arr) { #$grouping_subroutine->{$p_type}->($_); # print STDERR "performing sub unknown!\n"; my $val = $sub->($entry) || [ 'NONE' ]; # convert param into a list $val = [ $val ] if ref($val) ne 'ARRAY'; # put the entry in every category under which it fits push @{$group_h->{ $_ }}, $entry foreach @$val; } # print STDERR "group_h: ".Dumper( $group_h ); return $group_h; } sub __get_me { my $to_get = shift; # return if there is no transform specified return if !$to_get; print STDERR "to_get: ".Dumper($to_get); sub { if (ref($to_get) eq 'HASH') { my $item = shift; my ($k, $v); while (($k, $v) = each %$to_get) { if ($k eq 'in_list') { return 1 if grep { $item eq $_ } @$v; return 0; } elsif ($k eq 'has_fn') { return $item->$v || undef; } elsif ($k eq 'has_hash_key') { return $item->{$v} || undef; } elsif ($k eq 'do_fn') { # check if $v->{obj} is a value or if it's another nested thing if (ref($v->{obj}) eq 'HASH') { # replace $v->{obj} with the value $v->{obj} = &__get_me($v->{obj}, $item); } return $v->{fn}->( $v->{obj}->($item) ); } last; } } elsif (ref($to_get) eq 'ARRAY') { return join("\0", map { __get_me($_) } @$to_get ); } else { if ($to_get eq 'arr[0]') { my $item = shift; return $item->[0] || ''; } elsif ($to_get eq 'arr[1]') { my $item = shift; return $item->[1] || ''; } elsif ($to_get eq 'arr[2]') { my $item = shift; return $item->[2] || ''; } elsif ($to_get eq 'arr[0,1]') { my $item = shift; return ($item->[0] || '') . "\0" . ($item->[1] || ''); } elsif ($to_get eq 'arr[0,2]') { my $item = shift; return join("\0", $item->[0] || '', $item->[2] || ''); # return __get_me('arr[0]', @_) . "\0" . __get_me('arr[2]', @_); } } return shift || undef; } } sub get_me { return __get_me(@_); } =head2 turn_into_text Turns (semi-) formatted text into slightly more formatted text =cut sub turn_into_text { my $proto_text = shift; # encode any ampersands $proto_text =~ s/&/&/gs; # encode superscripts $proto_text =~ s/\^(-?\d+)/$1<\/sup>/gs; ## add A links to URLs # if there's anything that starts with www and doesn't have an http:// on it, # add the prefix # $proto_text =~ s/(?\)]|\.$)/$1$3<\/a>$4/gis; print STDERR "Turning into text: $proto_text\n"; return $proto_text; } =head2 summarize_errors Creates a summary of the input errors input: $results, # a Data::FormValidator::Results object $joiner # a string with which to join the results, optional output: an array of strings indicating which fields are erroneous if $joiner is specified, returns as a single string, with results separated by $joiner =cut sub summarize_errors { my $results = shift; my $str; # print STDERR "results: ".Dumper($results); # make a summary of the errors # Print the name of missing fields if ( $results->has_missing ) { push ( @$str, "$_ is missing" ) for ( $results->missing ); } # Print the name of invalid fields if ( $results->has_invalid ) { push @$str, "$_ is invalid" foreach ( $results->invalid ); } # Print the name of unknown fields if ( $results->has_unknown ) { push ( @$str, "$_ is unknown" ) for ( $results->unknown ); } my $joiner = shift; if ($joiner) { return join($joiner, @$str); } # return as an array otherwise return $str; } =head2 check_list_for_values input: arg_h->{query_list} # what we're looking for arg_h->{results} # where we're looking for it arg_h->{test} # the value(s) we need from each resultperl output: { missing => [ ids ], found => [ ids ] } =cut sub check_list_for_values { my $args = shift; my $query_list = $args->{query_list}; # results list. If absent, abort straight away if (! $args->{results}) { return { missing => $args->{query_list} }; } # otherwise, see my $test = $args->{test}; # how to specify this?! $test = sub { my ($a, $b) = @_; return 1 if ($a eq $b || $a == $b); return 0; } if ! defined $test; my $result_h; QUERY_LIST: foreach (@$query_list) { # look for this in the results list foreach my $r ( @{$args->{results}} ) { if ( &$test($r, $_) ) { push @{$result_h->{found}}, $_; next QUERY_LIST; } } push @{$result_h->{missing}}, $_; } return $result_h; } 1;