=head1 SYNOPSIS

package GO::Utilities;

Generally useful bits and pieces for AmiGO.

=cut

package GO::Utilities;

use strict;
use lib 'go/scratch/tools';
use Exporter;
use Template;
use GO::TestSet;
use GO::Boolean;
use HTML::Entities;
#use Template 2.19;
#use Template::Constants qw( :all );
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION);

@ISA = ('Exporter');
#@EXPORT = qw(get_environment_param get_valid_list add_value_to_list remove_value_from_list);
@EXPORT_OK = qw(get_results_chunk get_n_chunks get_valid_list add_value_to_list remove_value_from_list get_environment_param get_external_environmental_param sort_list group_data_by_param turn_into_text get_matching_objects); # process_page_template

%EXPORT_TAGS = (
	all => [ qw( get_results_chunk get_n_chunks get_valid_list add_value_to_list remove_value_from_list get_environment_param get_external_environmental_param sort_list group_data_by_param turn_into_text get_matching_objects ) ],# process_page_template
	std => [ 
	qw(get_results_chunk get_n_chunks
	sort_list
	get_valid_list
	get_environment_param
	get_external_environmental_param ) ],#	process_page_template

);

use Data::Dumper;
$Data::Dumper::Indent = 1;
$Data::Dumper::Sortkeys = 1;

=head2 get_results_chunk

  Arguments - list, 
              chunk_n          - chunk number; defaults to 1
              chunk_size       - how many items in a chunk;
                                 defaults to $ENV{AMIGO_PAGE_SIZE}
              chunk_by         - what constitutes an 'item'; 
                                 defaults to a list item
  returns   - hash->{n_chunks} - total number of chunks,
              hash->{subset}   - chunk X of the input

=cut

sub get_results_chunk {

	my $fullset = shift;
	my $chunk_args = shift;

	return unless $fullset && @$fullset;

	my $chunk_by = $chunk_args->{chunk_by} || undef;
	my $chunk_n = $chunk_args->{chunk_n} || 1;
	my $chunk_size = $chunk_args->{chunk_size} || get_environment_param('page_size');

	# DEBUG
#	print STDERR "list size: ".(scalar @$fullset)."; n = $chunk_n, size = $chunk_size, by = " . ($chunk_by || 'undef') ."\n";

	#	we want everything. No changes required.
	return { n_chunks => 1, subset => $fullset } if $chunk_size eq 'all';

	#	first let's see how many chunks we have
	#	we may need to transform the results slightly to be able to work this out
	my $total_items;      # the number of items in the list
	my $results_to_chunk; # another list to store results in for easy chunking
	my $subset;           # we'll store the results here
	my $temp_h;           # a nice little hash for storage purposes when transforming the list

	my $subr;
	if ($chunk_by) # we know how to transform the results
	{	#$subr = $transforms->{$chunk_by} if $transforms->{$chunk_by};
		$subr = __get_me($chunk_by);
	}
	
	if ($subr)
	{	my $last = "";
		foreach (@$fullset)
		{	my $item = $subr->($_);
			push @{$temp_h->{$item}}, $_;
			if ($item ne $last)
			{	push @$results_to_chunk, $item;
			}
			$last = $item;
		}
	}
	else # we'll assume that the list item is what needs to be counted
	{	$results_to_chunk = $fullset;
	}
	$total_items = scalar @$results_to_chunk;

	#	do the calculation for the number of chunks
	my $n_chunks = get_n_chunks($total_items, $chunk_size);

	#	now let's do the chunking itself.
	my ($from, $to) = ($chunk_size * ($chunk_n - 1), $chunk_size * $chunk_n - 1);

	if ($from > $total_items) # our chunk number is beyond the range of viable chunks. Return the last page of results instead
	{	$chunk_n = $n_chunks;
		($from, $to) = ($chunk_size * ($n_chunks - 1), $total_items - 1);
	}

	if ($to >= $total_items) {
		$to = $total_items - 1;
	}

	@$subset = @$results_to_chunk[$from..$to];

	#	transform the results back, if necessary
	if ($subr)
	{	my @temp;
		foreach (@$subset)
		{	push @temp, @{$temp_h->{$_}};
		}
		$subset = [ @temp ];
	}

	return { n_chunks => $n_chunks, subset => $subset, chunk_n => $chunk_n };

}


=head2 get_n_chunks

  Arguments - no of results, chunk size (optional)
  returns   - number of chunks required to show the results

=cut

sub get_n_chunks {
	my $total_items = shift;
	my $chunk_size = shift || get_environment_param('page_size');
	my $n_chunks = 1;

	# DEBUG
#	print STDERR "n items = $total_items; chunk size: $chunk_size\n";

	return $n_chunks if $chunk_size eq "all";

#	do the calculation for the number of chunks
	$n_chunks = int($total_items / $chunk_size);
	$n_chunks++ if ($total_items % $chunk_size);

	return $n_chunks;
}

=head2 get_environment_param

	Get an AmiGO environment variable (from config.pl)

=cut

sub get_environment_param {
	my $var = shift;
	if (defined($ENV{uc("AMIGO_$var")})) {
		return $ENV{uc("AMIGO_$var")};
	}
	return;
}


=head2 get_external_environmental_param

	Get an environment variable from the outside (like CGI)

=cut

sub get_external_environmental_param {

  my $var = shift;

  my $retval = "";
  if (defined($ENV{uc("$var")})) {
    $retval = $ENV{uc("$var")};
  }
  return $retval;
}


=head2 set_message

	Set messages / errors

	Arguments - message hash,
	            message class: fatal, warning or info
	            message type: e.g. 'no_valid_query', 'no_results'
	            what it affects (optional)

	Returns   - new improved message hash

sub set_message {
	my $error = shift;
	my $class = shift;
	my $type = shift;
	my $affects = shift || [];

	if (!ref($affects))
	{	$affects = [ $affects ];
	}

	push @{$error->{$class}{$type}}, @$affects;

	return $error;
}

#	not used (at the moment)
sub get_message {
	my $error = shift;
	my $class = shift || undef;

	if ($class && $error->{$class})
	{	return $error->{$class};
	}
	return $error;
}

=cut


=head2 get_valid_list

Takes a listref and checks for duplicates / undef entries, etc.
Returns a ref to the valid items in the list

=cut

sub get_valid_list {
	my $list = shift;

	return unless $list;

	return [$list] if !ref($list);

	#	check for / remove dups and blank entries
	my %hash;
	my @newlist;

	foreach (@$list)
	{	if (defined $_)
		{	push @newlist, $_ unless $hash{$_};
			$hash{$_}++;
		}
	}
	return [ @newlist ] || undef;
}

=head2 add_value_to_list

Adds a value to a list, ensuring that there are no duplicates

=cut

sub add_value_to_list {
	my $list = shift;
	my $value = shift;

	return $list if !$value;
	return [ $value ] if (!$list || !@$list);
	
	my %hash;
	my @newlist;

	foreach (@$list)
	{	if (defined $_)
		{	push @newlist, $_ unless $hash{$_};
			$hash{$_}++;
		}
	}
	push @newlist, $value unless $hash{$value};

	return [ @newlist ] || undef;
}

=head2 remove_value_from_list

Removes a value from a list

=cut

sub remove_value_from_list {
	my $list = shift;
	my $value = shift;

	return $list if !$value;
	return if (!$list || !@$list);

	my %hash;
	my @newlist;
	$hash{$value} = 1;
	foreach (@$list)
	{	if (defined $_)
		{	push @newlist, $_ unless $hash{$_};
			$hash{$_}++;
		}
	}

	return [ @newlist ] || undef;
}


=head2 sort_list

Input:  self, arg_h consisting of
        list => [ list to be sorted ]
        crit => [ list of sort criteria ]

Output: sorted list

This works by creating a list of functions or transformations to be performed
upon the list, the results of which are sorted.

=cut

sub sort_list {
	my $args = shift;

	# DEBUG
#	print STDERR "arguments: ".Dumper($args);

	my ($list, $sort_crit) = ($args->{list}, $args->{crit});

	#	return unless we have a list of greater than one in length
	return if !$list || !@$list;
	return $list if (scalar @$list == 1 || !$sort_crit);

	#	make sure it's an array
	$sort_crit = [$sort_crit] unless (ref($sort_crit) eq 'ARRAY');

	#	return if it's an empty list
	return $list if scalar @$sort_crit == 0;

	#	build up our transformation subroutine
	my $fn_list = [ map { &_return_fn($_) } @$sort_crit ];

	# DEBUG
#	print STDERR "function list: ".Dumper($fn_list)."\n";

	my @sorted_list;
	my %refs;
	my $acc = 0;
	
	@sorted_list =
		map { $refs{(split("\0", $_))[-1]} }
		sort
		map {
			my $val = $_;
			$acc++;
			$refs{ sprintf("%08d", $acc) } = $val;
			join("\0",
				(map { $_->($val) } @$fn_list),
				sprintf("%08d", $acc),
			);
		} @$list;

	# DEBUG
#	print STDERR "sorted list: " . Dumper(\@sorted_list)."\n";

	return \@sorted_list;
}


=head2 _return_fn

Input:  the sort criterion or bit of data in an encoded form

Output: a subroutine to get that bit of data

=cut

sub _return_fn {
	my $fn = shift;
	
#	print STDERR "fn = ".Dumper($fn));
	
	return if !$fn; # return if there's no function
	
#	sub {

#	my $item = shift;

#	my $func;
	if (!ref($fn))
	{	# assume this is the function we need to call
#		$func =
		return
		sub {
			my $item = shift;
#			# DEBUG
#			print STDERR "ref of item: ".Dumper( ref($item) )."\n";
			
#			if ($item->can($fn))
#			{	return lc($item->$fn);
#			}
			
			return lc($item->$fn) || 
			lc($item->{$fn}) || undef;
		};
	}
	elsif (ref($fn) eq 'CODE')
	{	return $fn;
	}
=cut this for now
	elsif (ref($fn) eq 'ARRAY')
	{	my $fn_arr;
		foreach (@$fn)
		{	my $sub = _return_fn($_);
			if (!$sub)
			{	# DEBUG
				print STDERR "Problem with _return_fn arg $_; returning undef\n";
				return undef;
			}
			push @$fn_arr, $sub;
		}
		# convert the current fn_arr into a function in its own right!
		return
		sub {
			my $item = shift;
			foreach (@$fn_arr)
			{	my $result = &$_($item);
				return undef if ! $result;
				# set item to be the result of the function
				$item = $result;
			}
			return $item;
		};
	}
=cut
	elsif (ref($fn) eq 'HASH')
	{	while ( my ($k, $v) = each %$fn )
		{	if ($k eq 'in_list')
			{	#	looking for $item in list v
#				$func = 
				return
				sub {
					my $item = shift;
					return 1 if grep { lc($item) eq lc($_) } @$v;
					return 0;
				};
			}
			elsif ($k eq 'has_fn')
			{	#	getting a certain fn value
#				$func =
				return
				sub {
					my $item = shift;
					return lc($item->$v) || undef;
				};
			}
			elsif ($k eq 'has_hash_key')
			{	#	getting a hash key
#				$func =
				return
				sub {
					my $item = shift;
					return lc($item->{$v}) || undef;
				};
			}
			elsif ($k eq 'do_fn')
			{	#$v->{fn} is the function we want to perform
				#$v->{obj} is what we want to perform it on
				return
				sub {
					my $item = shift;
					# check if $v->{obj} is a value or if it's another nested thing
					if (ref($v->{obj}) eq 'HASH')
					{	# replace $v->{obj} with the value
						$v->{obj} = &_return_fn($v->{obj}, $item);
					}
#					my $first_item = $v->{obj}->($item);
#					my $second_item = $v->{fn}->( $v->{obj}->($item) );
#					# DEBUG
#					print STDERR "first item: ".Dumper($first_item);
#					print STDERR "second item: ".Dumper($second_item);
					return $v->{fn}->( $v->{obj}->($item) );
				};
			}
		}
	}
	else
	{	print STDERR "Cannot understand _return_fn argument $fn";
		return undef;
	}

#	};
}

=head2 group_data_by_param

group data by a parameter of the data

input:  self, array of objects, parameter to group by
output: hash with the parameter value (or 'NONE') as the key and an array
        of matching objects as the values

=cut

sub group_data_by_param {
	my $self = shift;
	
	my ($obj_arr, $group_by) = @_;

	my ($test, $param, $args) = ( $group_by->{FN}, $group_by->{PARAM}, $group_by->{ARGS} );

	print STDERR "test: ".Dumper($test)."param: ".Dumper($param)."args: ".Dumper($args);

	my $group_h;  # where we're going to store the results
	my $sub;      # the subroutine to perform upon each member of the list

	# use the appropriate subroutine, according to what we have in the spec
	my $spec = $self->get_spec;   # this needs to be changed!
	my $p_type = $spec->{$param}{type} || 'unknown';

	#	sort out our subroutine
	#	this is what we're doing to each member of the list
	if ($test && $args) # we have a test, so group by the results of the test
	{	$p_type = 'scalar'; # this will return either true or false, i.e. a scalar
		$sub = sub {
			my $entry = shift;
			if ( &$test->( $args, $entry->$param ) )
			{	return 'PASS';
			}
			else
			{	return 'FAIL';
			}
		};

		print STDERR "test and value are present\n";

	}
	elsif ($test) # the 'test' is actually a transform of some sort
	{	
		$sub = sub {
			my $entry = shift;
			return &$test->( $entry->$param );
		};

		print STDERR "test is present\n";

	}
	else  # no value, a value but no test, or just the parameter
	{	# group entries by this parameter

		$sub = sub {
			my $entry = shift; return $entry->$param;
		};

		print STDERR "test not found: $param\n";

	}

	# we will treat the values produced above in different ways, according to
	# whether they're scalar or arrays
	my $grouping_subroutine = {
		'scalar' => sub {
			my $entry = shift;
			# param is a single value
			my $val = $sub->($entry) || 'NONE';
			push @{$group_h->{ $val }}, $entry;
		},
		'array' => sub {
			my $entry = shift;
			# param is a list of values
			my $val_list = $sub->($entry) || [ 'NONE' ];
			# put the entry in every category under which it fits
			push @{$group_h->{ $_ }}, $entry foreach @$val_list;
		},
		'unknown' => sub {
			my $entry = shift;
			print STDERR "performing sub unknown!\n";
			my $val = $sub->($entry) || [ 'NONE' ];
			# convert param into a list
			$val = [ $val ] if ref($val) ne 'ARRAY';
			# put the entry in every category under which it fits
			push @{$group_h->{ $_ }}, $entry foreach @$val;
		},
	};
	
	print STDERR "p_type = ".$p_type."\n";
	
	# do the sub on each entry of the list
	foreach my $entry (@$obj_arr)
	{	#$grouping_subroutine->{$p_type}->($_);
#		print STDERR "performing sub unknown!\n";
		my $val = $sub->($entry) || [ 'NONE' ];
		# convert param into a list
		$val = [ $val ] if ref($val) ne 'ARRAY';
		# put the entry in every category under which it fits
		push @{$group_h->{ $_ }}, $entry foreach @$val;
	}
	
#	print STDERR "group_h: ".Dumper( $group_h );
	
	return $group_h;
}


sub __get_me {
	my $to_get = shift;

	#	return if there is no transform specified
	return if !$to_get;
	
	print STDERR "to_get: ".Dumper($to_get);
	
	sub {
	
		if (ref($to_get) eq 'HASH')
		{	my $item = shift;
			my ($k, $v);
			while (($k, $v) = each %$to_get)
			{	if ($k eq 'in_list')
				{	return 1 if grep { $item eq $_ } @$v;
					return 0;
				}
				elsif ($k eq 'has_fn')
				{	return $item->$v || undef;
				}
				elsif ($k eq 'has_hash_key')
				{	return $item->{$v} || undef;
				}
				elsif ($k eq 'do_fn')
				{	# check if $v->{obj} is a value or if it's another nested thing
					if (ref($v->{obj}) eq 'HASH')
					{	# replace $v->{obj} with the value
						$v->{obj} = &__get_me($v->{obj}, $item);
					}
					return $v->{fn}->( $v->{obj}->($item) );
				}
				last;
			}
		}
		elsif (ref($to_get) eq 'ARRAY')
		{	return
				join("\0",
					map { __get_me($_) } @$to_get );
		}
		else
		{	if ($to_get eq 'arr[0]')
			{	my $item = shift;
				return $item->[0] || '';
			}
			elsif ($to_get eq 'arr[1]')
			{	my $item = shift;
				return $item->[1] || '';
			}
			elsif ($to_get eq 'arr[2]')
			{	my $item = shift;
				return $item->[2] || '';
			}
			elsif ($to_get eq 'arr[0,1]')
			{	my $item = shift;
				return ($item->[0] || '') . "\0" . ($item->[1] || '');
			}
			elsif ($to_get eq 'arr[0,2]')
			{	my $item = shift;
				return join("\0",
					$item->[0] || '',
					$item->[2] || '');
	#			return __get_me('arr[0]', @_) . "\0" . __get_me('arr[2]', @_);
			}
		}
		return shift || undef;

	}
}

sub get_me {
	return __get_me(@_);
}



=head2 turn_into_text

	Turns (semi-) formatted text into slightly more formatted text

=cut

sub turn_into_text {
	my $proto_text = shift;
	
	#	encode any ampersands
	$proto_text =~ s/&/&amp;/gs;
	
	#	encode superscripts
	$proto_text =~ s/\^(-?\d+)/<sup>$1<\/sup>/gs;
	
	## add A links to URLs
	# if there's anything that starts with www and doesn't have an http:// on it,
	# add the prefix
	
#	$proto_text =~ s/(?<!http:\/\/)www/http:\/\/www/gis;
	$proto_text =~ s/((f|ht)tps?:\/\/)(\S+?)([\s\>\)]|\.$)/<a href="$1$3">$1$3<\/a>$4/gis;

	print STDERR "Turning into text: $proto_text\n";


	return $proto_text;
}


=head2 summarize_errors

Creates a summary of the input errors

input:  $results, # a Data::FormValidator::Results object
        $joiner   # a string with which to join the results, optional
output: an array of strings indicating which fields are erroneous
        if $joiner is specified, returns as a single string, with results separated by $joiner
=cut

sub summarize_errors {
	my $results = shift;
	my $str;

#	print STDERR "results: ".Dumper($results);

	# make a summary of the errors
	# Print the name of missing fields
	if ( $results->has_missing )
	{	push ( @$str, "$_ is missing" ) for ( $results->missing );
	}

	# Print the name of invalid fields
	if ( $results->has_invalid )
	{	push @$str, "$_ is invalid" foreach ( $results->invalid );
	}
		
	# Print the name of unknown fields
	if ( $results->has_unknown )
	{	push ( @$str, "$_ is unknown" ) for ( $results->unknown );
	}

	my $joiner = shift;
	if ($joiner)
	{	return join($joiner, @$str);
	}
	
	# return as an array otherwise
	return $str;

}

=head2 check_list_for_values

input:  arg_h->{query_list}  # what we're looking for
        arg_h->{results}     # where we're looking for it
        arg_h->{test}        # the value(s) we need from each resultperl

output: { missing => [ ids ], found => [ ids ] }

=cut

sub check_list_for_values {
	my $args = shift;
	my $query_list = $args->{query_list};

	# results list. If absent, abort straight away
	if (! $args->{results})
	{	return { missing => $args->{query_list} };
	}
	
	# otherwise, see 
	my $test = $args->{test}; # how to specify this?!
	$test = sub { my ($a, $b) = @_; return 1 if ($a eq $b || $a == $b); return 0; } if ! defined $test;
	
	my $result_h;
	QUERY_LIST:
	foreach (@$query_list)
	{	# look for this in the results list
		foreach my $r ( @{$args->{results}} )
		{	if ( &$test($r, $_) )
			{	push @{$result_h->{found}}, $_;
				next QUERY_LIST;
			}
		}
		push @{$result_h->{missing}}, $_;
	}
	return $result_h;
}


1;