package GO::GeneralPurposeParser;

use strict;
use lib '/Users/gwg/go/scratch/tools';
#use Carp;
#use Exporter;
use Data::Dumper;
$Data::Dumper::Indent = 1;
#$Data::Dumper::Deparse = 1;
#use GO::Object::Generic;

use base 'GO::MsgLite';

use vars qw($AUTOLOAD);
use GO::Boolean;
use GO::Utilities;

=head1 Public methods: new, parse_from_file, parse_array, reset

These methods are the only methods that should be used to interact with
and get data from the parser

=cut

sub new 
{	my $proto = shift;
	my $class = ref($proto) || $proto;
	my $self = {};
	bless $self, $class;

	return $self->set_parser_params(@_);
}

sub set_parser_params {
	my $self = shift;
	my $params = shift || {};

=comment out for now
## fail modes:
#	FAIL => die on failure
#	WARN_AND_IGNORE => # Issue a warning and don't store the data
#	WARN_AND_RECORD => # Issue a warning but keep the data 
#	IGNORE => # as above, no warning given
#	RECORD => # as above, no warning given

	my $error_mode = $params->{error_mode};
	# check the error mode is legal
	if ($error_mode)
	{	unless (grep { $error_mode eq $_ } qw(FAIL WARN_AND_IGNORE WARN_AND_RECORD IGNORE RECORD))
		{	undef $error_mode;
		}
	}
	# set the error mode; default to 'warn and record' if no error mode provided
	$self->{__ERROR_MODE} = $error_mode || $self->default_value('error_mode');
=cut

## verbosity
	if ($params->{verbose_mode})
	{	$self->set_verbosity($params->{verbose_mode});
	}

	
## returning results:
#	default: return an array / hash / whatever with the parsed values in it
#	success_hash: return the data in a slightly different form
#	parsed => 
#	unparsed => 

	my $return_mode = $params->{return_mode};
	if ($return_mode)
	{	unless (grep { $return_mode eq $_ } qw(SUCCESS_HASH))
		{	undef $return_mode;
		}
	}
	# set the return mode
	$self->{__RETURN_MODE} = $return_mode if $return_mode;
	return $self;
}


=head2 reset_parser

Resets the parser and removes any existing data

=cut

sub reset_parser {
	my $self = shift;

	foreach my $k (keys %$self)
	{	delete $self->{$k} unless grep { $k eq $_ } qw(__RETURN_MODE __ERROR_MODE __VERBOSITY);
	}

	return $self;
}




=head2 parse_from_file

Wrapper for the parse_from_file_subr subroutine

=cut

sub parse_from_file {
	my $self = shift;

	my $results;
	eval {
		$results = $self->parse_from_file_subr(@_);
	};
	if ($@)
	{	$self->fatal_msg("Something terrible happened! $@");
	}

	return $self->return_results($results);
}


=head2 parse_text

Wrapper for the parse_text_subr subroutine

=cut

sub parse_text {
	my $self = shift;

	my $results;
	eval {
		$results = $self->parse_text_subr(@_);
	};
	if ($@)
	{	$self->fatal_msg("Something terrible happened! $@");
	}

	return $self->return_results($results);
}



=head2 parse_array

Wrapper for the parse_array_subr subroutine

=cut

sub parse_array {
	my $self = shift;

	my $results;
	eval {
		$results = $self->parse_array_subr(@_);
	};
	if ($@)
	{	$self->fatal_msg("Something terrible happened! $@");
	}
	
	return $self->return_results($results);
}


=head2 return_results

Returns the results either as they are, or in the form of a success hash.
Success hash looks like this:

{	results => $results,              # the results (if any)
	success => 1,                     # 0 if no results are found
	msg_list => $self->get_all_msgs,  # any messages
}

=cut

sub return_results {
	my $self = shift;
	my $results = shift;
	
	if (! $self->{__RETURN_MODE} ) # no return mode specified; just return results
	{	return $results;
	}
	
	if ($self->{__RETURN_MODE} eq 'SUCCESS_HASH')
	{	my $return_h;
		
		if (defined $results)
		{	$return_h = { success => 1, results => $results };
		}
		else
		{	$return_h = { success => 0 };
		}
		
		if ($self->has_msgs)
		{	$return_h->{msg_list} = $self->get_all_msgs;
		}
		return $return_h;
	}
}


=head2 default_value

Get default values for parameters

=cut

sub default_value {
	my $self = shift;
	my $param = shift;
	
	my $default = {
		file_separator => "\n",
		delimiter => qr/\t/,
#		regexp =>
		tag_val_sep => qr/:/,
		error_mode => 'WARN_AND_RECORD',
	};
	
	return $default->{$param} ||
		$self->printerr("No default value for the parameter $param") && return undef;
}


sub valid_parsers {
	return [
		'regexp',
		'tag_value',
		'delimited',
		'multi_field_tag_value',
		'subroutine',
	];
}


=head2 is_valid_parser

Returns 1 if the parser is valid; 0 otherwise

=cut

sub is_valid_parser {
	my $self = shift;
	my $p_name = shift;
	return 1 if grep { $p_name eq $_ } @{$self->valid_parsers};
	return 0;
}


=head2 parse_from_file_subr

input:  self, arg_h containing
        - file           # the name of the file
        - file_separator # separate file into records; optional; default is "\n"
        
        - preserve_separator  # if present, will keep the file_separator instead
                         # of chopping it off
        
        - preserve_whitespace # if present, will preserve records consisting
                         # solely of whitespace, with no character data

        - inner_parser   # a hash containing specifications for further processing
                         # of the data

output: either an error message to stderr or the parsed file data

Unless there is further parsing data in the inner_parser argument, data will be
returned as an array of strings

=cut

sub parse_from_file_subr {
	my $self = shift;
	my $arg_h = shift;
	$self->startme();
	

	if (! $arg_h->{file})
	{	$self->fatal_msg("No file name specified for the read_file parser!");
		return undef;
	}

	## check that the file exists and is readable
	my $file = $arg_h->{file};
	if (!$file)
	{	$self->fatal_msg("The file $file could not be found");
		return undef;
	}
	elsif (! -r $file)
	{	$self->fatal_msg("The file handle $file is invalid");
		return undef;
	}

	## find out how to separate the data
	if ($arg_h->{file_separator})
	{	# make sure the separator is the correct format
		if (ref $arg_h->{file_separator})
		{	$self->fatal_msg("file_separator should be a string");
			return undef;
		}
	}
	if (!$arg_h->{file_separator})
	{	# use the default
		$arg_h->{file_separator} = $self->default_value('file_separator');

#		$self->debugme("file separator is ".Dumper($arg_h->{file_separator}));

	}

	my @to_do_list;
	unless ($arg_h->{preserve_separator})
	{	push @to_do_list, sub { my $x = shift; $x =~ s/$arg_h->{file_separator}$//; return $x };
	}
	unless ($arg_h->{preserve_whitespace})
	{	push @to_do_list, sub { my $x = shift; return undef unless $x =~ /\S/; return $x; };
	}

	my $output;
	my $parser;
	if (! $arg_h->{inner_parser})
	{	# we need a save subroutine. Push the data on to a list, since nothing is specified
		push @to_do_list, sub { my $x = shift; push @$output, $x; };
	}

	else
	{	# we're actually going to do something with the results.
		
		## A little bit of checking before we start to ensure that we have
		## all that is required
		$parser = $self->prep_parser($arg_h->{inner_parser});
	
		# quit if there's no parser
		return undef unless $parser;
		
#		$Data::Dumper::Deparse = 1;
#		print STDERR "parser: ".Dumper($parser);
#		$Data::Dumper::Deparse = 0;


#		$self->debugme("parser looks like this: ".Dumper($parser)."\n");

#		push (@to_do_list, sub { return $self->run_loop_single_item(shift, $parser) });
	}

#	$self->debugme("To do list: ".Dumper([ @to_do_list ]));
	
	my $save = $self->_prepare_save_sub($arg_h);
	$self->debugme("save sub: ".Dumper($save));
	return undef if ! $save;


	# read in the file
	# open the file and read in the lines
	open(FH, $file) || $self->fatal_msg("Could not open file $file: $!") && return undef;
	
	# set the file separator
	local $/ = $arg_h->{file_separator};
	
#	$self->debugme("file separator: ".Dumper($/));

	if (! $parser)
	{	while (<FH>)
		{	my $line = $_;
			foreach (@to_do_list)
			{	my $result = &$_($line);
				next unless $result;
				$line = $result;
			}
		}
	}
	else
	{	
		while (<FH>)
		{	my $line = $_;
#			$self->debugme("Getting a record...");
			foreach (@to_do_list)
			{	my $result = &$_($line);
				next unless $result;
				$line = $result;
			}
#			$self->debugme("Ran the to-do list tests on line");
			my $result = $self->run_loop_single_item($line, $parser);
#			$self->debugme("Ran the loop on line");
			next unless $result;

			# save the output
			$self->debugme("running the save sub, input is\n".Dumper($result)."\n\n");
			$output = &$save($result, $output);
		}
	}
	close(FH);

	if (! $output ||
		(ref $output eq 'ARRAY' && ! @$output) ||
		(ref $output eq 'HASH' && ! %$output) )
	{	# add an error if we don't have any data
		$self->warning_msg({ MSG => "Parsing $file produced no results", MSG_CODE => 'no_results' });
		return undef;
	}

	return $output;
#
}


=head2 parse_text_subr

A little shortcut to put the input into parse_array_subr

input:  $self,
        data to be parsed # as a string
        the parser arguments
        
output: parsed data!

=cut

sub parse_text_subr {
	my $self = shift;
	my $string = shift;
	
	my $results = $self->parse_array_subr( [ $string ], @_ );

	# return the first item if the results are as an array
	if ($results && ref $results eq 'ARRAY' && scalar @$results == 1)
	{	return $results->[0];
	}
	
	return $results;
}


=head2 parse_array_subr

This is the main function for getting parsing done.

input:  $self, 
        data to be parsed # as an arrayref
        the parser arguments, prepared using prep_parser

output: the data, all nicely parsed!

=cut

sub parse_array_subr {
	my $self = shift;
	my $data = shift;
	
	my $data_copy;

	# check that the data is an array ref; if not, convert it into one
	if (! $data )
	{	$self->fatal_msg({ MSG => "No input data could be found", MSG_CODE => 'no_data' });
		return undef;
	}
	elsif ( ! ref $data )
	{	# convert it into an array
		$data_copy = [ $data ];
	
	}
	elsif (ref $data ne 'ARRAY')
	{	$self->fatal_msg({ MSG => "Error! Parser requires an array as input", MSG_CODE => 'data_wrong_format' });
		return undef;
	}
	else
	{	# phew! Everything is OK!
		# make a copy of the data so that we don't mess up the original.
		foreach (@$data)
		{	if (ref $_)
			{	### oh no! We've somehow managed to get a data structure in here...
				$self->fatal_msg({ MSG => "Error! Parser requires an array of strings as input", MSG_CODE => 'data_wrong_format' });
				return undef;
			}
			push @$data_copy, $_ if defined $_;
		}
	}

	if (!$data_copy || !@$data_copy)
	{	$self->fatal_msg({ MSG => "No input data could be found", MSG_CODE => 'no_data' });
		return undef;
	}
	
	my $arg_h = shift;
	$self->startme();
	
	my $parser = $self->prep_parser($arg_h);

	# quit if there's no parser
	return undef unless $parser;
	
	my $results = $self->run_loop($data_copy, $parser);

	if (! $results )
	{	$self->warning_msg({ MSG => "Parsing produced no results", MSG_CODE => 'no_results' });
		return undef;
	}

	return $results;

}








=head2 run_loop

input:  array to be parsed
        parser arguments
        
output: parsed things!
=cut

sub run_loop {
	my $self = shift;
	my $input_arr = shift || return;
	my $parser = shift;

#$Data::Dumper::Deparse = 1;
#$self->debugme("Arguments IN:\n"
#	."self: ".Dumper($self)
#	."\ninput: ".Dumper($input)
#	."\nparser: ".Dumper($parser)
#);
#$Data::Dumper::Deparse = 0;
	
	return $input_arr if ! $parser->{execution_stack};
	my $save = $parser->{save};
	my $output;

#	$self->debugme("input has type ". (ref $input || 'SCALAR') );

#	if (ref $input && ref $input eq 'HASH')
#	{	my @temp;
#		while (my ($k, $v) = each %$input) {
#			push @temp, { $k => $v };
#		}
#		$input = [ @temp ];
#	}

	INPUT_LOOP:
	foreach my $input (@$input_arr)
	{	# skip unless there's some kind of input
		unless ($input)
		{#	$self->debugme("skipping blank input") && next;
		}

		my $result = $self->run_loop_single_item($input, $parser);
		next unless $result;

#		my $result;
#		foreach my $sub ( @{$parser->{execution_stack}} )
#	#	{	my $result = &$sub($input);
#		{	$result = &$sub($input);
#			if (! $result )
#			{	# new : put the unparsed data here for safekeeping
#				push @{$self->{UNPARSED_DATA}}, $input;
#				return undef;
#				next INPUT_LOOP;
#			}
#			$input = $result;
#			$self->debugme("$sub done: results look like this:\n".Dumper($result)."\n");
#		}
	
#		# save the output
#		$self->debugme("running the save sub...");
		$output = &{$parser->{save}}($result, $output);
#		$self->debugme("output post save: ".Dumper($output));
	}

#	$self->debugme("returning output: ".Dumper($output));

	return $output;
#	return $parser->{__OUTPUT};
}


=head2 run_loop_single_item

input:  item to be parsed
        parser arguments
        
output: parsed things!
=cut

sub run_loop_single_item {
	my $self = shift;
	my $input = shift || return;
	my $parser = shift;
#	my $output = shift;

	return $input if ! $parser->{execution_stack};

	foreach my $sub ( @{$parser->{execution_stack}} )
	{	my $result = &$sub($input);
		if (! $result )
		{	# new : put the unparsed data here for safekeeping
			push @{$self->{UNPARSED_DATA}}, $input;
			return undef;
		}
		$input = $result;
#		$self->debugme("$sub done: results look like this:\n".Dumper($result)."\n");
	}
	return $input;

}



##### The parsers themselves. Woooo! #####


=head2 tag_value_parser

input:  string
        parser arguments:
        - tag_val_sep  # will use the default otherwise

output: { <stuff before tag_val_sep> => <stuff after tag_val_sep> }
        
        if there is no value, the parser returns { <tag> => undef }
        if there is no tag, the parser returns { '' => <value> }
        if there is nothing, the parser returns undef

Note that each parsed string gets turned into a hashref, which could potentially
be rather on the un-useful side. It may be beneficial to use the 'save_as' argument
to output a different structure.

=cut

sub tag_value_parser {
	my $self = shift;
	my $data = shift || return;
	return unless $data =~ /\S/;
	my $arg_h = shift;

#	$self->debugme("TVP: arg_h: ".Dumper($arg_h));
#	$self->debugme("input: ".Dumper($data));

	# make sure we have a tag_val_sep
	$self->fatal_msg("No tag_val_sep!") && return unless $arg_h->{tag_val_sep};

	my $tag_val_sep = $arg_h->{tag_val_sep};
	
	my ($t, $v) = split(/\s*$tag_val_sep\s*/, $data, 2);

#	$self->debugme("t: ".Dumper($t)."v: ".Dumper($v));

	# return if there is nothing in the tag or the value
	return undef unless $t =~ /\S/ || $v =~ /\S/;
	
	return { $t => $v };
}


=head2 regexp_parser

input:  string
        parser arguments:
        - regexp  # will use the default otherwise
        - get_field_names # map the data to the names of the fields

output: [ results of parsing with regexp ]
        if get_field_names is on, the output will be a hash
        otherwise, the output is an array
        
        if the regexp doesn't produce any results, the parser returns undef

=cut

sub regexp_parser {
	my $self = shift;
	my $data = shift || return;
	return unless $data =~ /\S/;
	my $arg_h = shift;

	# make sure we have a regexp
	$self->fatal_msg("No regexp!") && return unless $arg_h->{regexp};

	my $regexp = $arg_h->{regexp};
	my $values;
	
	# use a regexp to get the data
	@$values = ($data =~ /$regexp/);

#	$self->debugme("values: ".Dumper($values));
	
	if (! $values || ! @$values)
	{	return undef;
	}

	return $self->map_values_to_fields($values, $arg_h) if $arg_h->{get_field_names};

	return $values;
}


=head2 delimited_parser

input:  string
        parser arguments:
        - delimiter       # will use the default otherwise
        - field_list      # the fields in the string (if known)
        - n_fields        # the number of fields to split the data into (if known)
        - get_field_names # map the data to the names of the fields

output: results of parsing or undef
        if get_field_names is on, the output will be a hash
        otherwise, the output is an array

=cut

sub delimited_parser {
	my $self = shift;
	my $data = shift || return;
	return unless $data =~ /\S/;
	my $arg_h = shift;

	$self->fatal_msg("No delimiter!") && return unless $arg_h->{delimiter};

#	$self->debugme("delimited_parser: arg_h: ".Dumper($arg_h));
#	$self->debugme("input: ".Dumper($data));


	# xxx-delimited data.
	# split the data with the delimiter
	my $delimiter = $arg_h->{delimiter};

	my $values;
	# if we known how many fields to split the data into, do so
	if ($arg_h->{n_fields})
	{	@$values = split(/\s*$delimiter\s*/, $data, $arg_h->{n_fields});
	}
	# or if we have a field list, split it into that many fields
	elsif ($arg_h->{field_list})
	{	@$values = split(/\s*$delimiter\s*/, $data, scalar @{$arg_h->{field_list}});
	}
	# otherwise, just split it into as many pieces as there are
	else
	{	@$values = split(/\s*$delimiter\s*/, $data);
	}

	if (! $values || ! @$values)
	{	return undef;
	}

	return $self->map_values_to_fields( $values, $arg_h) if $arg_h->{get_field_names};

	return $values;
}

=head2 subroutine_parser

input:  string
        parser arguments:
        - subr    # the subroutine to parse with
        - arg_h can also contain any other relevant parameters, e.g. get_field_names

output: [ results of parsing ]
        if get_field_names is on, the output will be a hash
        otherwise, the output is whatever the subroutine produces

=cut

#	subroutine-based parsed
sub subroutine_parser {
	my $self = shift;
	my $data = shift || return;
	return unless $data =~ /\S/;
	my $arg_h = shift;

	$self->fatal_msg("No subroutine!") && return unless $arg_h->{subr};
	
	my $values = &{ $arg_h->{subr} }($data, $arg_h);
	
	return undef if ! $values ||
	# an empty array
	( ref $values eq 'ARRAY' && !@$values ) ||
	# an empty hash
	( ref $values eq 'HASH' && !%$values );

	return $self->map_values_to_fields($values, $arg_h) if $arg_h->{get_field_names};

	return $values;

}


##### Processing the results... #####


sub map_values_to_fields {
	my $self = shift;
	my $values = shift || return;
	my $arg_h = shift || return;

	my $field_list = $arg_h->{field_list};

	if ($arg_h->{has_header_row})
	{	if (!$field_list)
		{	$arg_h->{field_list} = $values;
			$arg_h->{n_fields} = scalar @$values;
			return;
		}
	}
	
	## we can either return an array or a hash
	my $return_array = $arg_h->{return_array} || undef;

#	$self->debugme("field_list: ".Dumper($field_list));
#	$self->debugme("input: ".Dumper($values));

	if (scalar @$field_list < scalar @$values)
	{	$self->warning_msg("may miss some parsed data!");
	}

	# if we have a field list, use the values in it as the field names
	my $return;
	my $index = 0;
	foreach my $f (@$field_list)
	{	next unless defined $values->[$index];
		my $f_name = $f;
		# if the item in the field list is a simple scalar, it's the field name
		# set the value in the data hash
		my $value = $values->[$index];
		if (! ref $f )
		{	# fine; a simple scalar
		}
		elsif ( ref $f eq 'CODE')
		{	# we are running another parser here
#			$self->debugme("input into map vals to field sub: ".Dumper($values->[$index]));
			$value = &$f( $value );
#			$self->debugme("CODE result value: ".Dumper($value)."\n");
			## need to find out what we should do with this now!
			## how were we supposed to save this?
			if ($arg_h->{parent_hash_merge})
			{	while ( my ($key, $val) = each %$value )
				{	push @{$return->{ $key }}, @$val;
				}
			}
			elsif ($arg_h->{hash_key_field})
			{	$f_name = $arg_h->{hash_key_field};
			}
			else
			{	# holy shit, what do we do now?!
				$self->printerr("Help! Don't know what to do with these results!");
			}
			next;
		}
		else
		{	$self->printerr("Unknown field structure in map_values_to_fields: ".Dumper($f));
		}

		if ($return_array)
		{	push @$return, { $f_name => $value };
		}
		else
		{	$return = $self->_merge_into_h({ $f_name => $value }, $return);
#			save everything as an arrayref
#			push @{$return->{$f_name}}, $value;
		}

=cut
		else
		{	
			# we'll do something more fancy with $values->[$index]
			my $return;
			if ($f->{subr})
			{	# there's a subroutine to be done
				
			}
			elsif ($f->{parser})
			{	# this field needs further parsing
				# check the parser is ok
				
				if ($self->is_valid_parser($f->{parser}))
				{	$self->debugme("parser args: ".Dumper($f->{parser})."\n");
					$return = $self->parse($f->{parser}, $values->[$index], $f->{parser});
				}
				else
				{	$self->debugme("Correct parser not found.\n");
					# what should we do now?!
					push @{$data_h->{$f}}, $values->[$index];
				}
			}
			else
			{	# not doing any specific subroutine
				$return = $values->[$index];
			}

			if ($f->{field_name})
			{	push @{$data_h->{ $f->{field_name} }}, $return;
			}
			else
			# some kind of hash-based thing?
			{	while ( my ($key, $val) = each %$return)
				{	push @{$data_h->{ $key }}, @$val;
				}
			}
		}
=cut
		$index++;
	}

#	$self->debugme("about to return map_values_to_fields. looks like this:\n".Dumper($return)."\n");

	return $return;
}



=head2 create_new_object

A sub to create an object from an input hash
Errors will be added to the object if creation failed.

input:  $self, $object_type, $arg_h containing any options
output: the object, if obj creation worked
        otherwise, nothing

=cut

sub create_new_object {
	my $self = shift;
	my $obj_type = shift || "GO::Object::Generic";
	my $arg_h = shift || {};
	my $data = shift;
#	$self->startme();
#	$self->debugme("create_new_object arguments: ".Dumper($arg_h)."\n");

	my $results = $obj_type->new({ %$arg_h, data => $data, return_as => 'success_hash' });
	return $results->{OBJECT} if $results->{SUCCESS};
	
	# add the errors / messages to the parser if there are any
	$self->add_message_list($results->{ERROR_LIST}) if $results->{ERROR_LIST};
	return;
}



### OTHER STUFF ###

#sub param {
#	my $self = shift;
#	my $pname = shift;
#	if (@_)
#	{	push @{$self->{_PARAMS}{$pname}}, @_;
#	}
#	return $self->{_PARAMS}{$pname} || undef;
#}


=comment
# a general interface for autoloading?
# not implemented at present

sub parse {
	my $self = shift;
	my $p_name = shift;
	if ($self->is_valid_parser($p_name))
	{	$p_name .= '_parser';
		return $self->$p_name(@_);
	}
	else
	{	die "$p_name is not a valid parser. Dying ";
	}
}
=cut


### SAVING DATA ###

sub _add_to_array {
	my $self = shift;
	my $x = shift;
	my $data_arr = shift;

#	$self->debugme(
#	"self: ".Dumper($self) .
#	"data_arr: ".Dumper($data_arr) .
#	"data to input: ".Dumper($x) .
#	"\n");

#	$self->debugme("data: ".Dumper($data_arr)."\nx: ".Dumper($x)."\n");

	push @$data_arr, $x;
	
#	push @{$self->{__OUTPUT}}, $x;
	
	return $data_arr;
}


sub _merge_into_h {
	my $self = shift;
	my $x = shift;
	my $data_h = shift;

#	$self->debugme(
#	"self: ".Dumper($self) .
#	"data_h: ".Dumper($data_h) .
#	"data to input: ".Dumper($x) .
#	"\n");

	if (ref $x ne 'HASH')
	{	my $msg = "Input format is incorrect: HASH required, found ". ( ref $x || 'SCALAR');
		$self->fatal_msg($msg);
		die $msg;
	}

	while ( my ($key, $val) = each %$x )
	{	if ($data_h->{$key})
		{	if (ref $data_h->{$key} && ref $data_h->{$key} eq 'ARRAY')
			{	#$self->debugme("data_h->{key}: ".Dumper($data_h->{$key})."value: ".Dumper($val));
				push @{$data_h->{ $key }}, $val;
#				push @{$self->{__OUTPUT}{ $key }}, $val;
			}
			else
			{	$data_h->{ $key } = [ $data_h->{$key}, $val ];
			}
		}
		else
		{	$data_h->{$key} = $val;
#			$self->{__OUTPUT}{$key} = $val;
		}
	}
#	push @{$data_h->{ $x->{tag} }}, $x->{value};
	return $data_h;
}


sub _create_h_key {
	my $self = shift;
	my $x = shift;
	my $data_h = shift;
	my $key_field = shift;

#	$self->debugme(
#	"self: ".Dumper($self) .
#	"data_h: ".Dumper($data_h) .
#	"data to input: ".Dumper($x) .
#	"key field: ".Dumper($key_field) .
#	"\n");

	if ($x->{$key_field})
	{	my $key;
		if (! ref $x->{$key_field})
		{	$key = $x->{$key_field};
		}
		elsif (ref $x->{$key_field} eq 'ARRAY')
		{	# array: take the first value
			$key = $x->{$key_field}[0];
		}
		else
		{	$self->fatal_msg("Help! Unrecognized key field format! ". ref $x->{$key_field});
			return $data_h;
		}

		# create the hash using the key and merge the results into the data_h
		return $self->_merge_into_h({ $key => $x }, $data_h);
	}
	else
	{	$self->printerr("key field $key_field not found in record! Record not added.");
	}

	return $data_h;
}


# remove excess whitespace
sub __trim_text {
	my $self = shift;
	my $text = shift;
	$text =~ s/^\s*(\S+.+\S)\s*$/$1/mg;
	return $text;
}



# check regexp is OK
sub __check_regexp {
	my $self = shift;
	my $regexp = shift;
	my $parser_type = shift;
	my $copy = $regexp;

	eval { "" =~ /$regexp/; };
	if ($@)
	{	$self->fatal_msg("invalid regexp : $@");
		return undef;
	}
	
	eval { qr/$regexp/; };
	if ($@)
	{	$self->fatal_msg("invalid regexp : $@");
		return undef;
	}

	if ($parser_type && $parser_type eq 'regexp')
	{	# check for any brackets in the regexp
		$copy = quotemeta $copy;
	
#		$self->debugme($copy);
	
		if ( $copy !~ /\(/ || $copy !~ /\)/ )
		{#	$self->debugme("No brackets found!");
			return undef;
		}
	}
	
	return $regexp;
}


sub __wrap_array_op {
	my $sub = shift;
	my $vals = shift;

	print STDERR "wrap array op: arguments are:\nvals: ".Dumper($vals);
	
	if (ref $vals eq 'HASH')
	{	foreach (values %$vals)
		{	
			$_ = &$sub($_);

		}
	}
	elsif (ref $vals eq 'ARRAY')
	{	foreach (@$vals)
		{	$_ = &$sub($_);
		}
	}
	else
	{	return &$sub($vals);
	}
	print STDERR "returning vals: ".Dumper($vals)."\n";

	return $vals;
}


sub parse_from_form {
	my $self = shift;
	my $query = shift || return;
	my %q_hash = $query->Vars();
	
	#	convert form data into usable data
	foreach (keys %q_hash)
	{	if (exists $q_hash{$_})
		{	my @temp = split("\0", $q_hash{$_});
			$q_hash{$_} = [ @temp ];
		}
		else
		{	#	delete any keys with no value
			delete $q_hash{$_};
		}
	}

#	put in any specialized parsing stuff here
	return $self->_parse_from_form(\%q_hash);
}

sub _parse_from_form {
	my $self = shift;
	return shift;
}





### PREPARING THE PARSER ###

=head2 prep_parser

Preparing the parser for some top quality action.

input:  self, arg_h

output: the various subs that the parser is going to run

=cut

sub prep_parser {
	my $self = shift;
	my $arg_h = shift;

	if (! $arg_h || ref $arg_h ne 'HASH')
	{	$self->fatal_msg("Parser specification must be a hash! Found ". (ref $arg_h || 'SCALAR'));
		return undef;
	}

	$self->startme();

#	$self->debugme("\n\n\narguments for parser preparation:\n".Dumper($arg_h)."\n\n");

	my $parser;
	my $err;

	# translate the parser, if required
	# convert mftv arguments if present
	if ($arg_h->{parser} =~ /^mftv$/i || $arg_h->{parser} =~ /^multi.field.tag.value$/i)
	{	$arg_h = $arg_h = $self->_prep_mftv_parser($arg_h);
		if (!$arg_h)
		{	$self->fatal_msg("warning: could not understand parser arguments.");
			return undef;
		}
	}



	### Prepare the parser subroutine
	if (! $arg_h->{parser} || ! defined $arg_h->{parser}{type})
	{	$self->fatal_msg("No parser specified!");
		$err++;
#		return undef;
	}
	else
	{	# check if the parser is a dummy or not, and if not, check the args are OK
		if ($arg_h->{parser}{type} =~ /^dummy$/i)
		{	# this is just a dummy parser to fill up space...
#			$self->debugme("Using a dummy parser...");
		}
		else
		{	# prepare the parser for action
			my $parser_sub = $self->_prepare_parser_sub($arg_h->{parser});
			if ($parser_sub)
			{	$parser->{parse} = $parser_sub;
			}
			else
			{	$err++;
			}
		}
	}

	#	data to filter
	if ($arg_h->{filter})
	{	## convert this into an if_then argument
		if ($arg_h->{if_then})
		{	if (ref $arg_h->{if_then} eq 'HASH')
			{	$arg_h->{if_then} = [ $arg_h->{if_then}, { 'if' => $arg_h->{filter}, 'when' => 'prepare' } ];
			}
			elsif (ref $arg_h->{if_then} eq 'ARRAY')
			{	push @{$arg_h->{if_then}}, { 'if' => $arg_h->{filter}, 'when' => 'prepare' };
			}
		}
		else
		{	$arg_h->{if_then} = { 'if' => $arg_h->{filter}, 'when' => 'prepare' };
		}
	}


	if ($arg_h->{if_then})
	{	my $test_sub = $self->_prepare_if_then_sub($arg_h->{if_then});
		if ($test_sub)
		{	
			if (ref $test_sub eq 'ARRAY')
			{	foreach (@$test_sub)
				{	$parser->{ $_->{when} } = 
#					sub { return &{$_->{'subr'}}( @_ ); };
					$_->{'subr'};
				}
			}
			else
			{	$parser->{ $test_sub->{when} } = 
#				sub { return &{$test_sub->{'subr'}}( @_ ); };
				$test_sub->{'subr'};
			}
		}
		else
		{	$err++;
			#return undef;
		}
	}

	#	prepare: any transforms we might want to do before parsing
	#	for example, if we want to alter the input in a routine way
	#	process: post-parsing transforms

	foreach ('prepare', 'post_process') #, 'process', )
	{	next unless $arg_h->{$_};
		my $sub = $self->_prepare_sub($arg_h->{$_}, $_);
		if ($sub)
		{	$parser->{$_} = $sub;
		}
		else
		{	#$self->fatal_msg("$_ sub not created!");
			$err++;
			#return undef;
		}
	}
	
	if ($arg_h->{'process'})
	{	my $sub = $self->_prepare_sub($arg_h->{process}, 'process');
		if ($sub)
		{	# we need to do this to every member of the array or hash
			$parser->{process} = sub {
				my $input = shift;
#				$self->debugme("input into process: ".Dumper($input));
				if (ref $input eq 'ARRAY')
				{	$_ = &$sub($_) foreach @$input;
				}
				elsif (ref $input eq 'HASH')
				{	my $result_h;
					while (my ($k, $v) = each(%$input))
					{	my $r = &$sub({ $k => $v });
						if ($r)
						{	$result_h = $self->_merge_into_h($r, $result_h);
						}
					}
					$input = $result_h;
				}
#				$self->debugme("after processing: ".Dumper($input));
				return $input;
			};
		}
		else
		{	$self->fatal_msg("process sub not created!");
			$err++;
#			return undef;
		}
	}

	if ($arg_h->{inner_parser})
	{	my $inner = $self->prep_parser($arg_h->{inner_parser});
		if ($inner)
		{	## put the inner parser into the stack of stuff to be executed

			###### FIX THIS!!  <-- fix what?

			$parser->{inner_parser} = sub {
				# run the parser
				my $results = $self->run_loop( @_, $inner );
#				$self->debugme("results of the second parser: ".Dumper($results));
				# return the results
				return $results;
			};
		}
		else
		{	$self->fatal_msg("Could not create inner_parser!");
			$err++;
		#	return undef;
		}
	}
	
		

	## Let's get rid of this bit!  <-- no, I think it's OK
	#	if we want to convert the data into some other structure, do it here
	if ($arg_h->{return_object})
	{	my $return_sub = $self->_prepare_return_sub($arg_h);
		if ($return_sub)
		{	$parser->{return_object} = $return_sub;
		}
		else
		{	$self->fatal_msg("could not create return_sub!");
			$err++;
		#	return undef;
		}
	}


	# what kind of data structure to put the results into
	my $save_sub = $self->_prepare_save_sub($arg_h);
	if (! $save_sub )
	{	$err++;
	}
	else
	{	$parser->{save} = $save_sub;
	}

=cut
	if ($arg_h->{save_as})
	{	if ($arg_h->{save_as} eq 'hash_add')
		{	# we need to know what to use as the key!
			if ($arg_h->{hash_key_field})
			{	$self->debugme("save method is hash add. Good to know, eh?");
				
				# if we've got a field list, make sure that we actually have the field!
				if ($arg_h->{parser}{field_list} && ! grep { $arg_h->{hash_key_field} eq $_ } @{$arg_h->{parser}{field_list}})
				{	$self->fatal_msg("Specified hash key does not exist in field_list!");
					$err++;
#					return undef;
				}
				else
				{	$parser->{save} = sub {
						return $self->_create_h_key(@_, $arg_h->{hash_key_field});
					};
				}
			}
			else
			{	$self->fatal_msg("No hash key specified for add_to_hash! Crap.\n");
				$err++;
#				return undef;
			}
		}
		elsif ($arg_h->{save_as} eq 'hash_merge')
		{	$self->debugme("save method is hash merge. Good to know, eh?\n");
			$parser->{save} = sub {
				return $self->_merge_into_h(@_);
			};
		}
		elsif ($arg_h->{save_as} eq 'parent_hash_merge')
		{	$self->debugme("save method is parent hash merge. Good to know, eh?\n");
			$parser->{save} = sub {
				return $_[-1];
			};
		}
	}
	
#	if (! $parser->{save} )
#	{	$self->debugme("No save method found. Using the default...\n");
#		$parser->{save} = sub { 
#			my $saved = shift;
#			my $record = shift;
#			push @$saved, $record;
#		};
#	}
=cut

	if ($err)
	{#	$self->debugme("Found errors during parser creation");
		return undef;
	}

	$Data::Dumper::Deparse = 1;
#	$self->debugme("\nThe parser: ".Dumper($parser)."\n");
	$Data::Dumper::Deparse = 0;

	foreach my $sub ('prepare', 'parse', 'inner_parser', 'process', 'return_object', 'post_process')
	{	foreach ("", "_filter")
		{	if ($parser->{$sub.$_})
			{	if (ref $parser->{$sub.$_} eq 'ARRAY')
				{	push @{$parser->{execution_stack}}, @{$parser->{$sub.$_}};
				}
				else
				{	push @{$parser->{execution_stack}}, $parser->{$sub.$_};
				}
			}
		}
	}

	return $parser;

}


=head2 _prepare_parser_sub

input:  arg_h containing
   type =>              # the type of parser; valid parsers are
          regexp, delimited, tag_value, subroutine, multi_field_tag_value (mftv)

   delimiter =>         # for delimited parser; can be a string or a Regexp
   tag_val_sep =>       # ditto but for the tag_value parser
   regexp =>            # ditto for the regexp parser

   is_regexp => 1       # for delimiter and tag_val_sep, if they should be
                        # treated as regular expressions

   subr =>              # subroutine for the subroutine parser
          

OPTIONAL:

   get_field_names => 1 # set this if you want the data to be mapped to the
                        # field names
   has_header_row => 1  # if the data is in some kind of regular format with the
                        # first row being the field names
   field_list => [...]  # array containing the names of the fields
   n_fields => #        # the number of fields to split the data into if using
                        # a delimited parser; if not set, will use the number
                        # of fields in the field_list (if specified)

=head3 Extra field_list Goodness

field_list values are usually scalars, but you can also specify extra parsing of
the values by putting in a hash.



output: parser subroutine if all is well; otherwise undef

=cut

sub _prepare_parser_sub {
	my $self = shift;
	my $arg_h = shift;

	# check the arg_h is in the right format
	if (! $arg_h || ref $arg_h ne 'HASH')
	{	$self->fatal_msg("Parser arguments must be a hash! Found "
		. (ref $arg_h || 'SCALAR') );
		return undef;
	}

#	$self->debugme("parser arguments: ".Dumper($arg_h->{parser}));

	if ($arg_h->{type} eq 'regexp')
	{	## check for parser args!!
		# multiple fields, in some regexp-able format
		if (! defined $arg_h->{regexp})
		{	$self->fatal_msg("No regular expression supplied; cannot run parse");
			return undef;
		}
		
		if (! ref $arg_h->{regexp})
		{	# convert a string into a regexp
			# add in a check that the string creates a valid regexp
			$arg_h->{regexp} = $self->__check_regexp($arg_h->{regexp}, 'regexp');
			return undef if ! $arg_h->{regexp};
		}
		# some other mysterious form of regexp
		elsif (ref $arg_h->{regexp} ne 'Regexp')
		{	$self->fatal_msg("Cannot understand regexp in form ".ref $arg_h->{regexp});
			return undef;
		}

#		$self->debugme("regexp looks like this: ".Dumper($arg_h->{regexp}));

		$self->__check_parser_args($arg_h);
		
		return
		sub {
#			my $record = shift;
			return $self->regexp_parser(shift, $arg_h);
		};

	}
	# Prep delimited or tag-value parsers
	elsif ($arg_h->{type} eq 'delimited' || $arg_h->{type} eq 'tag_value')
	{	
		my $p_sep_hash = { delimited => 'delimiter', tag_value => 'tag_val_sep' };
		my $sep = $p_sep_hash->{$arg_h->{type}};
		
		# ensure that the separator is defined if it does exist
		# delete it if it exists but isn't defined
		if (exists $arg_h->{$sep}
			&& ( ! defined $arg_h->{$sep} || length $arg_h->{$sep} == 0 ))
		{	$self->warning_msg("$sep exists but is undefined; using default");
			delete $arg_h->{$sep};
		}
			
		# if it doesn't exist, use the default
		if (! $arg_h->{$sep} )
		{	$arg_h->{$sep} = $self->default_value($sep);
		}

		# if the separator is a text string...
		if (! ref $arg_h->{$sep})
		{	# if it's not a regular expression, quote metacharacters
			if (! $arg_h->{is_regexp} )
			{	$arg_h->{$sep} = quotemeta $arg_h->{$sep};
			}
			
			# convert the string into a regexp
			# add in a check that the string creates a valid regexp
			$arg_h->{$sep} = $self->__check_regexp($arg_h->{$sep});
			return undef if ! $arg_h->{$sep};
		}
		# some other mysterious form of separator - return an error
		elsif (ref $arg_h->{$sep} ne 'Regexp')
		{	$self->fatal_msg("Cannot understand $sep in form ".ref $arg_h->{$sep});
			return undef;
		}
		## otherwise, we're dealing with Regexps, which should be fine.

		$self->__check_parser_args($arg_h);
		my $p_type = $arg_h->{type} .= "_parser";

		# create the subroutine...
		return
		sub {
			return $self->$p_type(shift, $arg_h);
		};

	}
	elsif ($arg_h->{type} eq 'subroutine')
	{	# check that the subroutine is defined and that it's a code reference
		if (! $arg_h->{subr} ||
		( $arg_h->{subr} && ref $arg_h->{subr} ne 'CODE' ) )
		{	$self->fatal_msg("No valid subr defined for subroutine parser!");
			return undef;
		}

		return $self->__return_eval_sub($arg_h->{subr}, "subroutine parser", @_);

		return
		sub {
			
			my $result;
			eval {
				$result = &{$arg_h->{subr}}(@_)
			};

			return $result unless $@;

			if ($@)
			{	#	we actually want to stop completely here, so let's die
				die "Subroutine parser encountered an error: $@";
			#	$self->fatal_msg("Subroutine parser encountered an error: $@");
			#	return undef;
			}
		};

	}
	else # we don't know the parser type!
	{	$self->fatal_msg("Parser type is unknown or undefined!\n".Dumper($arg_h->{type}));
		return undef;
	}
}

=head2 __check_parser_args

Ensures that the correct arguments are supplied for the parser

=cut

sub __check_parser_args {
	my $self = shift;
	my $parser_args = shift;

	# depending on what format we want our results in, we may want to attach
	# the values from the field_list to our data
	if ( # ($parser_args->{return_as} && $parser_args->{return_as} =~ /hash/) ||
			$parser_args->{get_field_names}
		|| $parser_args->{has_header_row}
		|| $parser_args->{field_list} )
	{	## we want to match up the field names with the values
		$parser_args->{get_field_names} = 1;
#		$self->debugme("Setting the get_field_names flag");
	}

	#	if we are doing a parse involving a field_list, see if there's going to be
	#	another parse
	if ($parser_args->{field_list})
	{#	$self->debugme("Found a field list!");
		foreach my $f ( @{$parser_args->{field_list}} )
		{	# if it's a plain scalar, there's nothing more to do
			next unless ref($f);
			if (ref $f eq 'HASH')
			{	# we'll make the blind assumption that the hash is a parser hash
				my $inner_parser = $self->prep_parser($f);
				if ($inner_parser)
				{#	$self->debugme("inner_parser: ".Dumper($inner_parser));
					# put this parser stuff in
					#	$parser->{process} = ## create a new parser
					$f = 
					sub {
						# run the parser
						my $results = $self->run_loop( @_ , $inner_parser );
#						$self->debugme("results of the second parser: ".Dumper($results));
						# return the results
						return $results;
					};

					# we need to know what to do with the results
					if ($f->{hash_key_field})
					{	$parser_args->{hash_key_field} = $f->{hash_key_field};
					}
					elsif ($f->{save_as} && $f->{save_as} eq 'parent_hash_merge')
					{	$parser_args->{parent_hash_merge} = 1;
					}
					else
					{	# help! what should we do?!
						$self->printerr("Help! Don't know how to deal with the results of this sub!");
					}

				}
				else
				{	$self->fatal_msg("inner parser subr not created!");
					return undef;
				}
			}
			else
			{	$self->fatal_msg("Data in field_list must be HASH or SCALAR, not ".ref $f);
				return undef;
			}
		}
	}
}


=head2 _prep_mftv_parser

Preparing the multi-field-tag-value parser
We've used shortcut tags here, so change it back into the standard format

There are two steps in this parse:

1) split each record with \n to produce a set of fields (done by $p1)
2) split each field into tag-value pairs ($p2)

=cut

sub _prep_mftv_parser {
	my $self = shift;
	my $arg_h = shift;
	$self->startme();
	my $p1;
	my $p2;

#	$self->debugme("arg_h: ".Dumper($arg_h));

	foreach ('filter', 'prepare', 'process', 'if_then', 'post_process')
	{	if ($arg_h->{$_ . "_record"})
		{	$p1->{$_} = $arg_h->{$_ . "_record"};
		}
		if ($arg_h->{$_ . "_field"})
		{	$p2->{$_} = $arg_h->{$_ . "_field"};
		}
	}

	$p1->{parser} =
	{	type => 'delimited',
		delimiter => $arg_h->{field_sep} || "\n", 
		return_as => 'values_only'
	};
	
	$p2->{parser} =
	{	type => 'tag_value',
		tag_val_sep => $arg_h->{tag_val_sep} || ":" 
	};

	$p2->{save_as} = 'hash_merge';

	if ($arg_h->{return_record_as_object})
	{	$p1->{return_object} = $arg_h->{return_record_as_object};
	}

	$p1->{inner_parser} = $p2;
	
	$self->debugme("mftv parser:\n".Dumper($p1)."\n");
	return $p1;
}


=head2 _prepare_if_then_sub

Prepare a test subroutine

input:  self, arg_h
output: either undef or a subroutine reference

input format:

if_then => {
	if => < the test here >
	then => < action to perform, CODE ref >
	when => 'stage_name' # defaults to 'prepare' if not set
}


valid inputs for 'if':

scalar - assumed to be a text string that the value must match
regexp - ditto
code   - a subroutine reference
array  - each item in the array is examined and converted into a subroutine;
         the array is treated as an OR query, i.e. if one item succeeds, the
         whole query succeeds
hash   - a boolean query, formatted for GO::Boolean


valid inputs for 'then':

code   - a subroutine to be executed if the test returns true
hash   - a dispatch table with values as keys and subroutines to be executed if
         the result is equal to the key



valid inputs for 'when':

prepare, post_process, # parse, process, save

The test is done *before* the stage in each case

=cut

sub _prepare_if_then_sub {
	my $self = shift;
	my $test_args = shift || return; # the test criteria

	$self->debugme("test_args: ".Dumper($test_args));

	# the test args must either be an array or a hash
	if (ref $test_args && ref $test_args eq 'ARRAY')
	{	# if we have an array, check that each member of the array is a valid
		# if-else sub
		my $test_arr;
		foreach (@$test_args)
		{	if (! ref $_ || ref $_ ne 'HASH')
			{	# we're not getting into one of those recursion situations, sorry
				$self->fatal_msg("if_then sub must be of type HASH, not " . ref $_ || 'SCALAR');
			}
			else
			{	my $t = $self->_prepare_if_then_sub($_);
				push @$test_arr, $t if $t;
			}
		}
		if (scalar @$test_args == scalar @$test_arr)
		{	# phew!
			return $test_arr;
		}
		else
		{	return undef;
		}
	}
	elsif (! ref $test_args || ref $test_args ne 'HASH')
	{	$self->fatal_msg("if_then sub must be of type HASH, not " . ref $test_args || 'SCALAR');
		return undef;
	}

	# check that we have a test to perform
	if (! $test_args->{'if'})
	{	$self->fatal_msg("No test found in if_then arguments");
		return undef;
	}

	my $if_then_sub;

	### Check that the test sub is OK
	my $test_sub = $self->__check_test( $test_args->{'if'} );
	return undef if ! $test_sub;

	# see what we're going to do with the results
	## 'then' must be a hash or code
	if (! $test_args->{'then'})
	{	# this is a basic filter; if there is a result from the sub, continue;
		# otherwise, go on to the next item to test
		$if_then_sub = sub {
			my $value = shift;
			my $result = &$test_sub($value);
			return $value if $result;
			return undef;
		};
	}
	elsif (! ref $test_args->{'then'} )
	{	$self->fatal_msg("if_then 'then' data should be of type HASH or CODE, not SCALAR");
		return undef;
	}
	elsif (ref $test_args->{'then'} eq 'CODE')
	{	my $action = $test_args->{'then'};
		# fine, no need to do anything
		$if_then_sub = sub {
			my @args = @_;
			my $result = &$test_sub(@args);
			
			if ($result)
			{	# if there is a result, do the action
				return #&$action(@args);
					$self->__return_eval_sub($action, "if_then action sub", @args);
			}
			return undef;
		};
	}
	elsif (ref $test_args->{'then'} eq 'HASH')
	{	# see if we're dealing with an inner parser...
		# this may well be a whole new and exciting world we're entering into!
		if ($test_args->{'then'}{'inner_parser'}) # we've got an inner parser on the go... woohoo!
		{	my $inner = $self->prep_parser($test_args->{'then'}{inner_parser});
			return undef if ! $inner;
			# otherwise, run the parser on the results
			my $loop_sub = 'run_loop';
			if ($test_args->{'when'} && $test_args->{'when'} eq 'post_process')
			{	$loop_sub = 'run_loop_single_item';
			}
			
			$if_then_sub = sub {
				my $value = shift;
				print STDERR "value looks like this: ".Dumper ($value) ."\n";
				my $result = &$test_sub($value);
				if ($result)
				{	return $self->$loop_sub( $value , $inner);
				}
				return undef;
			};

		}
		else
		{	# if the item returns this result, do this
			# check that what is in the 'action' is a valid thing to do
			foreach my $action (values %{$test_args->{'then'}})
			{	if (! ref $action)
				{	$self->fatal_msg("if_then 'then' hash value should be of type CODE, not SCALAR");
					return undef;
				}
				elsif (ref $action eq 'HASH')
				{	if ($action->{'inner_parser'}) # we've got an inner parser on the go... woohoo!
					{	# make sure the inner parser passes muster
						my $inner = $self->prep_parser($action->{inner_parser});
						return undef if ! $inner;
						# otherwise, run the parser on the results
						my $loop_sub = 'run_loop';
						if ($test_args->{'when'} && $test_args->{'when'} eq 'post_process')
						{	$loop_sub = 'run_loop_single_item';
						}

						# convert 'action' into a subroutine
						$action = sub {
							my $value = shift;
#							$self->debugme("value looks like this: ".Dumper ($value));
							my $result = &$test_sub($value);
							if ($result)
							{	return $self->$loop_sub( $value , $inner );
							}
							return undef;
						};
					}
					else
					{	$self->fatal_msg("Don't know what to do with action hash!");
						return undef;
					}
				}
				elsif (ref $action eq 'CODE')
				{	# a code ref: we're fine
				}
				else
				{	$self->fatal_msg("if_then 'then' hash value should be of type CODE or HASH, not ". ref $action);
					return undef;
				}
			}

			$if_then_sub = sub
			{	my @args = @_;
				my $result = &$test_sub(@args);
				if ($result)
				{	if ($test_args->{'then'}{$result})
					{	# if the result exists in the action hash, do it
						return $self->__return_eval_sub($test_args->{'then'}{$result}, "if_then action sub", @args);
					}
#						$self->debugme("got a result, but it wasn't one we expected!");
					if ($test_args->{'then'}{''})
					{	return $self->__return_eval_sub($test_args->{'then'}{""}, "if_then action sub", @args);
					}
				}
				return undef;
			};

		}
	}
	else
	{	$self->fatal_msg("if_then 'then' data should be of type HASH or CODE, not ". ref $test_args->{'then'});
	}

	my $when;

	if ( $test_args->{when} )
	{	# this is when the test should be executed
		# allowable times
		# BEFORE prepare
		# BEFORE parse
		# AFTER parse, before any processing the parse results
		# BEFORE post-process
		# BEFORE save
		
		my $times = [ 'prepare', 'post_process' ]; #, 'parse', 'process', 'post_process', 'save' ];
		
		if (ref $test_args->{when})
		{	$self->fatal_msg("if_then when argument should be of type SCALAR, not ". ref $test_args->{when});
			return undef;
		}
		elsif ( ! grep { $test_args->{when} eq $_ } @$times )
		{	$self->fatal_msg("if_then when argument '".$test_args->{when}."' not understood! Please choose one of the following: ".join(", ", @$times));
			return undef;
		}
		else
		{	# phew! We're ok
			$when = $test_args->{when} . "_filter";
		}
	}
	else
	{	# assume it is a prepare filter
		$when = "prepare_filter";
	}
	# phew! We're ok
	return { when => $when, subr => $if_then_sub };
}


=head2 __check_test

Makes sure that test criteria are in the correct format

input:  self, arg_h
output: either undef or a subroutine reference

Valid inputs:

scalar - assumed to be a text string that the value must match
regexp - ditto
code   - a subroutine reference
array  - each item in the array is examined and converted into a subroutine;
         the array is treated as an OR query, i.e. if one item succeeds, the
         whole query succeeds
hash   - a boolean query, formatted for GO::Boolean

=cut

sub __check_test {
	my $self = shift;
	my $test = shift || return; # the test criteria
	
	#	convert the test bits into subroutines
	if (! ref($test) )
	{	# a single value
		# turn this into a subroutine that we can execute

		# make sure that we have a valid regexp
		$test = $self->__check_regexp($test);
		return undef if ! $test;

		return sub {
			my $val = shift;
			return $val =~ /$test/;
		};
	}
	elsif (ref $test eq 'Regexp')
	{	return sub {
			my $val = shift;
			return $val =~ /$test/;
		};
	}
	elsif (ref $test eq 'CODE')
	{	return 
		sub {
			my $val = shift;
			my $result;
			eval { $result = &$test($val) };
			
			if ($@)
			{	die "Got an error from a subroutine: $@";
			}
			return $result || undef;
		};
	}
	elsif (ref $test eq 'ARRAY')
	{	my $new_test_arr;
		foreach my $i (@$test)
		{	my $subr = $self->__check_test($i);
			push @$new_test_arr, $subr if $subr;
		}

		if (!$new_test_arr || ! @$new_test_arr || scalar @$new_test_arr != scalar @$test)
		{	$self->fatal_msg("Invalid tests found in test array");
			return undef;
		}

		# at present this is an OR test,
		# i.e. if one test passes, the whole lot do
		#	check that it's a valid query
		my $bool_query = GO::Boolean::check_boolean_query( { OR => [ map {  { FN => $_ }  } @$new_test_arr ] } );
		if (!$bool_query)
		{	$self->fatal_msg("Test arguments are invalid");
			return undef;
		}

		return
		sub {
			my $val = shift;
			return GO::Boolean::run_boolean_query($bool_query, { test => $val } );
		};
	}
	elsif (ref $test eq 'HASH')
	{	if ($test->{FN})
		{	# it looks like we're using the hash construction from
			# GO::Boolean::run_boolean_query
			# unfortunately we don't know what the operator is. Make it 'OR' by
			# default
			
			#	check that it's a valid query
			my $bool_query = GO::Boolean::check_boolean_query({ OR => [ $test ] });
			if (!$bool_query)
			{	$self->fatal_msg("Test arguments are invalid");
				return undef;
			}
			
			return sub {
				my $val = shift;
				return GO::Boolean::run_boolean_query( $bool_query, { test => $val } );
			};
		}
		elsif ($test->{AND} || $test->{OR} || $test->{NOT})
		{	# it looks like we're using the hash construction from
			# GO::Boolean::run_boolean_query

			#	let's check that it's a valid query
			my $bool_query = GO::Boolean::check_boolean_query($test);
			if (!$bool_query)
			{	$self->fatal_msg("Test arguments are invalid");
				return undef;
			}

			return sub {
				my $val = shift;
				return GO::Boolean::run_boolean_query($bool_query, { test => $val });
			};
		}
		else
		{	$self->fatal_msg("Test arguments are invalid:\n".Dumper( $test ) );
			return undef;
		}
	}
	else
	{#	$self->debugme("Unrecognized test type: ". ref($test) ."\n");
	}
	return undef;
}




=head2 _prepare_return_sub

Prepare a subroutine to create a new object from the data

input:  self, arg_h with return_object => 'object_name' specified
output: the object or undef if no object creation results

=cut

sub _prepare_return_sub {
	my $self = shift;
	$self->startme;
	my $arg_h = shift;

	# make sure we have arguments and a return_object specified
	if (! $arg_h || ! %$arg_h || ! defined $arg_h->{return_object} || ref $arg_h->{return_object} )
	{	$self->fatal_msg("No return object specified in return sub!");
		return undef;
	}

	my $obj_type = $arg_h->{return_object};
#	$self->debugme("object type: $obj_type\n");

	# check that the object actually exists!
	eval "require $obj_type";
	if ($@) {
		$self->fatal_msg("$obj_type is not installed on this system: $@");
		return undef;
	}

	## it exists... phew!
	## get various bits and bobs for object creation and return
	return 
	sub {
#		my $new = 
		return
		$self->create_new_object( $obj_type, {
			object_spec => $arg_h->{object_spec} || $obj_type->get_spec(),
			dfv_profile => $arg_h->{dfv_profile} || $obj_type->dfv_profile(),
			transform_data => 1,
			return_as => 'success_hash',
			check_input => 1,
		}, shift);

#		$self->debugme("new object: ".Dumper($new));
#		return $new;
	};
}


=head2 _prepare_sub

Gets the subroutines used by the parser ready for action

=cut

sub _prepare_sub {
	my $self = shift;
	my $args = shift || return;
	my $sub_type = shift;

#	$self->debugme("args: ".Dumper($args)."sub_type: ".Dumper($sub_type)."\n");

	if (! ref $args)
	{	$self->fatal_msg("could not understand argument $args");
		return undef;
	}
	elsif (ref $args eq 'CODE')
	{	# fine, this is a subroutine; we can return it as it is
		return sub { $self->__return_eval_sub($args, "custom $sub_type sub", @_) };
	}
	elsif (ref $args eq 'ARRAY')
	{	my @new_fns;
		# check all these mofos are OK
		foreach (@$args)
		{	my $new = $self->_prepare_sub($_);
			if ($new)
			{	push @new_fns, $new;
			}
			else
			{	$self->fatal_msg("could not identify fn in $_\n");
				return undef;
			}
		}

		return sub {
			# iterate through the code until either all the subs are complete
			# or we hit some kind of error
			my $input = shift;
			foreach (@new_fns)
			{	my $result = &$_($input);
				return undef unless $result;
				$input = $result;
			}
			return $input;
		};
	}
	elsif (ref($args) eq 'HASH')
	{	if ($args->{parser})
		{	# this may well be another parser. How thrilling!
			my $parser = $self->prep_parser($_);
			if ($parser)
			{	# our parser was OK
				return sub {
					return $self->run_loop( @_, $parser );
				};
			}
			else
			{	$self->fatal_msg("prep_parser failed!");
				return undef;
			}
		}
#		elsif ($args->{filter})
#		{	# this is a filter subr
#			my $filter = $self->_prepare_filter_sub($args->{filter});
#			if (!$filter)
#			{	# die
#				
#			}
#		elsif ($args->{boolean})
#		{	# run a boolean sub
#		}
		else
		{	$self->fatal_msg("could not understand parser argument hash\n".Dumper($args));
			return undef;
		}
	}
	else
	{	$self->fatal_msg("argument type is invalid: ".ref($args));
		return undef;
	}

	return undef;

}


=head2 _prepare_save_sub

How to save the data

=cut

sub _prepare_save_sub {
	my $self = shift;
	my $arg_h = shift;
	
	# what kind of data structure to put the results into
	if ($arg_h->{save_as})
	{	if ($arg_h->{save_as} eq 'hash_add')
		{	# we need to know what to use as the key!
			if (! $arg_h->{hash_key_field})
			{	$self->fatal_msg("No hash key specified for hash_add! Crap.");
				return undef;
			}
			
			# if we've got a field list, make sure that we actually have the field!
			if ($arg_h->{parser_args}{field_list} && ! grep { $arg_h->{hash_key_field} eq $_ } @{$arg_h->{parser_args}{field_list}})
			{	$self->fatal_msg("Specified hash key does not exist in field_list!");
				return undef;
			}

#			$self->debugme("save method is hash add. Good to know, eh?");
			
			return sub { return $self->_create_h_key(@_, $arg_h->{hash_key_field}) };
		}
		elsif ($arg_h->{save_as} eq 'hash_merge')
		{#	$self->debugme("save method is hash merge. Good to know, eh?");
			return sub { return $self->_merge_into_h(@_) };
		}
		elsif ($arg_h->{save_as} eq 'parent_hash_merge')
		{#	$self->debugme("save method is parent hash merge. Good to know, eh?");
			return sub { return $_[-1] };
		}
		elsif ($arg_h->{save_as} eq 'array')
		{	#delete $arg_h->{save_as};
			return sub { return $self->_add_to_array(@_) };
		}
		else
		{#	$self->debugme("Do not understand the save argument ".$arg_h->{save_as});
			return undef;
		}
	}

	# nothing specified...
	return sub { return $self->_add_to_array(@_) };
}

=head2 __return_eval_sub

Wraps a subroutine in an eval

=cut

sub __return_eval_sub {
	my $self = shift;
	my $sub = shift;
	my $sub_name = shift || "subroutine";
	
#	return
#	sub {
		my $result;
		eval {
			$result = &$sub(@_)
		};

		return $result unless $@;

		if ($@)
		{	#	we actually want to stop completely here, so let's die
			die "Encountered an error in " . $sub_name . ": $@";
		}
#	};
}







1;