package GO::GeneralPurposeParser; use strict; use lib '/Users/gwg/go/scratch/tools'; #use Carp; #use Exporter; use Data::Dumper; $Data::Dumper::Indent = 1; #$Data::Dumper::Deparse = 1; #use GO::Object::Generic; use base 'GO::MsgLite'; use vars qw($AUTOLOAD); use GO::Boolean; use GO::Utilities; =head1 Public methods: new, parse_from_file, parse_array, reset These methods are the only methods that should be used to interact with and get data from the parser =cut sub new { my $proto = shift; my $class = ref($proto) || $proto; my $self = {}; bless $self, $class; return $self->set_parser_params(@_); } sub set_parser_params { my $self = shift; my $params = shift || {}; =comment out for now ## fail modes: # FAIL => die on failure # WARN_AND_IGNORE => # Issue a warning and don't store the data # WARN_AND_RECORD => # Issue a warning but keep the data # IGNORE => # as above, no warning given # RECORD => # as above, no warning given my $error_mode = $params->{error_mode}; # check the error mode is legal if ($error_mode) { unless (grep { $error_mode eq $_ } qw(FAIL WARN_AND_IGNORE WARN_AND_RECORD IGNORE RECORD)) { undef $error_mode; } } # set the error mode; default to 'warn and record' if no error mode provided $self->{__ERROR_MODE} = $error_mode || $self->default_value('error_mode'); =cut ## verbosity if ($params->{verbose_mode}) { $self->set_verbosity($params->{verbose_mode}); } ## returning results: # default: return an array / hash / whatever with the parsed values in it # success_hash: return the data in a slightly different form # parsed => # unparsed => my $return_mode = $params->{return_mode}; if ($return_mode) { unless (grep { $return_mode eq $_ } qw(SUCCESS_HASH)) { undef $return_mode; } } # set the return mode $self->{__RETURN_MODE} = $return_mode if $return_mode; return $self; } =head2 reset_parser Resets the parser and removes any existing data =cut sub reset_parser { my $self = shift; foreach my $k (keys %$self) { delete $self->{$k} unless grep { $k eq $_ } qw(__RETURN_MODE __ERROR_MODE __VERBOSITY); } return $self; } =head2 parse_from_file Wrapper for the parse_from_file_subr subroutine =cut sub parse_from_file { my $self = shift; my $results; eval { $results = $self->parse_from_file_subr(@_); }; if ($@) { $self->fatal_msg("Something terrible happened! $@"); } return $self->return_results($results); } =head2 parse_text Wrapper for the parse_text_subr subroutine =cut sub parse_text { my $self = shift; my $results; eval { $results = $self->parse_text_subr(@_); }; if ($@) { $self->fatal_msg("Something terrible happened! $@"); } return $self->return_results($results); } =head2 parse_array Wrapper for the parse_array_subr subroutine =cut sub parse_array { my $self = shift; my $results; eval { $results = $self->parse_array_subr(@_); }; if ($@) { $self->fatal_msg("Something terrible happened! $@"); } return $self->return_results($results); } =head2 return_results Returns the results either as they are, or in the form of a success hash. Success hash looks like this: { results => $results, # the results (if any) success => 1, # 0 if no results are found msg_list => $self->get_all_msgs, # any messages } =cut sub return_results { my $self = shift; my $results = shift; if (! $self->{__RETURN_MODE} ) # no return mode specified; just return results { return $results; } if ($self->{__RETURN_MODE} eq 'SUCCESS_HASH') { my $return_h; if (defined $results) { $return_h = { success => 1, results => $results }; } else { $return_h = { success => 0 }; } if ($self->has_msgs) { $return_h->{msg_list} = $self->get_all_msgs; } return $return_h; } } =head2 default_value Get default values for parameters =cut sub default_value { my $self = shift; my $param = shift; my $default = { file_separator => "\n", delimiter => qr/\t/, # regexp => tag_val_sep => qr/:/, error_mode => 'WARN_AND_RECORD', }; return $default->{$param} || $self->printerr("No default value for the parameter $param") && return undef; } sub valid_parsers { return [ 'regexp', 'tag_value', 'delimited', 'multi_field_tag_value', 'subroutine', ]; } =head2 is_valid_parser Returns 1 if the parser is valid; 0 otherwise =cut sub is_valid_parser { my $self = shift; my $p_name = shift; return 1 if grep { $p_name eq $_ } @{$self->valid_parsers}; return 0; } =head2 parse_from_file_subr input: self, arg_h containing - file # the name of the file - file_separator # separate file into records; optional; default is "\n" - preserve_separator # if present, will keep the file_separator instead # of chopping it off - preserve_whitespace # if present, will preserve records consisting # solely of whitespace, with no character data - inner_parser # a hash containing specifications for further processing # of the data output: either an error message to stderr or the parsed file data Unless there is further parsing data in the inner_parser argument, data will be returned as an array of strings =cut sub parse_from_file_subr { my $self = shift; my $arg_h = shift; $self->startme(); if (! $arg_h->{file}) { $self->fatal_msg("No file name specified for the read_file parser!"); return undef; } ## check that the file exists and is readable my $file = $arg_h->{file}; if (!$file) { $self->fatal_msg("The file $file could not be found"); return undef; } elsif (! -r $file) { $self->fatal_msg("The file handle $file is invalid"); return undef; } ## find out how to separate the data if ($arg_h->{file_separator}) { # make sure the separator is the correct format if (ref $arg_h->{file_separator}) { $self->fatal_msg("file_separator should be a string"); return undef; } } if (!$arg_h->{file_separator}) { # use the default $arg_h->{file_separator} = $self->default_value('file_separator'); # $self->debugme("file separator is ".Dumper($arg_h->{file_separator})); } my @to_do_list; unless ($arg_h->{preserve_separator}) { push @to_do_list, sub { my $x = shift; $x =~ s/$arg_h->{file_separator}$//; return $x }; } unless ($arg_h->{preserve_whitespace}) { push @to_do_list, sub { my $x = shift; return undef unless $x =~ /\S/; return $x; }; } my $output; my $parser; if (! $arg_h->{inner_parser}) { # we need a save subroutine. Push the data on to a list, since nothing is specified push @to_do_list, sub { my $x = shift; push @$output, $x; }; } else { # we're actually going to do something with the results. ## A little bit of checking before we start to ensure that we have ## all that is required $parser = $self->prep_parser($arg_h->{inner_parser}); # quit if there's no parser return undef unless $parser; # $Data::Dumper::Deparse = 1; # print STDERR "parser: ".Dumper($parser); # $Data::Dumper::Deparse = 0; # $self->debugme("parser looks like this: ".Dumper($parser)."\n"); # push (@to_do_list, sub { return $self->run_loop_single_item(shift, $parser) }); } # $self->debugme("To do list: ".Dumper([ @to_do_list ])); my $save = $self->_prepare_save_sub($arg_h); $self->debugme("save sub: ".Dumper($save)); return undef if ! $save; # read in the file # open the file and read in the lines open(FH, $file) || $self->fatal_msg("Could not open file $file: $!") && return undef; # set the file separator local $/ = $arg_h->{file_separator}; # $self->debugme("file separator: ".Dumper($/)); if (! $parser) { while () { my $line = $_; foreach (@to_do_list) { my $result = &$_($line); next unless $result; $line = $result; } } } else { while () { my $line = $_; # $self->debugme("Getting a record..."); foreach (@to_do_list) { my $result = &$_($line); next unless $result; $line = $result; } # $self->debugme("Ran the to-do list tests on line"); my $result = $self->run_loop_single_item($line, $parser); # $self->debugme("Ran the loop on line"); next unless $result; # save the output $self->debugme("running the save sub, input is\n".Dumper($result)."\n\n"); $output = &$save($result, $output); } } close(FH); if (! $output || (ref $output eq 'ARRAY' && ! @$output) || (ref $output eq 'HASH' && ! %$output) ) { # add an error if we don't have any data $self->warning_msg({ MSG => "Parsing $file produced no results", MSG_CODE => 'no_results' }); return undef; } return $output; # } =head2 parse_text_subr A little shortcut to put the input into parse_array_subr input: $self, data to be parsed # as a string the parser arguments output: parsed data! =cut sub parse_text_subr { my $self = shift; my $string = shift; my $results = $self->parse_array_subr( [ $string ], @_ ); # return the first item if the results are as an array if ($results && ref $results eq 'ARRAY' && scalar @$results == 1) { return $results->[0]; } return $results; } =head2 parse_array_subr This is the main function for getting parsing done. input: $self, data to be parsed # as an arrayref the parser arguments, prepared using prep_parser output: the data, all nicely parsed! =cut sub parse_array_subr { my $self = shift; my $data = shift; my $data_copy; # check that the data is an array ref; if not, convert it into one if (! $data ) { $self->fatal_msg({ MSG => "No input data could be found", MSG_CODE => 'no_data' }); return undef; } elsif ( ! ref $data ) { # convert it into an array $data_copy = [ $data ]; } elsif (ref $data ne 'ARRAY') { $self->fatal_msg({ MSG => "Error! Parser requires an array as input", MSG_CODE => 'data_wrong_format' }); return undef; } else { # phew! Everything is OK! # make a copy of the data so that we don't mess up the original. foreach (@$data) { if (ref $_) { ### oh no! We've somehow managed to get a data structure in here... $self->fatal_msg({ MSG => "Error! Parser requires an array of strings as input", MSG_CODE => 'data_wrong_format' }); return undef; } push @$data_copy, $_ if defined $_; } } if (!$data_copy || !@$data_copy) { $self->fatal_msg({ MSG => "No input data could be found", MSG_CODE => 'no_data' }); return undef; } my $arg_h = shift; $self->startme(); my $parser = $self->prep_parser($arg_h); # quit if there's no parser return undef unless $parser; my $results = $self->run_loop($data_copy, $parser); if (! $results ) { $self->warning_msg({ MSG => "Parsing produced no results", MSG_CODE => 'no_results' }); return undef; } return $results; } =head2 run_loop input: array to be parsed parser arguments output: parsed things! =cut sub run_loop { my $self = shift; my $input_arr = shift || return; my $parser = shift; #$Data::Dumper::Deparse = 1; #$self->debugme("Arguments IN:\n" # ."self: ".Dumper($self) # ."\ninput: ".Dumper($input) # ."\nparser: ".Dumper($parser) #); #$Data::Dumper::Deparse = 0; return $input_arr if ! $parser->{execution_stack}; my $save = $parser->{save}; my $output; # $self->debugme("input has type ". (ref $input || 'SCALAR') ); # if (ref $input && ref $input eq 'HASH') # { my @temp; # while (my ($k, $v) = each %$input) { # push @temp, { $k => $v }; # } # $input = [ @temp ]; # } INPUT_LOOP: foreach my $input (@$input_arr) { # skip unless there's some kind of input unless ($input) {# $self->debugme("skipping blank input") && next; } my $result = $self->run_loop_single_item($input, $parser); next unless $result; # my $result; # foreach my $sub ( @{$parser->{execution_stack}} ) # # { my $result = &$sub($input); # { $result = &$sub($input); # if (! $result ) # { # new : put the unparsed data here for safekeeping # push @{$self->{UNPARSED_DATA}}, $input; # return undef; # next INPUT_LOOP; # } # $input = $result; # $self->debugme("$sub done: results look like this:\n".Dumper($result)."\n"); # } # # save the output # $self->debugme("running the save sub..."); $output = &{$parser->{save}}($result, $output); # $self->debugme("output post save: ".Dumper($output)); } # $self->debugme("returning output: ".Dumper($output)); return $output; # return $parser->{__OUTPUT}; } =head2 run_loop_single_item input: item to be parsed parser arguments output: parsed things! =cut sub run_loop_single_item { my $self = shift; my $input = shift || return; my $parser = shift; # my $output = shift; return $input if ! $parser->{execution_stack}; foreach my $sub ( @{$parser->{execution_stack}} ) { my $result = &$sub($input); if (! $result ) { # new : put the unparsed data here for safekeeping push @{$self->{UNPARSED_DATA}}, $input; return undef; } $input = $result; # $self->debugme("$sub done: results look like this:\n".Dumper($result)."\n"); } return $input; } ##### The parsers themselves. Woooo! ##### =head2 tag_value_parser input: string parser arguments: - tag_val_sep # will use the default otherwise output: { => } if there is no value, the parser returns { => undef } if there is no tag, the parser returns { '' => } if there is nothing, the parser returns undef Note that each parsed string gets turned into a hashref, which could potentially be rather on the un-useful side. It may be beneficial to use the 'save_as' argument to output a different structure. =cut sub tag_value_parser { my $self = shift; my $data = shift || return; return unless $data =~ /\S/; my $arg_h = shift; # $self->debugme("TVP: arg_h: ".Dumper($arg_h)); # $self->debugme("input: ".Dumper($data)); # make sure we have a tag_val_sep $self->fatal_msg("No tag_val_sep!") && return unless $arg_h->{tag_val_sep}; my $tag_val_sep = $arg_h->{tag_val_sep}; my ($t, $v) = split(/\s*$tag_val_sep\s*/, $data, 2); # $self->debugme("t: ".Dumper($t)."v: ".Dumper($v)); # return if there is nothing in the tag or the value return undef unless $t =~ /\S/ || $v =~ /\S/; return { $t => $v }; } =head2 regexp_parser input: string parser arguments: - regexp # will use the default otherwise - get_field_names # map the data to the names of the fields output: [ results of parsing with regexp ] if get_field_names is on, the output will be a hash otherwise, the output is an array if the regexp doesn't produce any results, the parser returns undef =cut sub regexp_parser { my $self = shift; my $data = shift || return; return unless $data =~ /\S/; my $arg_h = shift; # make sure we have a regexp $self->fatal_msg("No regexp!") && return unless $arg_h->{regexp}; my $regexp = $arg_h->{regexp}; my $values; # use a regexp to get the data @$values = ($data =~ /$regexp/); # $self->debugme("values: ".Dumper($values)); if (! $values || ! @$values) { return undef; } return $self->map_values_to_fields($values, $arg_h) if $arg_h->{get_field_names}; return $values; } =head2 delimited_parser input: string parser arguments: - delimiter # will use the default otherwise - field_list # the fields in the string (if known) - n_fields # the number of fields to split the data into (if known) - get_field_names # map the data to the names of the fields output: results of parsing or undef if get_field_names is on, the output will be a hash otherwise, the output is an array =cut sub delimited_parser { my $self = shift; my $data = shift || return; return unless $data =~ /\S/; my $arg_h = shift; $self->fatal_msg("No delimiter!") && return unless $arg_h->{delimiter}; # $self->debugme("delimited_parser: arg_h: ".Dumper($arg_h)); # $self->debugme("input: ".Dumper($data)); # xxx-delimited data. # split the data with the delimiter my $delimiter = $arg_h->{delimiter}; my $values; # if we known how many fields to split the data into, do so if ($arg_h->{n_fields}) { @$values = split(/\s*$delimiter\s*/, $data, $arg_h->{n_fields}); } # or if we have a field list, split it into that many fields elsif ($arg_h->{field_list}) { @$values = split(/\s*$delimiter\s*/, $data, scalar @{$arg_h->{field_list}}); } # otherwise, just split it into as many pieces as there are else { @$values = split(/\s*$delimiter\s*/, $data); } if (! $values || ! @$values) { return undef; } return $self->map_values_to_fields( $values, $arg_h) if $arg_h->{get_field_names}; return $values; } =head2 subroutine_parser input: string parser arguments: - subr # the subroutine to parse with - arg_h can also contain any other relevant parameters, e.g. get_field_names output: [ results of parsing ] if get_field_names is on, the output will be a hash otherwise, the output is whatever the subroutine produces =cut # subroutine-based parsed sub subroutine_parser { my $self = shift; my $data = shift || return; return unless $data =~ /\S/; my $arg_h = shift; $self->fatal_msg("No subroutine!") && return unless $arg_h->{subr}; my $values = &{ $arg_h->{subr} }($data, $arg_h); return undef if ! $values || # an empty array ( ref $values eq 'ARRAY' && !@$values ) || # an empty hash ( ref $values eq 'HASH' && !%$values ); return $self->map_values_to_fields($values, $arg_h) if $arg_h->{get_field_names}; return $values; } ##### Processing the results... ##### sub map_values_to_fields { my $self = shift; my $values = shift || return; my $arg_h = shift || return; my $field_list = $arg_h->{field_list}; if ($arg_h->{has_header_row}) { if (!$field_list) { $arg_h->{field_list} = $values; $arg_h->{n_fields} = scalar @$values; return; } } ## we can either return an array or a hash my $return_array = $arg_h->{return_array} || undef; # $self->debugme("field_list: ".Dumper($field_list)); # $self->debugme("input: ".Dumper($values)); if (scalar @$field_list < scalar @$values) { $self->warning_msg("may miss some parsed data!"); } # if we have a field list, use the values in it as the field names my $return; my $index = 0; foreach my $f (@$field_list) { next unless defined $values->[$index]; my $f_name = $f; # if the item in the field list is a simple scalar, it's the field name # set the value in the data hash my $value = $values->[$index]; if (! ref $f ) { # fine; a simple scalar } elsif ( ref $f eq 'CODE') { # we are running another parser here # $self->debugme("input into map vals to field sub: ".Dumper($values->[$index])); $value = &$f( $value ); # $self->debugme("CODE result value: ".Dumper($value)."\n"); ## need to find out what we should do with this now! ## how were we supposed to save this? if ($arg_h->{parent_hash_merge}) { while ( my ($key, $val) = each %$value ) { push @{$return->{ $key }}, @$val; } } elsif ($arg_h->{hash_key_field}) { $f_name = $arg_h->{hash_key_field}; } else { # holy shit, what do we do now?! $self->printerr("Help! Don't know what to do with these results!"); } next; } else { $self->printerr("Unknown field structure in map_values_to_fields: ".Dumper($f)); } if ($return_array) { push @$return, { $f_name => $value }; } else { $return = $self->_merge_into_h({ $f_name => $value }, $return); # save everything as an arrayref # push @{$return->{$f_name}}, $value; } =cut else { # we'll do something more fancy with $values->[$index] my $return; if ($f->{subr}) { # there's a subroutine to be done } elsif ($f->{parser}) { # this field needs further parsing # check the parser is ok if ($self->is_valid_parser($f->{parser})) { $self->debugme("parser args: ".Dumper($f->{parser})."\n"); $return = $self->parse($f->{parser}, $values->[$index], $f->{parser}); } else { $self->debugme("Correct parser not found.\n"); # what should we do now?! push @{$data_h->{$f}}, $values->[$index]; } } else { # not doing any specific subroutine $return = $values->[$index]; } if ($f->{field_name}) { push @{$data_h->{ $f->{field_name} }}, $return; } else # some kind of hash-based thing? { while ( my ($key, $val) = each %$return) { push @{$data_h->{ $key }}, @$val; } } } =cut $index++; } # $self->debugme("about to return map_values_to_fields. looks like this:\n".Dumper($return)."\n"); return $return; } =head2 create_new_object A sub to create an object from an input hash Errors will be added to the object if creation failed. input: $self, $object_type, $arg_h containing any options output: the object, if obj creation worked otherwise, nothing =cut sub create_new_object { my $self = shift; my $obj_type = shift || "GO::Object::Generic"; my $arg_h = shift || {}; my $data = shift; # $self->startme(); # $self->debugme("create_new_object arguments: ".Dumper($arg_h)."\n"); my $results = $obj_type->new({ %$arg_h, data => $data, return_as => 'success_hash' }); return $results->{OBJECT} if $results->{SUCCESS}; # add the errors / messages to the parser if there are any $self->add_message_list($results->{ERROR_LIST}) if $results->{ERROR_LIST}; return; } ### OTHER STUFF ### #sub param { # my $self = shift; # my $pname = shift; # if (@_) # { push @{$self->{_PARAMS}{$pname}}, @_; # } # return $self->{_PARAMS}{$pname} || undef; #} =comment # a general interface for autoloading? # not implemented at present sub parse { my $self = shift; my $p_name = shift; if ($self->is_valid_parser($p_name)) { $p_name .= '_parser'; return $self->$p_name(@_); } else { die "$p_name is not a valid parser. Dying "; } } =cut ### SAVING DATA ### sub _add_to_array { my $self = shift; my $x = shift; my $data_arr = shift; # $self->debugme( # "self: ".Dumper($self) . # "data_arr: ".Dumper($data_arr) . # "data to input: ".Dumper($x) . # "\n"); # $self->debugme("data: ".Dumper($data_arr)."\nx: ".Dumper($x)."\n"); push @$data_arr, $x; # push @{$self->{__OUTPUT}}, $x; return $data_arr; } sub _merge_into_h { my $self = shift; my $x = shift; my $data_h = shift; # $self->debugme( # "self: ".Dumper($self) . # "data_h: ".Dumper($data_h) . # "data to input: ".Dumper($x) . # "\n"); if (ref $x ne 'HASH') { my $msg = "Input format is incorrect: HASH required, found ". ( ref $x || 'SCALAR'); $self->fatal_msg($msg); die $msg; } while ( my ($key, $val) = each %$x ) { if ($data_h->{$key}) { if (ref $data_h->{$key} && ref $data_h->{$key} eq 'ARRAY') { #$self->debugme("data_h->{key}: ".Dumper($data_h->{$key})."value: ".Dumper($val)); push @{$data_h->{ $key }}, $val; # push @{$self->{__OUTPUT}{ $key }}, $val; } else { $data_h->{ $key } = [ $data_h->{$key}, $val ]; } } else { $data_h->{$key} = $val; # $self->{__OUTPUT}{$key} = $val; } } # push @{$data_h->{ $x->{tag} }}, $x->{value}; return $data_h; } sub _create_h_key { my $self = shift; my $x = shift; my $data_h = shift; my $key_field = shift; # $self->debugme( # "self: ".Dumper($self) . # "data_h: ".Dumper($data_h) . # "data to input: ".Dumper($x) . # "key field: ".Dumper($key_field) . # "\n"); if ($x->{$key_field}) { my $key; if (! ref $x->{$key_field}) { $key = $x->{$key_field}; } elsif (ref $x->{$key_field} eq 'ARRAY') { # array: take the first value $key = $x->{$key_field}[0]; } else { $self->fatal_msg("Help! Unrecognized key field format! ". ref $x->{$key_field}); return $data_h; } # create the hash using the key and merge the results into the data_h return $self->_merge_into_h({ $key => $x }, $data_h); } else { $self->printerr("key field $key_field not found in record! Record not added."); } return $data_h; } # remove excess whitespace sub __trim_text { my $self = shift; my $text = shift; $text =~ s/^\s*(\S+.+\S)\s*$/$1/mg; return $text; } # check regexp is OK sub __check_regexp { my $self = shift; my $regexp = shift; my $parser_type = shift; my $copy = $regexp; eval { "" =~ /$regexp/; }; if ($@) { $self->fatal_msg("invalid regexp : $@"); return undef; } eval { qr/$regexp/; }; if ($@) { $self->fatal_msg("invalid regexp : $@"); return undef; } if ($parser_type && $parser_type eq 'regexp') { # check for any brackets in the regexp $copy = quotemeta $copy; # $self->debugme($copy); if ( $copy !~ /\(/ || $copy !~ /\)/ ) {# $self->debugme("No brackets found!"); return undef; } } return $regexp; } sub __wrap_array_op { my $sub = shift; my $vals = shift; print STDERR "wrap array op: arguments are:\nvals: ".Dumper($vals); if (ref $vals eq 'HASH') { foreach (values %$vals) { $_ = &$sub($_); } } elsif (ref $vals eq 'ARRAY') { foreach (@$vals) { $_ = &$sub($_); } } else { return &$sub($vals); } print STDERR "returning vals: ".Dumper($vals)."\n"; return $vals; } sub parse_from_form { my $self = shift; my $query = shift || return; my %q_hash = $query->Vars(); # convert form data into usable data foreach (keys %q_hash) { if (exists $q_hash{$_}) { my @temp = split("\0", $q_hash{$_}); $q_hash{$_} = [ @temp ]; } else { # delete any keys with no value delete $q_hash{$_}; } } # put in any specialized parsing stuff here return $self->_parse_from_form(\%q_hash); } sub _parse_from_form { my $self = shift; return shift; } ### PREPARING THE PARSER ### =head2 prep_parser Preparing the parser for some top quality action. input: self, arg_h output: the various subs that the parser is going to run =cut sub prep_parser { my $self = shift; my $arg_h = shift; if (! $arg_h || ref $arg_h ne 'HASH') { $self->fatal_msg("Parser specification must be a hash! Found ". (ref $arg_h || 'SCALAR')); return undef; } $self->startme(); # $self->debugme("\n\n\narguments for parser preparation:\n".Dumper($arg_h)."\n\n"); my $parser; my $err; # translate the parser, if required # convert mftv arguments if present if ($arg_h->{parser} =~ /^mftv$/i || $arg_h->{parser} =~ /^multi.field.tag.value$/i) { $arg_h = $arg_h = $self->_prep_mftv_parser($arg_h); if (!$arg_h) { $self->fatal_msg("warning: could not understand parser arguments."); return undef; } } ### Prepare the parser subroutine if (! $arg_h->{parser} || ! defined $arg_h->{parser}{type}) { $self->fatal_msg("No parser specified!"); $err++; # return undef; } else { # check if the parser is a dummy or not, and if not, check the args are OK if ($arg_h->{parser}{type} =~ /^dummy$/i) { # this is just a dummy parser to fill up space... # $self->debugme("Using a dummy parser..."); } else { # prepare the parser for action my $parser_sub = $self->_prepare_parser_sub($arg_h->{parser}); if ($parser_sub) { $parser->{parse} = $parser_sub; } else { $err++; } } } # data to filter if ($arg_h->{filter}) { ## convert this into an if_then argument if ($arg_h->{if_then}) { if (ref $arg_h->{if_then} eq 'HASH') { $arg_h->{if_then} = [ $arg_h->{if_then}, { 'if' => $arg_h->{filter}, 'when' => 'prepare' } ]; } elsif (ref $arg_h->{if_then} eq 'ARRAY') { push @{$arg_h->{if_then}}, { 'if' => $arg_h->{filter}, 'when' => 'prepare' }; } } else { $arg_h->{if_then} = { 'if' => $arg_h->{filter}, 'when' => 'prepare' }; } } if ($arg_h->{if_then}) { my $test_sub = $self->_prepare_if_then_sub($arg_h->{if_then}); if ($test_sub) { if (ref $test_sub eq 'ARRAY') { foreach (@$test_sub) { $parser->{ $_->{when} } = # sub { return &{$_->{'subr'}}( @_ ); }; $_->{'subr'}; } } else { $parser->{ $test_sub->{when} } = # sub { return &{$test_sub->{'subr'}}( @_ ); }; $test_sub->{'subr'}; } } else { $err++; #return undef; } } # prepare: any transforms we might want to do before parsing # for example, if we want to alter the input in a routine way # process: post-parsing transforms foreach ('prepare', 'post_process') #, 'process', ) { next unless $arg_h->{$_}; my $sub = $self->_prepare_sub($arg_h->{$_}, $_); if ($sub) { $parser->{$_} = $sub; } else { #$self->fatal_msg("$_ sub not created!"); $err++; #return undef; } } if ($arg_h->{'process'}) { my $sub = $self->_prepare_sub($arg_h->{process}, 'process'); if ($sub) { # we need to do this to every member of the array or hash $parser->{process} = sub { my $input = shift; # $self->debugme("input into process: ".Dumper($input)); if (ref $input eq 'ARRAY') { $_ = &$sub($_) foreach @$input; } elsif (ref $input eq 'HASH') { my $result_h; while (my ($k, $v) = each(%$input)) { my $r = &$sub({ $k => $v }); if ($r) { $result_h = $self->_merge_into_h($r, $result_h); } } $input = $result_h; } # $self->debugme("after processing: ".Dumper($input)); return $input; }; } else { $self->fatal_msg("process sub not created!"); $err++; # return undef; } } if ($arg_h->{inner_parser}) { my $inner = $self->prep_parser($arg_h->{inner_parser}); if ($inner) { ## put the inner parser into the stack of stuff to be executed ###### FIX THIS!! <-- fix what? $parser->{inner_parser} = sub { # run the parser my $results = $self->run_loop( @_, $inner ); # $self->debugme("results of the second parser: ".Dumper($results)); # return the results return $results; }; } else { $self->fatal_msg("Could not create inner_parser!"); $err++; # return undef; } } ## Let's get rid of this bit! <-- no, I think it's OK # if we want to convert the data into some other structure, do it here if ($arg_h->{return_object}) { my $return_sub = $self->_prepare_return_sub($arg_h); if ($return_sub) { $parser->{return_object} = $return_sub; } else { $self->fatal_msg("could not create return_sub!"); $err++; # return undef; } } # what kind of data structure to put the results into my $save_sub = $self->_prepare_save_sub($arg_h); if (! $save_sub ) { $err++; } else { $parser->{save} = $save_sub; } =cut if ($arg_h->{save_as}) { if ($arg_h->{save_as} eq 'hash_add') { # we need to know what to use as the key! if ($arg_h->{hash_key_field}) { $self->debugme("save method is hash add. Good to know, eh?"); # if we've got a field list, make sure that we actually have the field! if ($arg_h->{parser}{field_list} && ! grep { $arg_h->{hash_key_field} eq $_ } @{$arg_h->{parser}{field_list}}) { $self->fatal_msg("Specified hash key does not exist in field_list!"); $err++; # return undef; } else { $parser->{save} = sub { return $self->_create_h_key(@_, $arg_h->{hash_key_field}); }; } } else { $self->fatal_msg("No hash key specified for add_to_hash! Crap.\n"); $err++; # return undef; } } elsif ($arg_h->{save_as} eq 'hash_merge') { $self->debugme("save method is hash merge. Good to know, eh?\n"); $parser->{save} = sub { return $self->_merge_into_h(@_); }; } elsif ($arg_h->{save_as} eq 'parent_hash_merge') { $self->debugme("save method is parent hash merge. Good to know, eh?\n"); $parser->{save} = sub { return $_[-1]; }; } } # if (! $parser->{save} ) # { $self->debugme("No save method found. Using the default...\n"); # $parser->{save} = sub { # my $saved = shift; # my $record = shift; # push @$saved, $record; # }; # } =cut if ($err) {# $self->debugme("Found errors during parser creation"); return undef; } $Data::Dumper::Deparse = 1; # $self->debugme("\nThe parser: ".Dumper($parser)."\n"); $Data::Dumper::Deparse = 0; foreach my $sub ('prepare', 'parse', 'inner_parser', 'process', 'return_object', 'post_process') { foreach ("", "_filter") { if ($parser->{$sub.$_}) { if (ref $parser->{$sub.$_} eq 'ARRAY') { push @{$parser->{execution_stack}}, @{$parser->{$sub.$_}}; } else { push @{$parser->{execution_stack}}, $parser->{$sub.$_}; } } } } return $parser; } =head2 _prepare_parser_sub input: arg_h containing type => # the type of parser; valid parsers are regexp, delimited, tag_value, subroutine, multi_field_tag_value (mftv) delimiter => # for delimited parser; can be a string or a Regexp tag_val_sep => # ditto but for the tag_value parser regexp => # ditto for the regexp parser is_regexp => 1 # for delimiter and tag_val_sep, if they should be # treated as regular expressions subr => # subroutine for the subroutine parser OPTIONAL: get_field_names => 1 # set this if you want the data to be mapped to the # field names has_header_row => 1 # if the data is in some kind of regular format with the # first row being the field names field_list => [...] # array containing the names of the fields n_fields => # # the number of fields to split the data into if using # a delimited parser; if not set, will use the number # of fields in the field_list (if specified) =head3 Extra field_list Goodness field_list values are usually scalars, but you can also specify extra parsing of the values by putting in a hash. output: parser subroutine if all is well; otherwise undef =cut sub _prepare_parser_sub { my $self = shift; my $arg_h = shift; # check the arg_h is in the right format if (! $arg_h || ref $arg_h ne 'HASH') { $self->fatal_msg("Parser arguments must be a hash! Found " . (ref $arg_h || 'SCALAR') ); return undef; } # $self->debugme("parser arguments: ".Dumper($arg_h->{parser})); if ($arg_h->{type} eq 'regexp') { ## check for parser args!! # multiple fields, in some regexp-able format if (! defined $arg_h->{regexp}) { $self->fatal_msg("No regular expression supplied; cannot run parse"); return undef; } if (! ref $arg_h->{regexp}) { # convert a string into a regexp # add in a check that the string creates a valid regexp $arg_h->{regexp} = $self->__check_regexp($arg_h->{regexp}, 'regexp'); return undef if ! $arg_h->{regexp}; } # some other mysterious form of regexp elsif (ref $arg_h->{regexp} ne 'Regexp') { $self->fatal_msg("Cannot understand regexp in form ".ref $arg_h->{regexp}); return undef; } # $self->debugme("regexp looks like this: ".Dumper($arg_h->{regexp})); $self->__check_parser_args($arg_h); return sub { # my $record = shift; return $self->regexp_parser(shift, $arg_h); }; } # Prep delimited or tag-value parsers elsif ($arg_h->{type} eq 'delimited' || $arg_h->{type} eq 'tag_value') { my $p_sep_hash = { delimited => 'delimiter', tag_value => 'tag_val_sep' }; my $sep = $p_sep_hash->{$arg_h->{type}}; # ensure that the separator is defined if it does exist # delete it if it exists but isn't defined if (exists $arg_h->{$sep} && ( ! defined $arg_h->{$sep} || length $arg_h->{$sep} == 0 )) { $self->warning_msg("$sep exists but is undefined; using default"); delete $arg_h->{$sep}; } # if it doesn't exist, use the default if (! $arg_h->{$sep} ) { $arg_h->{$sep} = $self->default_value($sep); } # if the separator is a text string... if (! ref $arg_h->{$sep}) { # if it's not a regular expression, quote metacharacters if (! $arg_h->{is_regexp} ) { $arg_h->{$sep} = quotemeta $arg_h->{$sep}; } # convert the string into a regexp # add in a check that the string creates a valid regexp $arg_h->{$sep} = $self->__check_regexp($arg_h->{$sep}); return undef if ! $arg_h->{$sep}; } # some other mysterious form of separator - return an error elsif (ref $arg_h->{$sep} ne 'Regexp') { $self->fatal_msg("Cannot understand $sep in form ".ref $arg_h->{$sep}); return undef; } ## otherwise, we're dealing with Regexps, which should be fine. $self->__check_parser_args($arg_h); my $p_type = $arg_h->{type} .= "_parser"; # create the subroutine... return sub { return $self->$p_type(shift, $arg_h); }; } elsif ($arg_h->{type} eq 'subroutine') { # check that the subroutine is defined and that it's a code reference if (! $arg_h->{subr} || ( $arg_h->{subr} && ref $arg_h->{subr} ne 'CODE' ) ) { $self->fatal_msg("No valid subr defined for subroutine parser!"); return undef; } return $self->__return_eval_sub($arg_h->{subr}, "subroutine parser", @_); return sub { my $result; eval { $result = &{$arg_h->{subr}}(@_) }; return $result unless $@; if ($@) { # we actually want to stop completely here, so let's die die "Subroutine parser encountered an error: $@"; # $self->fatal_msg("Subroutine parser encountered an error: $@"); # return undef; } }; } else # we don't know the parser type! { $self->fatal_msg("Parser type is unknown or undefined!\n".Dumper($arg_h->{type})); return undef; } } =head2 __check_parser_args Ensures that the correct arguments are supplied for the parser =cut sub __check_parser_args { my $self = shift; my $parser_args = shift; # depending on what format we want our results in, we may want to attach # the values from the field_list to our data if ( # ($parser_args->{return_as} && $parser_args->{return_as} =~ /hash/) || $parser_args->{get_field_names} || $parser_args->{has_header_row} || $parser_args->{field_list} ) { ## we want to match up the field names with the values $parser_args->{get_field_names} = 1; # $self->debugme("Setting the get_field_names flag"); } # if we are doing a parse involving a field_list, see if there's going to be # another parse if ($parser_args->{field_list}) {# $self->debugme("Found a field list!"); foreach my $f ( @{$parser_args->{field_list}} ) { # if it's a plain scalar, there's nothing more to do next unless ref($f); if (ref $f eq 'HASH') { # we'll make the blind assumption that the hash is a parser hash my $inner_parser = $self->prep_parser($f); if ($inner_parser) {# $self->debugme("inner_parser: ".Dumper($inner_parser)); # put this parser stuff in # $parser->{process} = ## create a new parser $f = sub { # run the parser my $results = $self->run_loop( @_ , $inner_parser ); # $self->debugme("results of the second parser: ".Dumper($results)); # return the results return $results; }; # we need to know what to do with the results if ($f->{hash_key_field}) { $parser_args->{hash_key_field} = $f->{hash_key_field}; } elsif ($f->{save_as} && $f->{save_as} eq 'parent_hash_merge') { $parser_args->{parent_hash_merge} = 1; } else { # help! what should we do?! $self->printerr("Help! Don't know how to deal with the results of this sub!"); } } else { $self->fatal_msg("inner parser subr not created!"); return undef; } } else { $self->fatal_msg("Data in field_list must be HASH or SCALAR, not ".ref $f); return undef; } } } } =head2 _prep_mftv_parser Preparing the multi-field-tag-value parser We've used shortcut tags here, so change it back into the standard format There are two steps in this parse: 1) split each record with \n to produce a set of fields (done by $p1) 2) split each field into tag-value pairs ($p2) =cut sub _prep_mftv_parser { my $self = shift; my $arg_h = shift; $self->startme(); my $p1; my $p2; # $self->debugme("arg_h: ".Dumper($arg_h)); foreach ('filter', 'prepare', 'process', 'if_then', 'post_process') { if ($arg_h->{$_ . "_record"}) { $p1->{$_} = $arg_h->{$_ . "_record"}; } if ($arg_h->{$_ . "_field"}) { $p2->{$_} = $arg_h->{$_ . "_field"}; } } $p1->{parser} = { type => 'delimited', delimiter => $arg_h->{field_sep} || "\n", return_as => 'values_only' }; $p2->{parser} = { type => 'tag_value', tag_val_sep => $arg_h->{tag_val_sep} || ":" }; $p2->{save_as} = 'hash_merge'; if ($arg_h->{return_record_as_object}) { $p1->{return_object} = $arg_h->{return_record_as_object}; } $p1->{inner_parser} = $p2; $self->debugme("mftv parser:\n".Dumper($p1)."\n"); return $p1; } =head2 _prepare_if_then_sub Prepare a test subroutine input: self, arg_h output: either undef or a subroutine reference input format: if_then => { if => < the test here > then => < action to perform, CODE ref > when => 'stage_name' # defaults to 'prepare' if not set } valid inputs for 'if': scalar - assumed to be a text string that the value must match regexp - ditto code - a subroutine reference array - each item in the array is examined and converted into a subroutine; the array is treated as an OR query, i.e. if one item succeeds, the whole query succeeds hash - a boolean query, formatted for GO::Boolean valid inputs for 'then': code - a subroutine to be executed if the test returns true hash - a dispatch table with values as keys and subroutines to be executed if the result is equal to the key valid inputs for 'when': prepare, post_process, # parse, process, save The test is done *before* the stage in each case =cut sub _prepare_if_then_sub { my $self = shift; my $test_args = shift || return; # the test criteria $self->debugme("test_args: ".Dumper($test_args)); # the test args must either be an array or a hash if (ref $test_args && ref $test_args eq 'ARRAY') { # if we have an array, check that each member of the array is a valid # if-else sub my $test_arr; foreach (@$test_args) { if (! ref $_ || ref $_ ne 'HASH') { # we're not getting into one of those recursion situations, sorry $self->fatal_msg("if_then sub must be of type HASH, not " . ref $_ || 'SCALAR'); } else { my $t = $self->_prepare_if_then_sub($_); push @$test_arr, $t if $t; } } if (scalar @$test_args == scalar @$test_arr) { # phew! return $test_arr; } else { return undef; } } elsif (! ref $test_args || ref $test_args ne 'HASH') { $self->fatal_msg("if_then sub must be of type HASH, not " . ref $test_args || 'SCALAR'); return undef; } # check that we have a test to perform if (! $test_args->{'if'}) { $self->fatal_msg("No test found in if_then arguments"); return undef; } my $if_then_sub; ### Check that the test sub is OK my $test_sub = $self->__check_test( $test_args->{'if'} ); return undef if ! $test_sub; # see what we're going to do with the results ## 'then' must be a hash or code if (! $test_args->{'then'}) { # this is a basic filter; if there is a result from the sub, continue; # otherwise, go on to the next item to test $if_then_sub = sub { my $value = shift; my $result = &$test_sub($value); return $value if $result; return undef; }; } elsif (! ref $test_args->{'then'} ) { $self->fatal_msg("if_then 'then' data should be of type HASH or CODE, not SCALAR"); return undef; } elsif (ref $test_args->{'then'} eq 'CODE') { my $action = $test_args->{'then'}; # fine, no need to do anything $if_then_sub = sub { my @args = @_; my $result = &$test_sub(@args); if ($result) { # if there is a result, do the action return #&$action(@args); $self->__return_eval_sub($action, "if_then action sub", @args); } return undef; }; } elsif (ref $test_args->{'then'} eq 'HASH') { # see if we're dealing with an inner parser... # this may well be a whole new and exciting world we're entering into! if ($test_args->{'then'}{'inner_parser'}) # we've got an inner parser on the go... woohoo! { my $inner = $self->prep_parser($test_args->{'then'}{inner_parser}); return undef if ! $inner; # otherwise, run the parser on the results my $loop_sub = 'run_loop'; if ($test_args->{'when'} && $test_args->{'when'} eq 'post_process') { $loop_sub = 'run_loop_single_item'; } $if_then_sub = sub { my $value = shift; print STDERR "value looks like this: ".Dumper ($value) ."\n"; my $result = &$test_sub($value); if ($result) { return $self->$loop_sub( $value , $inner); } return undef; }; } else { # if the item returns this result, do this # check that what is in the 'action' is a valid thing to do foreach my $action (values %{$test_args->{'then'}}) { if (! ref $action) { $self->fatal_msg("if_then 'then' hash value should be of type CODE, not SCALAR"); return undef; } elsif (ref $action eq 'HASH') { if ($action->{'inner_parser'}) # we've got an inner parser on the go... woohoo! { # make sure the inner parser passes muster my $inner = $self->prep_parser($action->{inner_parser}); return undef if ! $inner; # otherwise, run the parser on the results my $loop_sub = 'run_loop'; if ($test_args->{'when'} && $test_args->{'when'} eq 'post_process') { $loop_sub = 'run_loop_single_item'; } # convert 'action' into a subroutine $action = sub { my $value = shift; # $self->debugme("value looks like this: ".Dumper ($value)); my $result = &$test_sub($value); if ($result) { return $self->$loop_sub( $value , $inner ); } return undef; }; } else { $self->fatal_msg("Don't know what to do with action hash!"); return undef; } } elsif (ref $action eq 'CODE') { # a code ref: we're fine } else { $self->fatal_msg("if_then 'then' hash value should be of type CODE or HASH, not ". ref $action); return undef; } } $if_then_sub = sub { my @args = @_; my $result = &$test_sub(@args); if ($result) { if ($test_args->{'then'}{$result}) { # if the result exists in the action hash, do it return $self->__return_eval_sub($test_args->{'then'}{$result}, "if_then action sub", @args); } # $self->debugme("got a result, but it wasn't one we expected!"); if ($test_args->{'then'}{''}) { return $self->__return_eval_sub($test_args->{'then'}{""}, "if_then action sub", @args); } } return undef; }; } } else { $self->fatal_msg("if_then 'then' data should be of type HASH or CODE, not ". ref $test_args->{'then'}); } my $when; if ( $test_args->{when} ) { # this is when the test should be executed # allowable times # BEFORE prepare # BEFORE parse # AFTER parse, before any processing the parse results # BEFORE post-process # BEFORE save my $times = [ 'prepare', 'post_process' ]; #, 'parse', 'process', 'post_process', 'save' ]; if (ref $test_args->{when}) { $self->fatal_msg("if_then when argument should be of type SCALAR, not ". ref $test_args->{when}); return undef; } elsif ( ! grep { $test_args->{when} eq $_ } @$times ) { $self->fatal_msg("if_then when argument '".$test_args->{when}."' not understood! Please choose one of the following: ".join(", ", @$times)); return undef; } else { # phew! We're ok $when = $test_args->{when} . "_filter"; } } else { # assume it is a prepare filter $when = "prepare_filter"; } # phew! We're ok return { when => $when, subr => $if_then_sub }; } =head2 __check_test Makes sure that test criteria are in the correct format input: self, arg_h output: either undef or a subroutine reference Valid inputs: scalar - assumed to be a text string that the value must match regexp - ditto code - a subroutine reference array - each item in the array is examined and converted into a subroutine; the array is treated as an OR query, i.e. if one item succeeds, the whole query succeeds hash - a boolean query, formatted for GO::Boolean =cut sub __check_test { my $self = shift; my $test = shift || return; # the test criteria # convert the test bits into subroutines if (! ref($test) ) { # a single value # turn this into a subroutine that we can execute # make sure that we have a valid regexp $test = $self->__check_regexp($test); return undef if ! $test; return sub { my $val = shift; return $val =~ /$test/; }; } elsif (ref $test eq 'Regexp') { return sub { my $val = shift; return $val =~ /$test/; }; } elsif (ref $test eq 'CODE') { return sub { my $val = shift; my $result; eval { $result = &$test($val) }; if ($@) { die "Got an error from a subroutine: $@"; } return $result || undef; }; } elsif (ref $test eq 'ARRAY') { my $new_test_arr; foreach my $i (@$test) { my $subr = $self->__check_test($i); push @$new_test_arr, $subr if $subr; } if (!$new_test_arr || ! @$new_test_arr || scalar @$new_test_arr != scalar @$test) { $self->fatal_msg("Invalid tests found in test array"); return undef; } # at present this is an OR test, # i.e. if one test passes, the whole lot do # check that it's a valid query my $bool_query = GO::Boolean::check_boolean_query( { OR => [ map { { FN => $_ } } @$new_test_arr ] } ); if (!$bool_query) { $self->fatal_msg("Test arguments are invalid"); return undef; } return sub { my $val = shift; return GO::Boolean::run_boolean_query($bool_query, { test => $val } ); }; } elsif (ref $test eq 'HASH') { if ($test->{FN}) { # it looks like we're using the hash construction from # GO::Boolean::run_boolean_query # unfortunately we don't know what the operator is. Make it 'OR' by # default # check that it's a valid query my $bool_query = GO::Boolean::check_boolean_query({ OR => [ $test ] }); if (!$bool_query) { $self->fatal_msg("Test arguments are invalid"); return undef; } return sub { my $val = shift; return GO::Boolean::run_boolean_query( $bool_query, { test => $val } ); }; } elsif ($test->{AND} || $test->{OR} || $test->{NOT}) { # it looks like we're using the hash construction from # GO::Boolean::run_boolean_query # let's check that it's a valid query my $bool_query = GO::Boolean::check_boolean_query($test); if (!$bool_query) { $self->fatal_msg("Test arguments are invalid"); return undef; } return sub { my $val = shift; return GO::Boolean::run_boolean_query($bool_query, { test => $val }); }; } else { $self->fatal_msg("Test arguments are invalid:\n".Dumper( $test ) ); return undef; } } else {# $self->debugme("Unrecognized test type: ". ref($test) ."\n"); } return undef; } =head2 _prepare_return_sub Prepare a subroutine to create a new object from the data input: self, arg_h with return_object => 'object_name' specified output: the object or undef if no object creation results =cut sub _prepare_return_sub { my $self = shift; $self->startme; my $arg_h = shift; # make sure we have arguments and a return_object specified if (! $arg_h || ! %$arg_h || ! defined $arg_h->{return_object} || ref $arg_h->{return_object} ) { $self->fatal_msg("No return object specified in return sub!"); return undef; } my $obj_type = $arg_h->{return_object}; # $self->debugme("object type: $obj_type\n"); # check that the object actually exists! eval "require $obj_type"; if ($@) { $self->fatal_msg("$obj_type is not installed on this system: $@"); return undef; } ## it exists... phew! ## get various bits and bobs for object creation and return return sub { # my $new = return $self->create_new_object( $obj_type, { object_spec => $arg_h->{object_spec} || $obj_type->get_spec(), dfv_profile => $arg_h->{dfv_profile} || $obj_type->dfv_profile(), transform_data => 1, return_as => 'success_hash', check_input => 1, }, shift); # $self->debugme("new object: ".Dumper($new)); # return $new; }; } =head2 _prepare_sub Gets the subroutines used by the parser ready for action =cut sub _prepare_sub { my $self = shift; my $args = shift || return; my $sub_type = shift; # $self->debugme("args: ".Dumper($args)."sub_type: ".Dumper($sub_type)."\n"); if (! ref $args) { $self->fatal_msg("could not understand argument $args"); return undef; } elsif (ref $args eq 'CODE') { # fine, this is a subroutine; we can return it as it is return sub { $self->__return_eval_sub($args, "custom $sub_type sub", @_) }; } elsif (ref $args eq 'ARRAY') { my @new_fns; # check all these mofos are OK foreach (@$args) { my $new = $self->_prepare_sub($_); if ($new) { push @new_fns, $new; } else { $self->fatal_msg("could not identify fn in $_\n"); return undef; } } return sub { # iterate through the code until either all the subs are complete # or we hit some kind of error my $input = shift; foreach (@new_fns) { my $result = &$_($input); return undef unless $result; $input = $result; } return $input; }; } elsif (ref($args) eq 'HASH') { if ($args->{parser}) { # this may well be another parser. How thrilling! my $parser = $self->prep_parser($_); if ($parser) { # our parser was OK return sub { return $self->run_loop( @_, $parser ); }; } else { $self->fatal_msg("prep_parser failed!"); return undef; } } # elsif ($args->{filter}) # { # this is a filter subr # my $filter = $self->_prepare_filter_sub($args->{filter}); # if (!$filter) # { # die # # } # elsif ($args->{boolean}) # { # run a boolean sub # } else { $self->fatal_msg("could not understand parser argument hash\n".Dumper($args)); return undef; } } else { $self->fatal_msg("argument type is invalid: ".ref($args)); return undef; } return undef; } =head2 _prepare_save_sub How to save the data =cut sub _prepare_save_sub { my $self = shift; my $arg_h = shift; # what kind of data structure to put the results into if ($arg_h->{save_as}) { if ($arg_h->{save_as} eq 'hash_add') { # we need to know what to use as the key! if (! $arg_h->{hash_key_field}) { $self->fatal_msg("No hash key specified for hash_add! Crap."); return undef; } # if we've got a field list, make sure that we actually have the field! if ($arg_h->{parser_args}{field_list} && ! grep { $arg_h->{hash_key_field} eq $_ } @{$arg_h->{parser_args}{field_list}}) { $self->fatal_msg("Specified hash key does not exist in field_list!"); return undef; } # $self->debugme("save method is hash add. Good to know, eh?"); return sub { return $self->_create_h_key(@_, $arg_h->{hash_key_field}) }; } elsif ($arg_h->{save_as} eq 'hash_merge') {# $self->debugme("save method is hash merge. Good to know, eh?"); return sub { return $self->_merge_into_h(@_) }; } elsif ($arg_h->{save_as} eq 'parent_hash_merge') {# $self->debugme("save method is parent hash merge. Good to know, eh?"); return sub { return $_[-1] }; } elsif ($arg_h->{save_as} eq 'array') { #delete $arg_h->{save_as}; return sub { return $self->_add_to_array(@_) }; } else {# $self->debugme("Do not understand the save argument ".$arg_h->{save_as}); return undef; } } # nothing specified... return sub { return $self->_add_to_array(@_) }; } =head2 __return_eval_sub Wraps a subroutine in an eval =cut sub __return_eval_sub { my $self = shift; my $sub = shift; my $sub_name = shift || "subroutine"; # return # sub { my $result; eval { $result = &$sub(@_) }; return $result unless $@; if ($@) { # we actually want to stop completely here, so let's die die "Encountered an error in " . $sub_name . ": $@"; } # }; } 1;