package GO::Tool::ToolsTool; use strict; use lib 'go/scratch/tools'; use base 'GO::Tool::GenericGoTool'; ### Various modules required use Data::Dumper; use CGI::Carp qw(fatalsToBrowser); use GO::Object::Tool; # use GO::Object::ToolSearch; use GO::TestSet qw( dfv_test); use GO::Utilities qw(:all); use GO::Boolean; use GO::GeneralPurposeParser; use URI::Escape; #use Utility::TSLParser; my $default_sort_order = [ 'year_asc', 'author', 'title' ]; ### tool db file my $tool_db = "/Users/gwg/go/doc/tool-data.txt"; ### the tool master, official adder of entries to the GO tool db ### new tool will be sent by email to this address for checking my $tool_master = ""; ### extra data my $extra_data = { display_labels => { }, }; sub _specification { my $self = shift; my $form_values = { 'sort' => [ qw( name ) ], 'group_by' => [ qw( none compatible_os feature tool_type ) ], 'op' => [ qw( exact and or ) ], page_size => [ qw( 10 20 50 100 all ) ], 'feature' => [ 'ont_view', 'annot_view', 'ont_edit', 'annot_edit', 'godb', 'stat', 'sw', 'textmine', 'data', 'other_feature' ], 'license' => [ 'free_academic', 'proprietary', ], compatible_os => [ 'win', 'mac', 'unix', 'linux' ], tool_type => [ 'online', 'standalone' ], }; return ( 'rm', { test => dfv_test('is_in_list_p', { list => [ qw(query_form browse search) ], this => 1 }), default => 'query_form', dependencies => { "search" => [ qw( query ) ], }, }, ## SEARCH STUFF 'query', { search => { test => qr/\w{2,}/, # number of consecutive work characters must be greater than 1 required => 1, }, # '*' => { # type => 'scalar', # }, }, 'op', { search => { test => dfv_test('is_in_list_p', { list => $form_values->{'op'}, this => 1 }), default => 'or', form_values => $form_values->{'op'}, }, }, ## General display bits 'sort', { test => [ dfv_test('is_in_list_p', { list => $form_values->{'sort'}, this => 1 }), $self->dfv_do_sub('check_sort_params', { params_as_hash => ['sort', 'group_by' ] }) ], # default => $default_sort_order->[0], form_values => , }, 'sort2', { test => [ dfv_test('is_in_list_p', { list => $form_values->{'sort'}, this => 1 }), $self->dfv_do_sub('check_sort_params', { params_as_hash => ['sort', 'sort2', 'group_by' ] }) ], form_values => [ qw( year_asc author title year_desc ) ], dependencies => [ 'sort' ], }, 'group_by', { test => dfv_test('is_in_list_p', { list => $form_values->{group_by}, this => 1 }), default => 'none', form_values => $form_values->{group_by}, }, 'page', { test => qr/^\d+$/, # must be a number default => '1', }, 'page_size', { test => qr/^(\d+|all)$/, # must be a number or all default => '20', form_values => [ qw( 10 20 50 100 all ) ], }, 'show_?????', { test => qr/^(1|true|on)$/, }, ## tool-specific data "name", { test => qr/\w{2,}/, # must contain more than two consecutive word chars required => 1, }, "url", { test => dfv_test('is_an_url_p'), required => 1, }, "email", { test => [ qr/\S+@\S+\.\S+/ ],#email(), required => 1, }, # "developer_list", { "developer", { test => dfv_test('is_a_subclass_of_p', { class => 'GO::Object::Developer', this => 1 }), required => 1, allow_multiple => 1, }, "license", { test => dfv_test('is_in_list_p', { list => $form_values->{license}, this => 1 }), form_values => $form_values->{license}, required => 1, }, "description", { test => dfv_test('is_a_string_p'), required => 1, }, "compatible_os", { test => dfv_test('is_in_list_p', { list => $form_values->{compatible_os}, this => 1 }), implies => { 'is_standalone_tool' => 1 }, name => 'Compatible operating systems', allow_multiple => 1, }, "feature", { test => dfv_test('is_in_list_p', { list => $form_values->{feature}, this => 1 }), form_values => $form_values->{feature}, required => 1, allow_multiple => 1, }, "publication", { test => dfv_test('is_a_string_p'), # valid_prefixes => [ 'pmid', 'doi' ], allow_multiple => 1, }, # generated automatically "name_lc", { type => 'automatic', }, "name_acronym", { type => 'automatic', test => dfv_test('is_a_string_p'), name => 'Acronym', }, "name_abbr", { type => 'automatic', test => dfv_test('is_a_string_p'), name => 'Abbreviated name', }, "submission_date", { # get this from the CGI type => 'automatic', test => dfv_test('is_a_date_p'), }, "updated", { # get this from the CGI? type => 'automatic', test => dfv_test('is_a_date_p'), }, # optional, boolean "is_open_source", { test => qr/^(1|true)$/, }, "is_standalone_tool", { test => qr/^(1|true)$/, }, ); } =head2 Tools tool Run modes: - query_form: initial page with the search / browse options on it use CGI values if there are any, or the defaults if not - browse: load the tools if 'group_by' is set, filter by the group_by crit sort the list get the subset (using page / page_size) if reqd - search: get the search query (query, op) and create the appropriate query object load the tools perform the search if 'group_by' is set, filter by the group_by crit sort the list get the subset (using page / page_size) if reqd =cut sub setup { my $self = shift; $self->start_mode('query_form'); $self->mode_param(\&set_run_mode); $self->error_mode('die_and_output_error'); $self->run_modes( 'query_form' => 'query_form', 'browse' => 'browse', 'search' => 'search', 'fatal_error' => 'die_and_output_error', 'AUTOLOAD' => 'query_form', ); $self->set_verbosity('superverbose'); } sub set_run_mode { my $self = shift; my %cgi_h = $self->query->Vars; if (defined $cgi_h{rm}) { return $cgi_h{rm}; } elsif (exists $cgi_h{query}) { return 'search'; } return 'query_form'; } # configure the template before we do anything else sub cgiapp_init { my $self = shift; $self->tt_config( TEMPLATE_OPTIONS => { INCLUDE_PATH=> [ qw( /Users/gwg/go/scratch/tools/templates/ /Users/gwg/go/www/) ], TRIM=>1, }, ); } ## stuff to do before we run the page-specific code sub cgiapp_prerun { my $self = shift; $self->startme; $self->debugme("Current runmode: ".$self->get_current_runmode."\nquery: ".Dumper( $self->query() ) ); # check the input query, replace dodgy params with the defaults my $results = $self->check_input_query({ replace_with_defaults => 1, spec_profile => $self->get_current_runmode }); $self->debugme("success status: ".Dumper($results->success)); if (!$results->success) { # set the mode to 'query_form' $self->prerun_mode('query_form'); # create an error message my $err_str; # if the mode is search and we have problemos with the query, # go back to showing the query form if ($self->get_current_runmode eq 'search' && ! $results->valid('query') ) { if ($results->missing('query')) { $err_str = 'You have not entered a query.'; } elsif ($results->invalid('query')) { $err_str = 'You entered an invalid query.'; } # update the validated query to reflect this my $query_h = $self->param('validated_query'); $query_h->{rm} = 'query_form'; $self->param('validated_query', $query_h); } if (!$err_str) { $err_str = join "
", @{ GO::Utilities::summarize_errors($results) }; } # $self->fatal_error($err_str); } # create / update the sort order $self->create_sort_order; $self->debugme("run mode: ".$self->prerun_mode); } sub search { my $self = shift; return $self->retrieve_data({ search => 1 }); } sub browse { my $self = shift; return $self->retrieve_data; } =head2 query_form The 'front page' of the tools tool, with the search, browse and viewing options =cut sub query_form { my $self = shift; $self->startme; # get the other bits and pieces for the template and output it return $self->tt_process("view_tools.tmpl", $self->set_up_output) || die "Template toolkit messed up, the bastard!: ", $self->tt_obj->error(), "Dying"; } =head2 retrieve_data Function to get data from the DB, either for browsing or by searching. input: $self, optional flag which will trigger a search if present; otherwise, the operation will be browse. - browse: load the tools if 'group_by' is set, filter by the group_by crit sort the list get the subset (using page / page_size) if reqd - search: load the tools get the search query (query, op) and create the appropriate query object perform the search if 'group_by' is set, filter by the group_by crit sort the list get the subset (using page / page_size) if reqd =cut sub retrieve_data { my $self = shift; $self->startme; my $options = shift; my $output; # load the tools; dies if no tools are present my $tool_arr = $self->_get_tool_data; my $tool_h; # set the number of the entries in the tools db $self->param("n_tools", scalar @$tool_arr ); # get the checked query my $query_h = $self->param('validated_query'); # $self->debugme("query_h: ". Dumper($query_h)); # $output->{query_h} = $query_h; if ($options->{search}) # this is a search query { # create the search query using the data in parsed query data # the params that we need in the query are: # - op # - query my $query_struct; # the structure containing the query we're going to perform my @search_strings; # let's have a look at our search params my $op = $query_h->{op}; if ($op eq 'exact') { # exact query. Keep all the words together push @search_strings, $query_h->{query}; $op = 'and'; } elsif ($op eq 'or' || $op eq 'and') { @search_strings = split(/\s+/, $query_h->{query}); } # create the query $query_struct = { uc($op) => [ map { { PARAM => 'all_params_as_text', FN => \&GO::TestSet::test_param, ARGS => [ 'is_in_string_p', { string => $_ }], } } @search_strings ], }; $self->debugme("query_struct: ".Dumper($query_struct)); # turn tool_arr into a hash (for ease of access/use) # my $tool_h; my $acc = 1; $acc++ and $tool_h->{$acc} = $_ foreach @$tool_arr; # perform the query my $result_h = GO::Boolean::run_boolean_query( $query_struct, $tool_h ); # $self->debugme("result_h: ".Dumper([ keys %{$output->{result_h}} ])); # return if there are no results if (!$result_h || ! values %$result_h) { $self->fatal_error({ MSG_CODE => 'no_search_results' }); return $self->tt_process("view_tools.tmpl", $self->set_up_output($output)) || die "Template toolkit messed up, the bastard!: ", $self->tt_obj->error(), "\n"; } # otherwise, reset $tool_arr to be the search results $tool_arr = [ values %$result_h ]; $self->param('n_results', scalar values %$result_h); } # if we have a group_by param, add it to the fray # sort the results into their groups, and then pick the appropriate group to # return as the results if ($query_h->{group_by} && $query_h->{group_by} ne 'none') { # my $grouped_data = $self->group_data_by_param( $tool_arr, { PARAM => $query_h->{group_by} } ); $self->debugme("grouped data keys: ".Dumper( [ keys %$grouped_data ] )); $self->debugme("None value: ". Dumper( $grouped_data->{NONE} )); # make an index with values and counts my $index; foreach ( sort keys %$grouped_data ) { push @$index, { value => $_, title => $extra_data->{display_labels}{$_} || $_, count => scalar @{$grouped_data->{$_}} }; } $output->{grouped_data_index} = $index; # get the subset of the results that we want # reset tool_arr to the new result set if ($query_h->{ $query_h->{group_by} }) { #undef $tool_arr; if (ref($query_h->{ $query_h->{group_by} }) eq 'ARRAY') { # create a query and get the matching items # need to decide whether it's an AND or an OR operation... # create a hash of the entries if one hasn't already been made # (for ease of access/use) if (!$tool_h) { my $acc = 1; $acc++ and $tool_h->{$acc} = $_ foreach @$tool_arr; } my $query = { AND => [ map { { FN => \&GO::TestSet::test_param, ARGS => ['is_the_same_as_p', { string => $_ }], PARAM => $query_h->{group_by} } } @{$query_h->{ $query_h->{group_by} }} ], }; my $result_h = GO::Boolean::run_boolean_query( $query, $tool_h ); if ($result_h && %$result_h) { $tool_arr = [ values %$result_h ];# if $result_h && %$result_h; } else { undef $tool_arr; } } else { # see if this value is present or not # set tool_arr to that value if it is present $tool_arr = $grouped_data->{ $query_h->{ $query_h->{group_by} } } || undef; #if $grouped_data->{ $query_h->{ $query_h->{group_by} } }; } # add an error message if no results in that category if (!$tool_arr) { $self->warning_msg('There are no results with the chosen grouping value.'); } } else { # if we have 'group_by' set, but don't have a value # use the first one in the list $tool_arr = $grouped_data->{ $index->[0]{value} }; # set this value in query_h $query_h->{ $query_h->{group_by} } = $index->[0]{value}; $self->param('validated_query', $query_h); } } # $self->debugme("\$tool_arr: ".Dumper($tool_arr)); if ($tool_arr) { # work out what our sort order should be # query_h->{default_sort_order} # year_asc - this is fine # year_desc - we want sprintf("%05d", (100000 - $_->year)) my $sort_order = $self->param('sort_order'); $self->debugme("sort_order: ".Dumper($sort_order)); # remap the 'year' params my $remapping = { 'year_asc' => 'year', 'year_desc' => { do_fn => { fn => sub { my $number = shift; $number = 10000 - $number; return sprintf("%05d", $number) },#(10000 - shift)) }, obj => { has_fn => 'year' }, }, }, }; foreach (@$sort_order) { $_ = $remapping->{$_} if $remapping->{$_}; } my $sorted_list = GO::Utilities::sort_list({ list => $tool_arr, crit => $sort_order }); # $self->debugme("sorted_list: ".Dumper($sorted_list)); # get the subset of the results, if appropriate my $paged = get_results_chunk($sorted_list, { chunk_n => $query_h->{page}, chunk_size => $query_h->{page_size} }); $output->{tool_arr} = $paged->{subset}; $output->{n_pages} = $paged->{n_chunks}; $output->{page} = $paged->{chunk_n}; } # set up all the other stuff and output the template return $self->tt_process("view_tools.tmpl", $self->set_up_output($output)) || die "Template toolkit messed up, the bastard!: ", $self->tt_obj->error(), "\n"; # return $self->tt_process($template_to_use.".tmpl", $output) || die "Template toolkit messed up, the bastard!: ", $self->tt_obj->error(), "\n"; } =head2 tt_pre_process { Populates various useful variables from the environment Args: $template_vars # any data to go in the template =cut #sub tt_pre_process { sub set_up_output { my $self = shift; $self->startme; my $tmpl_vars = shift || {}; # my $template_to_use = shift || 'view_tool'; my $spec = $self->get_spec; # the checked query into $output $tmpl_vars->{query_h} = $self->param('validated_query'); # get the number of entries in the tool and the number of search results (if # appropriate) $tmpl_vars->{n_tools} = $self->param("n_tools") if $self->param("n_tools"); $tmpl_vars->{n_results} = $self->param("n_results") if $self->param("n_results"); $tmpl_vars = { %$tmpl_vars, base_url => 'http://127.0.0.1/cgi-bin/tools.cgi', install_dir => 'http://127.0.0.1/go/', }; # if there are errors, add them to the template $tmpl_vars->{message} = $self->get_all_msgs('show_level') if $self->has_msgs; if ($self->param("n_results")) { ### set up URLs for paging / browsing / etc. my $url_h; foreach my $p (keys %$spec) { # if the parameter is set AND there's no default or the parameter differs # from the default, add the param to the URL if ($tmpl_vars->{query_h}{$p} && (!$spec->{$p}{default} || # no default ($spec->{$p}{default} && # default, param differs from default $spec->{$p}{default} ne $tmpl_vars->{query_h}{$p}) ) ) { $url_h->{$p} = uri_escape($tmpl_vars->{query_h}{$p}); } } # paging URLs: need everything except page number delete $url_h->{page} if $url_h->{page}; $tmpl_vars->{url_for_paging} = join( "&", map { my $k = $_; if (ref($url_h->{$k}) eq 'ARRAY') { join( "&", map { $k . "=" . $_ } @{$url_h->{$k}} ); } else { $k . "=" . $url_h->{$k}; } } keys %$url_h ); $self->debugme("url for paging: ".Dumper($tmpl_vars->{url_for_paging})); # if we have a grouping parameter set if ($url_h->{group_by} && $url_h->{group_by} ne 'none') { # grouping URLs: don't need page or category/year delete $url_h->{category}; delete $url_h->{year}; $tmpl_vars->{url_for_grouping} = join( "&", map { $_ . "=" . $url_h->{$_} } keys %$url_h ); $self->debugme("url_for_grouping: ".Dumper($tmpl_vars->{url_for_grouping})); } } ### get the bits for the form # misc stuff: what to show, etc., and paging foreach my $x qw(show_category show_comments link_out page) { $tmpl_vars->{$x} = $tmpl_vars->{query_h}{$x} if $tmpl_vars->{query_h}{$x}; } # sort settings and options # how to group results # page size foreach my $var qw( sort sort2 group_by page_size op) { $tmpl_vars->{$var} = $tmpl_vars->{query_h}{$var}; foreach (@{$spec->{$var}{form_values}}) { my $h = { value => $_ }; $h->{label} = $extra_data->{display_labels}{$_} || $_; $h->{selected} = 1 if $tmpl_vars->{query_h}{$var} eq $_; push @{$tmpl_vars->{ $var.'_options'}}, $h; } } return $tmpl_vars; } =head2 _get_tool_data Read in the tool db input : self output: tool data, as an array of Tool(Search) objects =cut sub _get_tool_data { my $self = shift; my $parser = GO::GeneralPurposeParser->new; # set the object type. If we are doing a search, use GO::ToolSearch, # which has a field with all params as a text string my $obj_type = 'GO::Object::Tool'; # if ($self->get_current_runmode eq 'search') # { $obj_type = 'GO::Object::ToolSearch'; # } # read in the tools file my $tool_arr = $parser->parse_from_file({ file => $tool_db, return_as => $obj_type }); # if ($parser->has_msgs) # { $self->debugme("error list: ".Dumper($parser->get_all_msgs)); # } if (!$tool_arr || !@$tool_arr) { # die with an error $self->printerr("FATAL: No tool data found in $tool_db"); die "No valid data could be found. Please check $tool_db and try again."; } return $tool_arr; } =head2 check_sort_params Ensure that we don't have any duplication in the sort params and the group by params input: sort, sort2 and group_by from the query_h output: returns 1 if all the params mentioned are unique =cut sub check_sort_params { my @vars = @_; my $seen_h; foreach (@vars) { $_ =~ s/year.+/year/; return if $seen_h->{$_}; $seen_h->{$_}++; } return 1; } =head2 create_sort_order Uses the values in the query_h and the defaults to create the sort order Sets the $self param 'sort_order' to be the current sort list input: $self =cut sub create_sort_order { my $self = shift; $self->startme; my $current_sort_list; # populate the current sort order as far as we can # keep track of which params we've seen already my $query_h = $self->param('validated_query'); my $seen_h; my $group_by; # remove the group by criteria from the sort list if ($query_h->{group_by} && $query_h->{group_by} ne 'none') { ($group_by = $query_h->{group_by}) =~ s/year.+/year/; $seen_h->{$group_by}++; } foreach my $s qw( sort sort2 ) { if ($query_h->{$s}) { push @$current_sort_list, $query_h->{$s}; (my $temp = $query_h->{$s}) =~ s/year.+/year/; $seen_h->{ $temp }++; } } $self->debugme("current sort list with values from query_h: ".Dumper($current_sort_list)); if (!$seen_h) # none of the sort values were set { $self->debugme("No sort values found in query_h"); $current_sort_list = $default_sort_order; } else { # fill the rest of the sort list with the items from the default sort list foreach my $s (@$default_sort_order) { (my $temp = $s) =~ s/year.+/year/; next if $seen_h->{$temp}; push @$current_sort_list, $s; } } $self->param('sort_order', $current_sort_list); $self->debugme("sort_order post fn: ".join(", ", @$current_sort_list)); $self->debugme("sort_order param: ".Dumper( $self->param('sort_order') )); # populate sort and sort2 if they aren't already filled $query_h->{'sort'} = $current_sort_list->[0] if !$query_h->{'sort'}; $query_h->{'sort2'} = $current_sort_list->[1] if !$query_h->{'sort2'}; $self->param('validated_query', $query_h); } 1;