Prints - Indexes Prints datafile
use Index::Prints;
This package is used to index Prints datafiles (prints.pval). It indexes the file by the accession number (gx; xxx) and by name (gc; xxx).
#gc; 11SGLOBULIN #gx; PR00439 #gn; 7 #gi; 11-S seed storage protein family signature #gm; 74 #mx; 11SGLOBULIN1 #mi; Glutelin 2 motif I - 2 #ml; 18 #mc; 74 #mn; 1 #ls; 12 #hs; 618 #li; 12 #hi; 618 #A 0 1 0 6 14 0 3 6 0 62 0 0 0 0 1 0 53 0 #B 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #C 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #D 0 4 0 0 28 16 0 4 8 0 72 0 0 0 0 6 0 0 #E 0 22 0 0 9 0 0 6 5 0 1 0 0 0 0 4 0 0 #F 0 0 1 1 0 0 0 0 0 0 0 10 12 0 0 0 0 0 #G 0 0 0 0 17 0 0 0 0 1 0 0 0 0 0 3 12 74 #H 10 12 1 0 1 0 0 0 5 0 0 0 1 1 0 0 0 0 #I 0 0 0 52 0 0 0 1 0 0 0 16 0 0 0 0 0 0 #J 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #K 9 3 0 0 0 1 0 5 1 0 0 0 0 9 0 7 0 0 # ...
# examples here use Index::Prints;
my($res, $msg, $index); ($res, $index) = new Index::Prints($file); #Prints file to index die $index unless $res;
#This input record delimiter is used when retrieving an entry from a file. ($res, $msg) = $index->setRecDel('dumper', '\ngc;/'); die($msg) unless($res);
#This input record delimiter is used during the building of the index file. It reads the file line by line #and needs a specific pattern to record the position in the file. ($res, $msg) = $index->setRecDel('building', 'll;'); die($msg) unless($res);
($res, $r_inx) = $index->buildIndex(['ac', 'name']); #list of entries you want to index. See $regexp in this module for the allowed rules. die($r_inx) unless($res); #if no argument is given, will build the index with all the rules described in $regexp.
#or you can build the index on your own key-value pairs based on regular expressions. Only the first match is taken into account, #so if you want to index multiple things based on the same line, you need to create another key-value pair. ($res, $r_inx) = $index->buildIndex({ 'id' => '>(\S+)', 'name' => '^\s+\w+\s+(\S+)' }); die $r_inx unless $res;
($res, $mess) = $index->indexOut($r_inx); #needs a reference to a hash table. die $mess unless $res;
my $id = 'PR00439'; my ($res, $pos) = $index->getIndex($id); #return the position in the file for this $id. die $pos unless $res;
#getEntry returns a reference to an array in case of multiple entries found. my ($res, $entry) = $index->getEntry($id); #return the complete entry. die $entry unless $res; $entry = $entry->[0] if($entry);
#Either (parsing once the whole entry) my($res, $msg) = $index->parseFields(\$entry); die $msg unless $res;
my ($res, $name) = $index->getField('name'); #return the name of this entry. $name = $name->[0] if $name; my ($res, $name) = $index->getField(['ac', 'name', 'desc']); #return the ac, name and description of this entry. if($name){ my $nm = $name->[0]; my $ac = $name->[1]; my $de = $name->[2]; }
.... # see below for fields you can retrieve. my ($res, $name) = $index->get_name(); #return the name of this entry. $name = $name->[0] if $name; #or more simply my ($res, $name) = $index->getField('name', \$entry); #return the name of this entry by parsing it on the fly $name = $name->[0] if $name;
$Id: Prints.pm.html,v 1.1.1.1 2005/08/18 13:18:25 hunter Exp $
Copyright (c) European Bioinformatics Institute 2004
Emmanuel Quevillon <tuco@ebi.ac.uk>
Description: Create a new Index::Prints object.
Arguments: $file a file to index $tool Do you want to use Dispatcher::Tool to use index.conf values? (optional)
Returns: 1, $self on success 0, msg on failure
Description: Initialize the record delimiter and the file, and parse the configuration from the index configuration file. Argument: Returns: 1, '' on success 0, msg on error