PfamA - Indexes Pfam-A.seed file.
use Index::PfamA;
This package is used to index Pfam-A.seed files. It indexes the file with AC, ID, DE and NE keys (see below). You can add your own list of fields to index by editing $regexp.
ID Identifier (short name) AC Pfam Accession number DE Description (Full name) MB Pfam id within the clan.
## STOCKHOLM 1.0 ##=GF ID ABC_tran ##=GF AC PF00005.13 ##=GF DE ABC transporter ##=GF AU Sonnhammer ELL, Bateman A ##=GF SE Prosite ##=GF GA 8.60 8.60; 51.00 51.00; ##=GF TC 8.70 8.70; 51.00 51.10; ##=GF NC 8.50 8.50; 50.70 50.90; ##=GF TP Domain ##=GF BM hmmbuild -F --hand HMM_ls SEED ##=GF BM hmmcalibrate --seed 0 HMM_ls ##=GF BM hmmbuild -f -F --hand HMM_fs SEED ##=GF BM hmmcalibrate --seed 0 HMM_fs ##=GF AM byscore ##=GF NE PF00385; ##=GF NL Q6BZA3/885-937; ##=GF RN [1] ##=GF RM 91323725 ##=GF RT Homology between proteins controlling Streptomyces fradiae ##=GF RT tylosin resistance and ATP-binding transport. ##=GF RA Rosteck PR Jr, Reynolds PA, Hershberger CL; ##=GF RL Gene 1991;102:27-32. ##=GF RN [2] ##=GF RM 91014687 ##=GF RT Structure and function of haemolysin B,P-glycoprotein and ##=GF RT other members of a novel family of membrane translocators. ##=GF RA Blight MA, Holland IB; ##=GF RL Mol Microbiol 1990;4:873-880. ##=GF RN [3] ##=GF RM 91035372 ##=GF RT Binding protein-dependent transport systems. ##=GF RA Higgins CF, Hyde SC, Mimmack MM, Gileadi U, Gill DR, ##=GF RA Gallagher MP; ##=GF RL J Bioenerg Biomembr 1990;22:571-592. ##=GF RN [4] ##=GF RM 99087327 ##=GF RT Crystal structure of the ATP-binding subunit of an ABC ##=GF RT transporter. ##=GF RA Hung LW, Wang IX, Nikaido K, Liu PQ, Ames GF, Kim SH; ##=GF RL Nature 1998;396:703-707. ##=GF DR PROSITE; PDOC00185; ##=GF DC This structure is on hold until Dec 1999 ##=GF DR SCOP; 1b0u; fa; ##=GF DR HOMSTRAD; ABC_tran; ##=GF DR TC; 3.A.1; ##=GF DC The following Pfam-B families contain sequences that according to Prodom ##=GF DC are members of this Pfam-A family. ##=GF DR PFAMB; PB000287; ##=GF DR PFAMB; PB000816; ##=GF DR PFAMB; PB000968; ##=GF DR PFAMB; PB000994; ##=GF DR PFAMB; PB001104; ##=GF DR PFAMB; PB001282; ##=GF DR PFAMB; PB001714; ##=GF DR PFAMB; PB002185; ##=GF DR PFAMB; PB008321; ##.... 
##=GF DR PFAMB; PB128701; ##=GF DR PFAMB; PB129057; ##=GF DR INTERPRO; IPR003439; ##=GF DR PDB; 1jj7 A; 531; 718; ##=GF CC ABC transporters for a large family of proteins responsible for ##=GF CC translocation of a variety of compounds across biological membranes. ##=GF CC ABC transporters are the largest family of proteins in many ##=GF CC completely sequenced bacteria. ##=GF CC ABC transporters are composed of two copies of this domain and ##=GF CC two copies of a transmembrane domain Pfam:PF00664. These four ##=GF CC domains may belong to a single polypeptide as in Swiss:P13569, or ##=GF CC belong in different polypeptide chains. ##=GF SQ 65 ##=GS LCN3_LACLA/490-669 AC P37608 ##=GS APRD_PSEAE/359-543 AC Q03024 ##=GS TAP1_HUMAN/531-718 AC Q03518 ##=GS STE6_YEAST/1080-1263 AC P12866 ##=GS MDR_PLAFF/406-638 AC P13568 ##=GS ATM1_YEAST/462-648 AC P40416 ##=GS NIST_LACLA/379-568 AC Q03203 ##=GS THIQ_ECOLI/25-206 AC P31548 ##=GS MODC_ECOLI/24-205 AC P09833 ##=GS NIKE_ECOLI/38-228 AC P33594 ##=GS ARTP_ECOLI/28-217 AC P30858 ##... ##=GS PDR5_YEAST/186-386 AC P33302 ##=GS TAP1_HUMAN/531-718 DR PDB; 1jj7 A; 531; 718; ##LCN3_LACLA/490-669 GEKIAIVGKSGSGKSTLFNILL.GLIS.......YEGEVTYG.YENLRQI..................IGVVSQNMNLR...KGSLIENIVSNNNSEEL............................................. ...............................................DIQKINDV....LKDVNM...LELV.............DSLPQKIFSQLFEN...GKNLSGGQIQ..RLLIAKSLLNNNK...FIFWDEPFSSLDNQNRIHIYKNVLENPDY.KSQTIIMISHHLD .VLKYVDRVIYI.DDK ##APRD_PSEAE/359-543 GSVVGVIGPSGSGKSSLARVVL.GIWP..T....LHGSVRLD.GAEIRQYERETLGPR..........IGYLPQDIELF...AGTVAENIARFGEV................................................ 
...............................................QADKVVEA....ARLAGV...HELV.............LRLPQGYDTVLGVG...GAGLSGGQRQ..RIALARALYGAPT...LVVLDEPNSNLDDSGEQALLAAIQALKAR..GCTVLLITHRAG .VLGCADRLLAL.NAG ##TAP1_HUMAN/531-718 GEVTALVGPNGSGKSTVAALLQ.NLYQ..P....TGGQLLLD.GKPLPQYEHRYLHRQ..........VAAVGQEPQVF...GRSLQENIAYGLTQKP.............................................. ...............................................TMEEITAA....AVKSGA...HSFI.............SGLPQGYDTEVDEA...GSQLSGGQRQ..AVALARALIRKPC...VLILDDATSALDANSQLQVEQLLYESPER.YSRSVLLITQHLS LVEQ.ADHILFL.EGG ##//
# examples here use Index::PfamA;
my($res, $msg, $index); ($res, $index) = new Index::PfamA($file); #PfamA.seed file to index die $index unless $res;
#This input record delimiter is used when retrieving an entry from a file. ($res, $msg) = $index->setRecDel('dumper', '\n//'); die($msg) unless($res);
#This input record delimiter is used during the building of the index file. It reads the file line by line #and needs a specific pattern to record the position in the file. ($res, $msg) = $index->setRecDel('building', '//'); die($msg) unless($res);
($res, $r_inx) = $index->buildIndex(['ac', 'name']); #list of entries you want to index. See $regexp in this module for the allowed rules. die($r_inx) unless($res); #if no argument is given, will build the index with all the rules described in $regexp.
#or you can build the index on your own key-value pairs based on regular expressions. Only the first match is taken into account. #so if you want to index multiple things based on the same line, you need to create another key-value pair. ($res, $r_inx) = $index->buildIndex({ 'id' => '>(\S+)', 'name' => '^\s+\w+\s+(\S+)' }); die $r_inx unless $res;
($res, $mess) = $index->indexOut($r_inx); #needs a reference to a hash table. die $mess unless $res;
my $id = 'PF00005.13'; my ($res, $pos) = $index->getIndex($id); #return the position in the file for this $id. die $pos unless $res;
#getEntry returns a reference to an array in case of multiple entries found. my ($res, $entry) = $index->getEntry($id); #return the complete entry. die $entry unless $res; $entry = $entry->[0] if($entry);
#Either (parsing once the whole entry) my($res, $msg) = $index->parseFields(\$entry); die $msg unless $res;
my ($res, $name) = $index->getField('name'); #return the name of this entry. $name = $name->[0] if $name; my ($res, $name) = $index->getField(['ac', 'name', 'desc']); #return the ac, name and description of this entry. if($name){ my $nm = $name->[0]; my $ac = $name->[1]; my $de = $name->[2]; }
.... # see below for fields you can retrieve. my ($res, $name) = $index->get_name(); #return the name of this entry. $name = $name->[0] if $name; #or simpler my ($res, $name) = $index->getField('name', \$entry); #return the name of this entry by parsing it on the fly $name = $name->[0] if $name;
$Id: PfamA.pm.html,v 1.1.1.1 2005/08/18 13:18:25 hunter Exp $
Copyright (c) European Bioinformatics Institute 2002
Emmanuel Quevillon <tuco@ebi.ac.uk>
Description: Create a new object Index::PfamA
Arguments: $file a file to index $tool Do you want to use Dispatcher::Tool to use index.conf values? (optional)
Returns: 1, $self on success 0, msg on failure
Description: Initialize the record delimiter and file, and parse the configuration from the index configuration file. Argument: Returns: 1, '' on success 0, msg on error