<?xml version="1.0"?>
<!DOCTYPE gene2go-config SYSTEM "go_process.dtd">
<gene2go-config>

	<!--
		Note: all column numbers in this file are zero-based

		Columns in the gene_info target file are:

			 0 = tax_id
			 1 = GeneID
			 2 = Symbol
			 3 = LocusTag
			 4 = Synonyms
			 5 = dbXrefs
			 6 = chromosome
			 7 = map_location
			 8 = description
			 9 = type_of_gene
			10 = Symbol_from_nomenclature_authority
			11 = Full_name_from_nomenclature_authority
			12 = Nomenclature_status
			13 = Other_designations
			14 = Modification_date
			15 = Feature_type
	-->

	<!--Defaults that are used when a value is not provided for a specific gene association file-->
	<defaults>
		<!--Default file used to map ids from a gene association file to gene ids; a file element can name a different one with its srcid2geneid-file attribute-->
		<srcid2geneid-file>ftp://ftp.ncbi.nih.gov/gene/DATA/gene_info.gz</srcid2geneid-file>
		<!--Default base location of the gene association files. Despite the element name, the current value is an HTTPS URL; a file element can carry its own ftp-path attribute-->
		<ftp-path>https://current.geneontology.org/annotations</ftp-path>
		<!--Location of the GO ontology file (go.obo)-->
		<ontology-file>https://current.geneontology.org/ontology/go.obo</ontology-file>
	</defaults>

	<!--List of gene association files grouped by data providers-->
	<databases>

		<database name="SGD" url="http://www.yeastgenome.org/">
			<!--
				file element attributes:
				name - gene association file name
				desc - description of a file
				src-column - id column in source gene_association file that is used for mapping into gene_id
				target-column - column in srcid2geneid-file which should be equal to id from source gene_association file
				srcid2geneid-file - the same meaning as for defaults/srcid2geneid-file
				ftp-path - presumably the same meaning as for defaults/ftp-path (the AspGD entry sets it); confirm against the loader
				src-transform - Perl regex substitution applied to source column before matching it to target column
				target-transform - Perl regex substitution applied to target column before matching it to source column
				ref2pmid-url - URL of file used to map reference id from gene association file to pubmed id
				ref-transform - Perl regex substitution applied to reference ids from gene association files before mapping them to PubMed ids
			-->
			<!--Source column 2 is matched, without any transform, against column 2 (Symbol) of the default gene_info file-->
			<file name="sgd.gaf.gz" desc="Saccharomyces cerevisiae" src-column="2" target-column="2">
				<!--
					List of tax-ids to report. Specify a separate tax-id element for each tax_id.
					If no tax-id elements are listed, all tax_ids from the file will be reported
				-->
				<tax-id>4932</tax-id>
				<tax-id>559292</tax-id>
				<!--NOTE(review): presumably records carrying tax_id 4932 are reported under 559292; confirm against the loader-->
				<tax-substitute source="4932" target="559292"/>
			</file>
		</database>

		<database name="FlyBase" url="http://flybase.org/">
			<!--
				Maps through the Drosophila_melanogaster gene_info file instead of the default one.
				Source column 1 is matched against target column 5 (dbXrefs in the gene_info column list)
				after target-transform has reduced that column to the FBgn identifier following "FLYBASE:".
			-->
			<file name="fb.gaf.gz" desc="Drosophila melanogaster" srcid2geneid-file="ftp://ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/Invertebrates/Drosophila_melanogaster.gene_info.gz" src-column="1" target-column="5" target-transform="s/^.*FLYBASE:(FBgn\d+).*$/$1/">
				<tax-id>7227</tax-id>
			</file>
		</database>

		<database name="Xenbase" url="http://www.xenbase.org/">
			<!--
				Uses the default gene_info file. Source column 1 is matched against target column 5 (dbXrefs)
				after target-transform has reduced that column to the XB-GENE identifier following "Xenbase:".
				Two tax_ids are reported from this one file.
			-->
			<file name="xenbase.gaf.gz" desc="Xenopus" src-column="1" target-column="5" target-transform="s/^.*Xenbase:(XB-GENE-\d+).*$/$1/">
				<tax-id>8355</tax-id>
				<tax-id>8364</tax-id>
			</file>
		</database>

		<database name="MGI" url="http://www.informatics.jax.org/">
			<!--
				Maps through the Mus_musculus gene_info file. Source column 1 is matched against target
				column 5 (dbXrefs in the gene_info column list) after target-transform has reduced that
				column to the "MGI:" identifier (prefix kept).
				Reference ids are mapped to PubMed ids through the BIB_PubMed.rpt report; ref-transform first
				strips a leading "MGI:" prefix (with optional surrounding spaces) and a single trailing space.
			-->
			<file name="mgi.gaf.gz" desc="Mus musculus" srcid2geneid-file="ftp://ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/Mammalia/Mus_musculus.gene_info.gz" src-column="1" target-column="5" ref2pmid-url="http://www.informatics.jax.org/downloads/reports/BIB_PubMed.rpt" ref-transform="s/^\ *MGI\ *: *|\ $//g" target-transform="s/^.*(MGI:\d+).*$/$1/">
				<tax-id>10090</tax-id>
			</file>
		</database>

		<database name="TAIR" url="http://www.arabidopsis.org/">
			<!--
				Uses the default gene_info file; the target is column 3 (LocusTag).
				src-column lists two source columns (3 and 10) for the single target column.
				NOTE(review): presumably each listed source column is tried against the target; confirm against the loader.
				src-transform keeps only the text before the first "|" or ".", which drops a numeric
				".N" suffix and anything after a pipe.
			-->
			<file name="tair.gaf.gz" desc="Arabidopsis thaliana" src-column="3,10" target-column="3" src-transform="s/^([^|.]*)(\.[0-9]+)?\|?.*$/$1/">
				<tax-id>3702</tax-id>
			</file>
		</database>

		<database name="WormBase" url="http://www.wormbase.org/">
			<!--
				Maps through the Caenorhabditis_elegans gene_info file.
				src-column and target-column are lists of equal length.
				NOTE(review): presumably they pair up positionally (source 2 with target 2, 2 with 3,
				1 with 5, 3 with 10); confirm against the loader.
				In the gene_info column list the targets are 2 = Symbol, 3 = LocusTag, 5 = dbXrefs and
				10 = Symbol_from_nomenclature_authority.
				The same transform is applied to both sides: it strips a leading "WormBase:" or "CELE_"
				prefix, case-insensitively, together with surrounding whitespace.
			-->
			<file name="wb.gaf.gz" desc="Caenorhabditis elegans" srcid2geneid-file="ftp://ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/Invertebrates/Caenorhabditis_elegans.gene_info.gz" src-column="2,2,1,3" target-column="2,3,5,10" src-transform="s/^\s*(WormBase\s*:|CELE_)\s*//i" target-transform="s/^\s*(WormBase\s*:|CELE_)\s*//i">
				<tax-id>6239</tax-id>
			</file>
		</database>

		<database name="RGD" url="http://rgd.mcw.edu/">
			<!--
				Maps through the Rattus_norvegicus gene_info file. Source column 1 is matched against target
				column 5 (dbXrefs in the gene_info column list) after target-transform has reduced that
				column to the digits following "RGD:".
				ref-transform strips a leading "RGD:" prefix (with optional surrounding spaces) and a single trailing space.
				NOTE(review): ref-transform is set but this entry has no ref2pmid-url; confirm whether the
				loader obtains the reference mapping elsewhere or the attribute is unused here.
			-->
			<file name="rgd.gaf.gz" desc="Rattus norvegicus" srcid2geneid-file="ftp://ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/Mammalia/Rattus_norvegicus.gene_info.gz" src-column="1" target-column="5" ref-transform="s/^\ *RGD *: *| $//g" target-transform="s/^.*RGD:(\d+).*$/$1/">
				<tax-id>10116</tax-id>
			</file>
		</database>

		<database name="ZFIN" url="https://zfin.org/">
			<!--
				Maps through the Danio_rerio gene_info file. Source column 1 is matched against target
				column 5 (dbXrefs in the gene_info column list) after target-transform has reduced that
				column to the identifier following "ZFIN:" (uppercase letters, digits and hyphens).
			-->
			<file name="zfin.gaf.gz" desc="Danio rerio" srcid2geneid-file="ftp://ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/Non-mammalian_vertebrates/Danio_rerio.gene_info.gz" src-column="1" target-column="5" target-transform="s/^.*ZFIN:([A-Z\-0-9]+).*$/$1/">
				<tax-id>7955</tax-id>
				<!--NOTE(review): presumably records carrying tax_id 0 are reported under 7955; confirm against the loader-->
				<tax-substitute source="0" target="7955"/>
			</file>
		</database>


		<!--NOTE(review): the url below ends in /leish while the only file listed here is for Plasmodium falciparum; confirm the url is still the intended one-->
		<database name="GeneDB" url="http://www.genedb.org/leish">
			<!--
				Maps through the All_Protozoa gene_info file. Source column 1 is matched against target
				column 3 (LocusTag in the gene_info column list). The same transform is applied to both
				sides: it removes a trailing "." together with any digits that follow it.
			-->
			<file name="genedb_pfalciparum.gaf.gz" desc="Plasmodium falciparum" srcid2geneid-file="ftp://ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/Protozoa/All_Protozoa.gene_info.gz" src-column="1" src-transform="s/\.\d*$//" target-column="3" target-transform="s/\.\d*$//">
				<tax-id>36329</tax-id>
			</file>
		</database>


		<database name="PomBase" url="http://www.pombase.org/">
			<!--
				Maps through the All_Fungi gene_info file. Source column 1 is prefixed with "SPOM_" by
				src-transform and matched against target column 3 (LocusTag in the gene_info column list),
				whose trailing uppercase "C", if any, is lowercased by target-transform.
			-->
			<file name="pombase.gaf.gz" desc="Schizosaccharomyces pombe" srcid2geneid-file="ftp://ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/Fungi/All_Fungi.gene_info.gz" src-column="1" src-transform="s/^/SPOM_/" target-column="3" target-transform="s/C$/c/">
				<tax-id>4896</tax-id>
				<tax-id>284812</tax-id>
				<!--NOTE(review): presumably records carrying tax_id 284812 are reported under 4896; confirm against the loader-->
				<tax-substitute source="284812" target="4896"/>
			</file>
		</database>

		<database name="JaponicusDB" url="https://www.japonicusdb.org/">
			<!--Uses the default gene_info file. Source column 1 is matched, without any transform, against target column 3 (LocusTag)-->
			<file name="japonicusdb.gaf.gz" desc="Schizosaccharomyces japonicus" src-column="1" target-column="3">
				<tax-id>402676</tax-id>
				<tax-id>4897</tax-id>
				<!--NOTE(review): presumably records carrying tax_id 4897 are reported under 402676; confirm against the loader-->
				<tax-substitute source="4897" target="402676"/>
			</file>
		</database>

		<database name="GOA" url="http://www.ebi.ac.uk/GOA/">
			<!--
				The same association file (goa_human.gaf.gz) is listed twice with different mapping files.
				NOTE(review): presumably both mappings are applied so that ids missing from one file can
				still be resolved through the other; confirm against the loader.
				Neither mapping file is a gene_info file, so the gene_info column list at the top of this
				document does not describe their target columns.
			-->
			<!--Mapping 1: source column 1 against column 5 of gene2accession, after removing a trailing "." and any digits that follow it-->
			<file name="goa_human.gaf.gz" desc="Homo sapiens" src-column="1" target-column="5" srcid2geneid-file="ftp://ftp.ncbi.nih.gov/gene/DATA/gene2accession.gz" target-transform="s/\.\d*$//">
				<tax-id>9606</tax-id>
			</file>
			<!--Mapping 2: source column 1 against column 2 of RNAcentral.txt, without any transform-->
			<file name="goa_human.gaf.gz" desc="Homo sapiens" src-column="1" target-column="2" srcid2geneid-file="ftp://ftp.ncbi.nih.gov/gene/DATA/.misc/RNAcentral.txt">
				<tax-id>9606</tax-id>
			</file>
		</database>

		<database name="dictyBase" url="http://dictybase.org">
			<!--Uses the default gene_info file. Source column 2 is matched, without any transform, against target column 2 (Symbol)-->
			<file name="dictybase.gaf.gz" desc="Dictyostelium discoideum" src-column="2" target-column="2">
				<tax-id>44689</tax-id>
				<!--NOTE(review): presumably records carrying tax_id 44689 are reported under 352472; confirm against the loader-->
				<tax-substitute source="44689" target="352472"/>
			</file>
		</database>

		<database name="PAMGO" url="http://www.agrobacterium.org/">
			<!--Both PAMGO file entries below are commented out, so this database currently has no active file element-->
			<!--file name="gene_association.PAMGO_Atumefaciens.gz" desc="Agrobacterium tumefaciens str. C58" src-column="1" target-column="5" srcid2geneid-file="ftp://ftp.ncbi.nih.gov/gene/DATA/gene2accession.gz" >
				<tax-id>176299</tax-id>
			</file-->
			<!--file name="gene_association.PAMGO_Mgrisea.gz" desc="Magnaporthe grisea" src-column="1" target-column="5" srcid2geneid-file="ftp://ftp.ncbi.nih.gov/gene/DATA/gene2accession.gz"  target-transform="s/\.\d*$//">
				<tax-id>148305</tax-id>
				<tax-substitute source="148305" target="242507"/>
			</file-->
		</database>

		<database name="AspGD" url="http://www.ebi.ac.uk/GOA/">
			<!--
				This entry sets its own ftp-path (the EBI GOA proteomes directory) rather than relying on
				defaults/ftp-path, and the file name has no .gz suffix.
				Uses the default gene_info file. Source column 10 is matched, without any transform,
				against target column 3 (LocusTag).
			-->
			<file name="22026.E_nidulans.goa" ftp-path="https://ftp.ebi.ac.uk/pub/databases/GO/goa/proteomes" desc="Aspergillus nidulans FGSC A4" src-column="10" target-column="3">
				<tax-id>227321</tax-id>
			</file>
		</database>


		<database name="EcoCyc" url="http://ecocyc.org">
			<!--
				Maps through the Escherichia coli K-12 MG1655 gene_info file. Source column 2 is matched
				against target column 2 (Symbol in the gene_info column list). src-transform lowercases
				the first character of the source value when it is an uppercase letter A to Z.
			-->
			<file name="ecocyc.gaf.gz" desc="Escherichia coli K-12" srcid2geneid-file="ftp://ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/Archaea_Bacteria/Escherichia_coli_str._K-12_substr._MG1655.gene_info.gz" src-column="2" target-column="2" src-transform="s/^([A-Z])(.*)$/\l$1$2/">
				<tax-id>83333</tax-id>
				<!--NOTE(review): presumably records carrying tax_id 83333 are reported under 511145; confirm against the loader-->
				<tax-substitute source="83333" target="511145"/>
			</file>
		</database>

		<database name="PseudoCAP" url="http://www.pseudomonas.com/">
			<!--Uses the default gene_info file. Source column 2 is matched, without any transform, against target column 2 (Symbol)-->
			<file name="pseudocap.gaf.gz" desc="Pseudomonas aeruginosa" src-column="2" target-column="2">
				<tax-id>208964</tax-id>
			</file>
		</database>

	</databases>

	<!--
		Gene association files from the GO site that we know of but do not currently use.
		Entries carry only a name attribute and are kept in alphabetical order.
	-->
	<unused-files>
		<file name="cgd.gaf.gz"/>
		<file name="filtered_goa_uniprot_all.gaf.gz"/>
		<file name="filtered_goa_uniprot_all_noiea.gaf.gz"/>
		<file name="genedb_lmajor.gaf.gz"/>
		<file name="genedb_tbrucei.gaf.gz"/>
		<file name="goa_chicken.gaf.gz"/>
		<file name="goa_cow.gaf.gz"/>
		<file name="goa_dog.gaf.gz"/>
		<file name="goa_pig.gaf.gz"/>
		<file name="reactome.gaf.gz"/>
		<file name="sgn.gaf.gz"/>
	</unused-files>

</gene2go-config>
