1	embl_acc	European Nucleotide Archive (was EMBL) accession	\N
2	status	Status	\N
3	synonym	Synonym	\N
4	name	Name	Alternative/long name
5	type	Type of feature	\N
6	toplevel	Top Level	Top Level Non-Redundant Sequence Region
7	GeneCount	Gene Count	Total Number of Genes
8	KnownGeneCount	Known Gene Count	Total Number of Known Genes
9	PseudoGeneCount	PseudoGene Count	Total Number of PseudoGenes
10	SNPCount	SNP Count	Total Number of SNPs
11	codon_table	Codon Table	Alternate codon table
12	_selenocysteine	Selenocysteine	\N
13	bacend	bacend	\N
14	htg	htg	High Throughput phase attribute
15	miRNA	Micro RNA	Coordinates of the mature miRNA
16	non_ref	Non Reference	Non Reference Sequence Region
17	sanger_project	Sanger Project name	\N
18	clone_name	Clone name	\N
19	fish	FISH location	\N
21	org	Sequencing centre	\N
22	method	Method	\N
23	superctg	Super contig id	\N
24	inner_start	Max start value	\N
25	inner_end	Min end value	\N
26	state	Current state of clone	\N
27	organisation	Organisation sequencing clone	\N
28	seq_len	Accession length	\N
29	fp_size	FP size	\N
30	BACend_flag	BAC end flags	\N
31	fpc_clone_id	fpc clone	\N
32	KnwnPCCount	protein_coding_KNOWN	Number of Known Protein Coding
33	NovPCCount	protein_coding_NOVEL	Number of Novel Protein Coding
34	NovPTCount	processed_transcript_NOVEL	Number of Novel Processed Transcripts
35	PutPTCount	processed_transcript_PUTATIVE	Number of Putative Processed Transcripts
36	PredPCCount	protein_coding_PREDICTED	Number of Predicted Protein Coding
37	IGGeneCount	IG_gene	Number of IG Genes
38	IGPsGenCount	IG_pseudogene	Number of IG Pseudogenes
39	TotPsCount	total_pseudogene	Total Number of Pseudogenes
42	KnwnPCProgCount	protein_coding_in_progress_KNOWN	Number of Known Protein Coding in progress
43	NovPCProgCount	protein_coding_in_progress_NOVEL	Number of Novel Protein Coding in progress
44	AnnotSeqLength	Annotated sequence length	Annotated Sequence
45	TotCloneNum	Total number of clones	Total Number of Clones
46	NumAnnotClone	Fully annotated clones	Number of Fully Annotated Clones
47	ack	Acknowledgement	Acknowledgement for manual annotation
48	htg_phase	High throughput phase	High throughput genomic sequencing phase
49	description	Description	A general descriptive text attribute
50	chromosome	Chromosome	Chromosomal location for supercontigs that are not assembled
51	nonsense	Nonsense Mutation	Strain specific nonsense mutation
52	author	Author	Group responsible for Vega annotation
53	author_email	Author email address	Author email address
54	remark	Remark	Annotation remark
55	transcr_class	Transcript class	Transcript class
56	KnwnPTCount	processed_transcript_KNOWN	Number of Known Processed Transcripts
57	ccds	CCDS	CCDS identifier
58	CCDS_PublicNote	CCDS Public Note	Public Note for CCDS identifier, provided by http://www.ncbi.nlm.nih.gov/CCDS
59	Frameshift	Frameshift	Frameshift modelled as intron
60	PTCount	processed_transcript	Number of Processed Transcripts
61	PredPTCount	processed_transcript_PREDICTED	Number of Predicted Processed Transcripts
62	ncRNA	Structure	RNA secondary structure line
63	skip_clone	skip clone	Skip clone in align_by_clone_identity.pl
64	coding_cnt	Protein coding gene count	Number of protein coding Genes
65	GeneNo_novCod	novel protein_coding Gene Count	Number of novel protein_coding Genes
66	GeneNo_rRNA	rRNA Gene Count	Number of rRNA Genes
67	pseudogene_cnt	Pseudogene count	Number of pseudogenes
68	GeneNo_snRNA	snRNA Gene Count	Number of snRNA Genes
69	GeneNo_snoRNA	snoRNA Gene Count	Number of snoRNA Genes
70	GeneNo_miRNA	miRNA Gene Count	Number of miRNA Genes
71	GeneNo_mscRNA	misc_RNA Gene Count	Number of misc_RNA Genes
72	GeneNo_scRNA	scRNA Gene Count	Number of scRNA Genes
73	GeneNo_MTrRNA	Mt_rRNA Gene Count	Number of Mt_rRNA Genes
74	GeneNo_MTtRNA	Mt_tRNA Gene Count	Number of Mt_tRNA Genes
75	GeneNo_RNA_pseu	RNA_pseudogene Gene Count	Number of RNA_pseudogene Genes
76	GeneNo_tRNA	tRNA Gene Count	Number of tRNA Genes
77	GeneNo_rettran	retrotransposed Gene Count	Number of retrotransposed Genes
78	GeneNo_snlRNA	snlRNA Gene Count	Number of snlRNA Genes
79	GeneNo_proc_tr	processed_transcript Gene Count	Number of processed transcript Genes
80	supercontig	SuperContig name	\N
81	well_name	Well plate name	\N
82	bacterial	Bacterial	\N
83	NovelCDSCount	Novel CDS Count	\N
84	NovelTransCount	Novel Transcript Count	\N
85	PutTransCount	Putative Transcript Count	\N
86	PredTransCount	Predicted Transcript Count	\N
87	UnclassPsCount	Unclass Ps count	\N
88	KnwnprogCount	Known prog Count	\N
89	NovCDSprogCount	Novel CDS prog count	\N
90	bacend_well_nam	BACend well name	\N
91	alt_well_name	Alt well name	\N
92	TranscriptEdge	Transcript Edge	\N
93	alt_embl_acc	Alt European Nucleotide Archive (was EMBL) acc	\N
94	alt_org	Alt org	\N
95	intl_clone_name	International Clone Name	\N
96	embl_version	European Nucleotide Archive (was EMBL) Version	\N
97	chr	Chromosome Name	Chromosome Name Contained in the Assembly
98	equiv_asm	Equivalent EnsEMBL assembly	For full chromosomes made from NCBI AGPs
99	GeneNo_ncRNA	ncRNA Gene Count	Number of ncRNA Genes
100	GeneNo_Ig	Ig Gene Count	Number of Ig Genes
109	HitSimilarity	hit similarity	percentage id to parent transcripts
110	HitCoverage	hit coverage	coverage of parent transcripts
111	PropNonGap	proportion non gap	proportion non gap
112	NumStops	number of stops	\N
113	GapExons	gap exons	number of gap exons
114	SourceTran	source transcript	source transcript
115	EndNotFound	end not found	end not found
116	StartNotFound	start not found	start not found
117	Frameshift Fra	Frameshift modelled as intron	\N
118	ensembl_name	Ensembl name	Name of equivalent Ensembl chromosome
119	NoAnnotation	NoAnnotation	Clones without manual annotation
120	hap_contig	Haplotype contig	Contig present on a haplotype
121	annotated	Clone Annotation Status	\N
122	keyword	Clone Keyword	\N
123	hidden_remark	Hidden Remark	\N
124	mRNA_start_NF	mRNA start not found	\N
125	mRNA_end_NF	mRNA end not found	\N
126	cds_start_NF	CDS start not found	\N
127	cds_end_NF	CDS end not found	\N
128	write_access	Write access for Sequence Set	1 for writable, 0 for read-only
129	hidden	Hidden Sequence Set	\N
130	vega_name	Vega name	Vega seq_region.name
131	vega_export_mod	Export mode	E (External), I (Internal) etc
132	vega_release	Vega release	Vega release number
133	atag_CLE	Clone_left_end	Clone_left_end feature marked in GAP database
134	atag_CRE	Clone_right_end	Clone_right_end feature marked in GAP database
135	atag_Misc	Misc	miscellaneous feature marked in GAP database
136	atag_Unsure	Unsure	region of uncertain DNA sequence marked in GAP database
137	MultAssem	Multiple Assembled seq region	Part of Seq Region is part of more than one assembly
140	wgs	WGS contig	WGS contig integrated into the map
141	bac	AGP clones	tiling path of clones
142	GeneGC	Gene GC	Percentage GC content for this gene
143	TotAssemblyLeng	Finished sequence length	Length of the assembly not counting sequence gaps
144	amino_acid_sub	Amino acid substitution	Some translations have been manually curated for amino acid substitutions. For example a stop codon may be changed to an amino acid in order to prevent premature truncation, or one amino acid can be substituted for another.
145	_rna_edit	rna_edit	RNA edit
146	kill_reason	Kill Reason	Reason why a transcript has been killed
147	strip_UTR	Strip UTR	Transcript needs bad UTR removing
148	TotAssLength	Finished sequence length	Finished Sequence
149	PsCount	pseudogene	Number of Pseudogenes
152	TotPTCount	total_processed_transcript	Total Number of Processed Transcripts
153	TotPCCount	total_protein_coding	Total Number of Protein Coding
154	NovNcCount	novel_non_coding	Number of Novel Non Coding
155	KnwnPolyPsCount	known_polymorphic	Number of Known Polymorphic Pseudogenes
156	PolyPsCount	polymorphic_pseudogene	Number of Polymorphic Pseudogenes
157	TotIGGeneCount	total_IG_gene	Total Number of IG Genes
158	ProcPsCount	proc_pseudogene	Number of Processed Pseudogenes
159	UnPsCount	unproc_pseudogene	Number of Unprocessed Pseudogenes
160	TPsCount	transcribed_pseudogene	Number of Transcribed Pseudogenes
161	TECCount	TEC	Number of TEC Genes
162	KnwnIGGeneCount	IG_gene_KNOWN	Number of Known IG Genes
163	KnwnIGPsGeCount	IG_pseudogene_KNOWN	Number of Known IG Pseudogenes
164	IsoPoint	Isoelectric point	Pepstats attributes
165	Charge	Charge	Pepstats attributes
166	MolecularWeight	Molecular weight	Pepstats attributes
167	NumResidues	Number of residues	Pepstats attributes
168	AvgResWeight	Ave. residue weight	Pepstats attributes
170	initial_met	Initial methionine	Set first amino acid to methionine
171	NonGapHCov	NonGapHCov	\N
172	otter_support	otter support	Evidence ID that was used as supporting feature for building a gene in Vega
173	enst_link	enst link	Code to link an OTTT with an ENST when they both share the CDS of ENST
174	upstream_ATG	upstream ATG	Alternative ATG found upstream of the ATG defined as the start for the transcript
175	TPPsCount	transcribed_processed_pseudogene	Number of Transcribed Processed Pseudogenes
176	TUPsCount	transcribed_unprocessed_pseudogene	Number of Transcribed Unprocessed Pseudogenes
177	UniPsCount	unitary_pseudogene	Number of Unitary Pseudogenes
178	KnwnTECCount	TEC_KNOWN	Number of Known TEC genes
179	TotTECGeneCount	TEC_all	Total number of TEC genes
180	TUyPsCount	transcribed_unitary_pseudogene	Number of Transcribed Unitary Pseudogenes
181	PolyCount	polymorphic	Number of Polymorphic Genes
182	KnwnPolyCount	polymorphic	Number of Known Polymorphic Genes
183	KnwnTRCount	TR_gene_known	Number of Known TR Genes
184	TRGeneCount	TR_gene	Number of TR Genes
185	TRPsCount	TR_pseudo	Number of TR Pseudogenes
188	ep_ott_support	otter protein exon support	Evidence ID that was used as supporting feature for building a gene in Vega
189	ed_ott_support	otter dna exon support	Evidence ID that was used as supporting feature for building a gene in Vega
190	GeneNo_lincRNA	lincRNA Gene Count	Number of lincRNA Genes
191	StopGained	SNP causes stop codon to be gained	This transcript has a variant that causes a stop codon to be gained in at least 10 percent of a HapMap population
192	StopLost	SNP causes stop codon to be lost	This transcript has a variant that causes a stop codon to be lost in at least 10 percent of a HapMap population
193	GeneNo_class_I_	class_I_RNA Gene Count	Number of class_I_RNA Genes
194	GeneNo_SRP_RNA	SRP_RNA Gene Count	Number of SRP_RNA Genes
195	GeneNo_class_II	class_II_RNA Gene Count	Number of class_II_RNA Genes
196	GeneNo_P_RNA	RNase_P_RNA Gene Count	Number of RNase_P_RNA Genes
197	GeneNo_RNase_MR	RNase_MRP_RNA Gene Count	Number of RNase_MRP_RNA Genes
198	lost_frameshift	lost_frameshift	Frameshift on the query sequence is lost in the target sequence
199	AltThreePrime	Alternate three prime end	The position of other possible three prime ends for the transcript
216	GeneInLRG	Gene in LRG	This gene is contained within an LRG region
217	GeneOverlapLRG	Gene overlaps LRG	This gene is partially overlapped by an LRG region (start or end outside LRG)
218	readthrough_tra	readthrough transcript	Havana readthrough transcripts
300	CNE	Constitutive exon	An exon that is always included in the mature mRNA, even in different mRNA isoforms
301	CE	Cassette exon	One exon is spliced out of the primary transcript together with its flanking introns
302	IR	Intron retention	A sequence is spliced out as an intron or remains in the mature mRNA transcript
303	MXE	Mutually exclusive exons	In the simplest case, one or two consecutive exons are retained but not both
304	A3SS	Alternative 3' sites	Two or more splice sites are recognized at the 5' end of an exon. An alternative 3' splice junction (acceptor site) is used, changing the 5' boundary of the downstream exon
305	A5SS	Alternative 5' sites	Two or more splice sites are recognized at the 3' end of an exon. An alternative 5' splice junction (donor site) is used, changing the 3' boundary of the upstream exon
306	AFE	Alternative first exon	The second exons of each variant have identical boundaries, but the first exons do not overlap
307	ALE	Alternative last exon	Penultimate exons of each splice variant have identical boundaries, but the last exons do not overlap
308	II	Intron isoform	Alternative donor or acceptor splice sites lead to truncation or extension of introns, respectively
309	EI	Exon isoform	Alternative donor or acceptor splice sites lead to truncation or extension of exons, respectively
310	AI	Alternative initiation	Alternative choice of promoters
311	AT	Alternative termination	Alternative choice of polyadenylation sites
312	patch_fix	Assembly Patch Fix	Assembly patch that will, in the next assembly release, replace the corresponding sequence found in the current assembly
313	patch_novel	Assembly Patch Novel	Assembly patch that will, in the next assembly release, be retained as an alternate non-reference sequence in a similar way to haplotypes
314	LRG	Locus Reference Genomic	Locus Reference Genomic sequence
315	NoEvidence	Evidence for transcript removed	Supporting evidence for this projected transcript has been removed
316	circular_seq	Circular sequence	Circular chromosome or plasmid molecule
317	external_db	External database	External database to which seq_region name may be linked
318	split_tscript	split_tscript	split_tscript
319	Threep	Three prime end	Alternate three prime end
320	gene_cluster	Gene cluster	Havana annotated gene cluster
328	_rib_frameshift	Ribosomal Frameshift	Position and magnitude of frameshift
345	vega_ref_chrom	Vega reference chromosome	Haplotypes reference a regular chromosome (indicated in the value of the attribute)
346	PutPCCount	protein_coding_PUTATIVE	Number of Putative Protein Coding
347	proj_alt_seq	Projection altered sequence	Projected sequence differs from original
348	hav_gene_type	Havana gene biotype	Gene biotype assigned by Havana
349	GeneNo_asense	antisense Gene Count	Number of antisense Genes
350	GeneNo_sense_in	sense_intronic Gene Count	Number of sense_intronic Genes
351	GeneNo_amb_orf	ambiguous_orf Gene Count	Number of ambiguous_orf Genes
352	GeneNo_ret_int	retained_intron Gene Count	Number of retained_intron Genes
353	noncoding_cnt	Non coding gene count	Number of non coding genes
354	GeneNo_ncrna_h	ncrna_host Gene Count	Number of ncrna_host Genes
355	GeneNo_sens_ov	sense_overlapping Gene Count	Number of sense_overlapping Genes
356	GeneNo_3prime	3prime_overlapping Gene Count	Number of 3prime_overlapping Genes
357	GeneNo_tmRNA	tmRNA Gene Count	Number of tmRNA Genes
358	PHIbase_mutant	PHI-base mutant	PHI-base phenotype of the mutants
359	GeneNo_ribozyme	ribozyme Gene Count	Number of ribozyme Genes
360	ncrna_host	ncrna_host	Havana ncrna_host gene
361	peptide-class	Peptide classification	The classification of the gene or transcript based on alignment to NR (values: TE WH NH)
362	working-set	Working Gene Set	High-confidence set of genes, composed of evidence-based genes and non-overlapping protein-coding ab initio gene models
363	filtered-set	Filtered Gene Set v1	Working genes that are screened for TE content and orthology with sorghum and rice.
364	super-set	Super Working Gene Set	Set of all working gene set loci from both Builds 4a and 5a
365	projected4a2	Projected by alignment	Temporary (Monday, August 23, 2010)
366	merged	Merged species	\N
367	karyotype_rank	Rank in the karyotype	For a given seq_region, if it is part of the species karyotype, will indicate its rank
