00001 /***************************************************************************** 00002 * Copyright (C) 2000 Jim Kent. This source code may be freely used * 00003 * for personal, academic, and non-profit purposes. Commercial use * 00004 * permitted only by explicit agreement with Jim Kent (jim_kent@pacbell.net) * 00005 *****************************************************************************/ 00006 /* ens.h - Interface to ensEMBL database. */ 00007 #ifndef ENS_H 00008 #define ENS_H 00009 00010 #ifndef DNAUTIL_H 00011 #include "dnautil.h" 00012 #endif 00013 00014 #ifndef DLIST_H 00015 #include "dlist.h" 00016 #endif 00017 00018 #ifndef UNFIN_H 00019 #include "unfin.h" 00020 #endif 00021 00022 struct ensAnalysis 00023 /* A category of a feature. */ 00024 { 00025 struct ensAnalysis *next; /* Next in list */ 00026 int id; /* Unique id for this feature type. */ 00027 char *db; /* Database used. */ 00028 char *dbVersion; /* Version of database. */ 00029 char *program; /* Program used. */ 00030 char *programVersion; /* Version of program. */ 00031 char *gffSource; /* Source field from GFF. */ 00032 char *gffFeature; /* Feature field from GFF. */ 00033 char *shortName; /* 15 letter summary. */ 00034 }; 00035 00036 struct ensFeature 00037 /* An ensemble feature. */ 00038 { 00039 struct ensFeature *next; /* Next in list. */ 00040 struct contigTree *tContig; /* Name of target (genomic) sequence */ 00041 int tStart, tEnd; /* Position in genomic sequence. */ 00042 int score; /* Score (I don't know units) */ 00043 int orientation; /* +1 or -1. Strand relative to contig. */ 00044 int type; /* Index into analysis table describing type of feature. */ 00045 char *typeName; /* Subtype of type really. May be NULL. Not alloced here. */ 00046 int qStart, qEnd; /* Query (cDNA, protein, etc.) sequence position. */ 00047 char *qName; /* Query sequence name. */ 00048 }; 00049 00050 struct ensExon 00051 /* An ensemble exon. Since multiple transcripts can 00052 * use the same exon, this is stored as a reference on 00053 * a dlList in the transcript and as an instance in the 00054 * slList in the gene. */ 00055 { 00056 struct ensExon *next; /* Next in list (in ensGene) */ 00057 char *id; /* Ensemble ID (not allocated here). */ 00058 struct contigTree *contig; /* Contig within clone this is in. (Not allocated here).*/ 00059 char phase; /* AKA Frame - codon position of 1st base. */ 00060 char endPhase; /* Codon position of last base. */ 00061 int orientation; /* +1 or -1. Strand relative to contig. */ 00062 int seqStart; /* Start position. */ 00063 int seqEnd; /* End position. */ 00064 }; 00065 00066 struct ensTranscript 00067 /* A transcript (isoform) of a gene. */ 00068 { 00069 struct ensTranscript *next; /* Next in list. */ 00070 char *id; /* Ensemble ID. */ 00071 struct dlList *exonList; /* Ordered list of exon references. */ 00072 struct ensExon *startExon; /* Reference to first coding exon. */ 00073 struct ensExon *endExon; /* Reference to last coding exon. */ 00074 int startSeq, endSeq; /* Start, end of coding region. */ 00075 }; 00076 00077 struct ensGene 00078 /* A gene. A collection of exons and how they 00079 * are put together. */ 00080 { 00081 struct ensGene *next; /* Next in list. */ 00082 char *id; /* Ensemble ID with many zeroes. */ 00083 struct ensTranscript *transcriptList; /* List of ways to transcribe and splice. */ 00084 struct hash *exonIdHash; /* Fast lookup of exons from exon ids. */ 00085 struct ensExon *exonList; /* Total exons in all transcripts. */ 00086 }; 00087 00088 void ensGetAnalysisTable(struct ensAnalysis ***retTable, int *retCount); 00089 /* Returns analysis table (array of different things a feature can be). 00090 * No need to free this, it's managed by system. */ 00091 00092 struct dnaSeq *ensDnaInBacRange(char *clone, int start, int end, enum dnaCase dnaCase); 00093 /* Get DNA for range of clone in browser coordinates, including NNNs between contigs. */ 00094 00095 struct dnaSeq *ensDnaInBac(char *clone, enum dnaCase dnaCase); 00096 /* Get DNA for clone in browser coordinates, including NNNs between contigs. */ 00097 00098 00099 struct ensFeature *ensGetFeature(char *featureId); 00100 /* Get a single feature of the given ID. Returns NULL if no such feature. */ 00101 00102 struct ensFeature *ensFeaturesInBac(char *clone); 00103 /* Get list of features associated with BAC clone. */ 00104 00105 struct ensFeature *ensFeaturesInBacRange(char *clone, int start, int end); 00106 /* Get list of features associated a section of BAC clone. */ 00107 00108 void ensFreeFeature(struct ensFeature **pFeature); 00109 /* Free up a single feature. */ 00110 00111 void ensFreeFeatureList(struct ensFeature **pFeatureList); 00112 /* Free up a list of features. */ 00113 00114 00115 00116 struct slName *ensGeneNamesInBac(char *bacName); 00117 /* Get list of all gene names in bac. */ 00118 00119 struct ensGene *ensGetGene(char *geneName); 00120 /* Get named gene. This can also be viewed as a list of one genes. */ 00121 00122 struct ensGene *ensGenesInBac(char *bacName); 00123 /* Get list of all genes in bac. */ 00124 00125 struct ensGene *ensGenesInBacRange(char *bacName, int start, int end); 00126 /* Get list of genes in a section of a BAC clone. The start/end are 00127 * in browser coordinates. */ 00128 00129 void ensFreeGene(struct ensGene **pGene); 00130 /* Free up a single gene. */ 00131 00132 void ensFreeGeneList(struct ensGene **pGeneList); 00133 /* Free up a list of genes. */ 00134 00135 00136 00137 void ensParseContig(char *combined, char retBac[32], int *retContig); 00138 /* Parse combined bac.contig into two separate values. */ 00139 00140 int ensBrowserCoordinates(struct contigTree *contig, int x); 00141 /* Return x in browser coordinates. */ 00142 00143 int ensSubmitCoordinates(struct contigTree *contig, int x); 00144 /* Return x in GenBank/EMBL submission coordinates. */ 00145 00146 int ensBacBrowserLength(char *clone); 00147 /* Return size of clone in browser coordinate space. */ 00148 00149 int ensBacSubmitLength(char *clone); 00150 /* Return size of clone in GenBank/EMBL submission coordinate space. */ 00151 00152 struct contigTree *ensBacContigs(char *bacId); 00153 /* Return contigTree rooted at Bac. Do not free this or modify it, 00154 * the system takes care of it. */ 00155 00156 struct contigTree *ensGetContig(char *contigId); 00157 /* Return contig associated with contigId. Do not free this, system 00158 * takes care of it. */ 00159 00160 void ensTranscriptBounds(struct ensTranscript *trans, int *retStart, int *retEnd); 00161 /* Find beginning and end of transcript in browser coordinates. */ 00162 00163 void ensGeneBounds(struct ensGene *gene, int *retStart, int *retEnd); 00164 /* Find beginning and end of gene in browser coordinates. */ 00165 00166 #endif /* ENS_H */ 00167 00168
1.5.2