inc/ens.h

Go to the documentation of this file.
00001 /*****************************************************************************
00002  * Copyright (C) 2000 Jim Kent.  This source code may be freely used         *
00003  * for personal, academic, and non-profit purposes.  Commercial use          *
00004  * permitted only by explicit agreement with Jim Kent (jim_kent@pacbell.net) *
00005  *****************************************************************************/
00006 /* ens.h - Interface to ensEMBL database. */
00007 #ifndef ENS_H
00008 #define ENS_H
00009 
00010 #ifndef DNAUTIL_H
00011 #include "dnautil.h"
00012 #endif 
00013 
00014 #ifndef DLIST_H
00015 #include "dlist.h"
00016 #endif 
00017 
00018 #ifndef UNFIN_H
00019 #include "unfin.h"
00020 #endif
00021 
00022 struct ensAnalysis
00023 /* A category of feature - one entry in the analysis table (see ensGetAnalysisTable). */
00024     {
00025     struct ensAnalysis *next;   /* Next in list. */
00026     int id;                     /* Unique id for this feature type. */
00027     char *db;                   /* Database used by this analysis. */
00028     char *dbVersion;            /* Version of that database. */
00029     char *program;              /* Program used by this analysis. */
00030     char *programVersion;       /* Version of that program. */
00031     char *gffSource;            /* Source field from GFF. */
00032     char *gffFeature;           /* Feature field from GFF. */
00033     char *shortName;            /* Short (15 letter) summary name. */
00034     };
00035 
00036 struct ensFeature
00037 /* An Ensembl feature: relates a range of a target (genomic) contig to a range of a query sequence. */
00038     {
00039     struct ensFeature *next;       /* Next in list. */
00040     struct contigTree *tContig;    /* Target (genomic) contig; a contigTree pointer, not a name string. */
00041     int tStart, tEnd;              /* Start, end position in genomic sequence. */
00042     int score;                     /* Score (units are not documented). */
00043     int orientation;               /* +1 or -1.  Strand relative to contig. */
00044     int type;                      /* Index into analysis table (see ensGetAnalysisTable) describing type of feature. */
00045     char *typeName;                /* Really a subtype of 'type'. May be NULL. Not allocated here. */ 
00046     int qStart, qEnd;              /* Start, end position in query (cDNA, protein, etc.) sequence. */
00047     char *qName;                   /* Query sequence name. */
00048     };
00049 
00050 struct ensExon
00051 /* An Ensembl exon.  Several transcripts can share the
00052  * same exon, so the exon itself is stored once, as an
00053  * instance on the slList in the gene, while each transcript
00054  * only holds a reference to it on its dlList. */
00055     {
00056     struct ensExon *next;               /* Next in list (in ensGene) */
00057     char *id;                           /* Ensembl ID (not allocated here). */
00058     struct contigTree *contig;          /* Contig within clone this is in. (Not allocated here).*/
00059     char phase;                         /* AKA frame - codon position of first base. */
00060     char endPhase;                      /* Codon position of last base. */
00061     int orientation;                    /* +1 or -1. Strand relative to contig. */
00062     int seqStart;                       /* Start position. NOTE(review): coordinate convention not stated here. */
00063     int seqEnd;                         /* End position. */
00064     };
00065 
00066 struct ensTranscript
00067 /* A transcript (isoform) of a gene: an ordered selection of the gene's exons. */
00068     {
00069     struct ensTranscript *next;         /* Next in list. */
00070     char *id;                           /* Ensembl ID. */
00071     struct dlList *exonList;            /* Ordered list of exon references (not exon instances). */
00072     struct ensExon *startExon;          /* Reference to first coding exon. */
00073     struct ensExon *endExon;            /* Reference to last coding exon. */
00074     int startSeq, endSeq;               /* Start, end of coding region. */
00075     };
00076 
00077 struct ensGene
00078 /* A gene.  A collection of exons plus the transcripts
00079  * that describe how those exons are put together. */
00080     {
00081     struct ensGene *next;                 /* Next in list. */
00082     char *id;                             /* Ensembl ID with many zeroes. */
00083     struct ensTranscript *transcriptList; /* List of ways to transcribe and splice. */
00084     struct hash *exonIdHash;              /* Fast lookup of exons from exon ids. */
00085     struct ensExon *exonList;             /* All exons used by any of the transcripts. */
00086     };
00087 
00088 void ensGetAnalysisTable(struct ensAnalysis ***retTable, int *retCount);
00089 /* Returns the analysis table (array of the different things a feature can be) in *retTable,
00090  * presumably with its size in *retCount (confirm).  No need to free this, it's managed by system. */
00091 
00092 struct dnaSeq *ensDnaInBacRange(char *clone, int start, int end, enum dnaCase dnaCase);
00093 /* Get DNA for the start-end range of clone, in browser coordinates, including NNNs between contigs. NOTE(review): who frees the result is not stated here. */
00094 
00095 struct dnaSeq *ensDnaInBac(char *clone, enum dnaCase dnaCase);
00096 /* Get DNA for the whole clone in browser coordinates, including NNNs between contigs. NOTE(review): who frees the result is not stated here. */
00097 
00098 
00099 struct ensFeature *ensGetFeature(char *featureId);
00100 /* Get the single feature with the given ID.  Returns NULL if there is no such feature.  */
00101 
00102 struct ensFeature *ensFeaturesInBac(char *clone);
00103 /* Get list of features associated with BAC clone. */
00104 
00105 struct ensFeature *ensFeaturesInBacRange(char *clone, int start, int end);
00106 /* Get list of features associated with a section of BAC clone. NOTE(review): start/end presumably browser coordinates - confirm. */
00107 
00108 void ensFreeFeature(struct ensFeature **pFeature);
00109 /* Free up a single feature, passed by address of the pointer to it. */
00110 
00111 void ensFreeFeatureList(struct ensFeature **pFeatureList);
00112 /* Free up a list of features, passed by address of the pointer to its head. */
00113 
00114 
00115 
00116 struct slName *ensGeneNamesInBac(char *bacName);
00117 /* Get list of the names of all genes in bac. */
00118 
00119 struct ensGene *ensGetGene(char *geneName);
00120 /* Get named gene. The result can also be viewed as a list of one gene. */
00121 
00122 struct ensGene *ensGenesInBac(char *bacName);
00123 /* Get list of all genes in bac. */
00124 
00125 struct ensGene *ensGenesInBacRange(char *bacName, int start, int end);
00126 /* Get list of genes in a section of a BAC clone.  The start/end are
00127  * given in browser coordinates. */
00128 
00129 void ensFreeGene(struct ensGene **pGene);
00130 /* Free up a single gene, passed by address of the pointer to it. */
00131 
00132 void ensFreeGeneList(struct ensGene **pGeneList);
00133 /* Free up a list of genes, passed by address of the pointer to its head. */
00134 
00135 
00136 
00137 void ensParseContig(char *combined, char retBac[32], int *retContig);
00138 /* Parse combined bac.contig into two separate values: the bac part goes to retBac (declared as 32 chars), the contig part to *retContig. */
00139 
00140 int ensBrowserCoordinates(struct contigTree *contig, int x);
00141 /* Return x in browser coordinates. NOTE(review): x is presumably a position within contig - confirm. */
00142 
00143 int ensSubmitCoordinates(struct contigTree *contig, int x);
00144 /* Return x in GenBank/EMBL submission coordinates. NOTE(review): x is presumably a position within contig - confirm. */
00145 
00146 int ensBacBrowserLength(char *clone);
00147 /* Return size of clone in browser coordinate space. */
00148 
00149 int ensBacSubmitLength(char *clone);
00150 /* Return size of clone in GenBank/EMBL submission coordinate space. */
00151 
00152 struct contigTree *ensBacContigs(char *bacId);
00153 /* Return contigTree rooted at the Bac named by bacId.  Do not free or modify the result,
00154  * the system takes care of it. */
00155 
00156 struct contigTree *ensGetContig(char *contigId);
00157 /* Return the contig associated with contigId. Do not free the result, the system
00158  * takes care of it. */
00159 
00160 void ensTranscriptBounds(struct ensTranscript *trans, int *retStart, int *retEnd);
00161 /* Find beginning and end of transcript in browser coordinates; results are returned via retStart/retEnd. */
00162 
00163 void ensGeneBounds(struct ensGene *gene, int *retStart, int *retEnd);
00164 /* Find beginning and end of gene in browser coordinates; results are returned via retStart/retEnd. */
00165 
00166 #endif /* ENS_H */
00167 
00168 

Generated on Tue Dec 25 18:39:29 2007 for blat by  doxygen 1.5.2