inc/wormdna.h

Go to the documentation of this file.
00001 /*****************************************************************************
00002  * Copyright (C) 2000 Jim Kent.  This source code may be freely used         *
00003  * for personal, academic, and non-profit purposes.  Commercial use          *
00004  * permitted only by explicit agreement with Jim Kent (jim_kent@pacbell.net) *
00005  *****************************************************************************/
00006 /* WormDNA - stuff that finds C. elegans sequence data. */
00007 #ifndef WORMDNA_H
00008 #define WORMDNA_H
00009 
00010 #ifndef DNAUTIL_H
00011 #include "dnautil.h"
00012 #endif
00013 
00014 #ifndef DNASEQ_H
00015 #include "dnaseq.h"
00016 #endif 
00017 
00018 #ifndef NT4_H
00019 #include "nt4.h"
00020 #endif 
00021 
00022 #ifndef GDF_H
00023 #include "gdf.h"
00024 #endif
00025 
00026 
00027 struct wormCdnaInfo 
00028 /* Extra info stored in cDNA database other than string. */
00029     {
00030     char *motherString;         /* Holds memory for other strings; freed by wormFreeCdnaInfo(). */
00031     char *name;                 /* Name of cDNA. */
00032     char *gene;                 /* Gene name, something like unc-1. */
00033     char orientation;           /* '+' or '-' (relative to gene transcription direction). */
00034     char *product;              /* Gene product, something like "cyclin-dependent kinase". */
00035     int cdsStart, cdsEnd;       /* Start and stop of coding region within cDNA. */
00036     boolean knowStart, knowEnd; /* Start known?  End known? */
00037     boolean isEmbryonic;        /* True if derived from embryo culture. */
00038     boolean isAdult;            /* True if derived from adult culture. */
00039     boolean isMale;             /* True if males only. */
00040     boolean isHermaphrodite;    /* True if hermaphrodites only. */
00041     char *description;          /* One-line description. */
00042     };
00043 
00044 boolean wormCdnaInfo(char *name, struct wormCdnaInfo *retInfo);
00045 /* Get info about cDNA sequence. */
00046 
00047 void wormFaCommentIntoInfo(char *faComment, struct wormCdnaInfo *retInfo);
00048 /* Process a line from a .fa file containing information about a cDNA into the
00049  * binary structure. */
00050 
00051 void wormFreeCdnaInfo(struct wormCdnaInfo *ci);
00052 /* Free the mother string in the cdnaInfo.  (The info structure itself normally isn't
00053  * dynamically allocated.) */
00054 
00055 boolean wormInfoForGene(char *orfName, struct wormCdnaInfo *retInfo);
00056 /* Get info, if any, on gene/ORF.  Call wormFreeCdnaInfo on retInfo when done.
00057  * NOTE(review): returns boolean, not a pointer - presumably FALSE if none exists; confirm. */
00058 
00059 
00060 boolean wormCdnaSeq(char *name, struct dnaSeq **retDna, struct wormCdnaInfo *retInfo);
00061 /* Get a single worm cDNA sequence. Optionally (if retInfo is non-NULL) get additional
00062  * info about the sequence. */
00063 
00064 void wormCdnaUncache();
00065 /* Release any resources used for caching or quickly accessing worm cDNA.
00066  * NOTE(review): '()' is not a prototype before C23; '(void)' would be stricter. */
00067 
00068 
00069 /* Stuff for searching the entire database of worm cDNA. */
00070 
00071 struct wormCdnaIterator
00072     {
00073     FILE *faFile;   /* NOTE(review): presumably the open .fa file being scanned - confirm. */
00074     };
00075 
00076 boolean wormSearchAllCdna(struct wormCdnaIterator **retSi);
00077 /* Set up to search entire database of worm cDNA. Free iterator with freeWormCdnaIterator(). */
00078 
00079 void freeWormCdnaIterator(struct wormCdnaIterator **pIt);
00080 /* Free up iterator returned by wormSearchAllCdna(). */
00081 
00082 struct dnaSeq *nextWormCdna(struct wormCdnaIterator *it);
00083 /* Return next sequence in database. */
00084 
00085 boolean nextWormCdnaAndInfo(struct wormCdnaIterator *it, struct dnaSeq **retSeq, struct wormCdnaInfo *retInfo);
00086 /* Return next sequence and associated info from database. */
00087 
00088 char *wormFeaturesDir();
00089 /* Return the features directory. (Includes trailing slash.) */
00090 
00091 char *wormChromDir();
00092 /* Return the directory with the chromosomes. */
00093 
00094 char *wormCdnaDir();
00095 /* Return the directory with cDNA data. */
00096 
00097 char *wormSangerDir();
00098 /* Return the directory with Sanger-specific gene predictions. */
00099 
00100 char *wormGenieDir();
00101 /* Return the directory with Genie-specific gene predictions. */
00102 
00103 char *wormXenoDir();
00104 /* Return the directory with cross-species alignments.  NOTE(review): these six getters use '()', not a prototype before C23. */
00105 
00106 boolean wormIsGeneName(char *name);
00107 /* See if it looks like a worm gene name - that is
00108  *   abc-12
00109  * letters followed by a dash followed by a number. */
00110 
00111 boolean wormIsOrfName(char *in);
00112 /* Check to see if the input is formatted correctly to be
00113  * an ORF name. */
00114 
00115 struct slName *wormGeneToOrfNames(char *name);
00116 /* Return list of cosmid.N type ORF names that are known by abc-12 type name. */
00117 
00118 char *wormGeneFirstOrfName(char *geneName);
00119 /* Return first ORF synonym for gene. */
00120 
00121 boolean wormGeneForOrf(char *orfName, char *geneNameBuf, int bufSize);
00122 /* Look for gene type (unc-12 or something) synonym for cosmid.N name. Presumably written to geneNameBuf - confirm. */
00123 
00124 boolean getWormGeneExonDna(char *name, DNA **retDna);
00125 /* Get the exon sequence for a gene. */
00126 
00127 boolean getWormGeneDna(char *name, DNA **retDna, boolean upcExons);
00128 /* Get the DNA associated with a gene.  Optionally upper case exons. */
00129 
00130 void wormLoadNt4Genome(struct nt4Seq ***retNt4Seq, int *retNt4Count);
00131 /* Load up entire packed worm genome into memory. Free with wormFreeNt4Genome(). */
00132 
00133 void wormFreeNt4Genome(struct nt4Seq ***pNt4Seq);
00134 /* Free up packed worm genome. */
00135 
00136 int wormChromSize(char *chrom);
00137 /* Return size of worm chromosome. */
00138 
00139 DNA *wormChromPart(char *chromId, int start, int size);
00140 /* Return part of a worm chromosome. */
00141 
00142 DNA *wormChromPartExonsUpper(char *chromId, int start, int size);
00143 /* Return part of a worm chromosome with exons in upper case. */
00144 
00145 void wormChromNames(char ***retNames, int *retNameCount);
00146 /* Get list of worm chromosome names. */
00147 
00148 int wormChromIx(char *name);
00149 /* Return index of worm chromosome. */
00150 
00151 char *wormChromForIx(int ix);
00152 /* Given ix, return worm chromosome official name. */
00153 
00154 char *wormOfficialChromName(char *name);
00155 /* This returns a pointer to our official string for the chromosome name.
00156  * (This allows some routines to do direct pointer comparisons rather
00157  * than string comparisons.) */
00158 
00159 boolean wormGeneRange(char *name, char **retChromId, char *retStrand, int *retStart, int *retEnd);
00160 /* Return chromosome position of a gene, ORF, nameless cluster, or cosmid. Returns
00161  * FALSE if no such gene/cluster. */
00162 
00163 boolean wormParseChromRange(char *in, char **retChromId, int *retStart, int *retEnd);
00164 /* Chop up a string representation of a range within a chromosome and put the
00165  * pieces into the return variables. Return FALSE if it isn't formatted right. */
00166 
00167 boolean wormIsChromRange(char *in);
00168 /* Check to see if the input is formatted correctly to be
00169  * a range of a chromosome. */
00170 
00171 void wormClipRangeToChrom(char *chrom, int *pStart, int *pEnd);
00172 /* Make sure that we stay inside chromosome. */
00173 
00174 boolean wormIsNamelessCluster(char *name);
00175 /* Return TRUE if name is of correct format to be a nameless cluster. */
00176 
00177 DNA *wormGetNamelessClusterDna(char *name);
00178 /* Get DNA associated with nameless cluster. */
00179 
00180 struct wormFeature
00181 /* This holds info on where something is in the genome. */
00182     {
00183     struct wormFeature *next;   /* Next feature in list. */
00184     char *chrom;    /* Chromosome name. NOTE(review): original comment cut off ("One of names returned by") - presumably wormChromNames(); confirm. */
00185     int start, end; /* Position range within chrom; coordinate convention is not stated in this header. */
00186     char typeByte;  /* Feature type code; its values are not defined in this header. */
00187     char name[1];   /* Allocated to fit. */
00188     };
00189 
00190 struct wormFeature *newWormFeature(char *name, char *chrom, int start, int end, char typeByte);
00191 /* Allocate a new feature. */
00192 
00193 struct wormFeature *wormCdnasInRange(char *chromId, int start, int end);
00194 /* Get info on all cDNAs that overlap the range. */
00195 
00196 struct wormFeature *wormGenesInRange(char *chromId, int start, int end);
00197 /* Get info on all genes that overlap the range. */
00198 
00199 struct wormFeature *wormSomeGenesInRange(char *chromId, int start, int end, char *gdfDir);
00200 /* Get info on genes that overlap range in a particular set of gene predictions. */
00201 
00202 struct wormFeature *wormCosmidsInRange(char *chromId, int start, int end);
00203 /* Get info on all cosmids that overlap the range. */
00204 
00205 struct cdaAli *wormCdaAlisInRange(char *chromId, int start, int end);
00206 /* Return list of cDNA alignments that overlap range. */
00207 
00208 FILE *wormOpenGoodAli();
00209 /* Opens good alignment file and reads signature. 
00210  * (You can then cdaLoadOne() it.)  NOTE(review): '()' is not a prototype before C23. */
00211 
00212 struct wormGdfCache
00213 /* Helps manage fast indexed access to gene predictions. */
00214     {
00215     char **pDir;        /* NOTE(review): presumably points at this set's directory string - confirm. */
00216     struct snof *snof;  /* NOTE(review): presumably the index used for name lookups - confirm. */
00217     FILE *file;         /* NOTE(review): presumably the open gene prediction file - confirm. */
00218     };
00219 extern struct wormGdfCache wormSangerGdfCache;  /* Defined elsewhere; by its name, the Sanger predictions cache. */
00220 extern struct wormGdfCache wormGenieGdfCache;   /* Defined elsewhere; by its name, the Genie predictions cache. */
00221 
00222 struct gdfGene *wormGetGdfGene(char *name);
00223 /* Get the named gdfGene. */
00224 
00225 struct gdfGene *wormGetSomeGdfGene(char *name, struct wormGdfCache *cache);
00226 /* Get a single gdfGene of given name, using the given gene prediction cache. */
00227 
00228 struct gdfGene *wormGetGdfGeneList(char *baseName, int baseNameSize);
00229 /* Get all gdfGenes that start with a given name. */
00230 
00231 struct gdfGene *wormGetSomeGdfGeneList(char *baseName, int baseNameSize, struct wormGdfCache *cache);
00232 /* Get all gdfGenes that start with a given name, using the given gene prediction cache. */
00233 
00234 struct gdfGene *wormGdfGenesInRange(char *chrom, int start, int end, 
00235     struct wormGdfCache *geneFinder);
00236 /* Get list of genes in range according to given gene finder. */
00237 
00238 void wormUncacheGdf();
00239 /* Free up resources associated with fast GDF access.  NOTE(review): '()' is not a prototype before C23. */
00240 
00241 void wormUncacheSomeGdf(struct wormGdfCache *cache);
00242 /* Uncache some gene prediction set. */
00243 
00244 #endif /* WORMDNA_H */
00245 

Generated on Tue Dec 25 18:39:29 2007 for blat by  doxygen 1.5.2