inc/maf.h

Go to the documentation of this file.
00001 /* maf.h - Multiple alignment format.  */
00002 #ifndef MAF_H
00003 #define MAF_H
00004 
00005 #ifndef COMMON_H
00006 #include "common.h"
00007 #endif
00008 
00009 #ifndef AXT_H
00010 #include "axt.h"
00011 #endif
00012 
00013 struct mafFile
00014 /* A file full of multiple alignments. */
00015     {
00016     struct mafFile *next;
00017     int version;         /* Required */
00018     char *scoring;       /* Optional (may be NULL). Name of  scoring scheme. */
00019     struct mafAli *alignments;  /* Possibly empty list of alignments. */
00020     struct lineFile *lf; /* Open line file if any. NULL except while parsing. */
00021     };
00022 
00023 void mafFileFree(struct mafFile **pObj);
00024 /* Free up a maf file including closing file handle if necessary. */
00025 
00026 void mafFileFreeList(struct mafFile **pList);
00027 /* Free up a list of maf files. */
00028 
00029 struct mafAli
00030 /* A multiple alignment. */
00031     {
00032     struct mafAli *next;
00033     double score;        /* Score.  Meaning depends on mafFile.scoring.  0.0 if no scoring. */
00034     struct mafComp *components; /* List of components of alignment */
00035     int textSize;         /* Size of text in each component. */
00036     };
00037 
00038 void mafAliFree(struct mafAli **pObj);
00039 /* Free up a maf alignment. */
00040 
00041 void mafAliFreeList(struct mafAli **pList);
00042 /* Free up a list of maf alignments. */
00043 
00044 /* the set of syntenic relationships that the previous and
00045  * following alignments have with the current one */
00046 #define MAF_INVERSE_STATUS              'V'
00047 #define MAF_INSERT_STATUS               'I'
00048 #define MAF_CONTIG_STATUS               'C'
00049 #define MAF_CONTIG_NESTED_STATUS        'c'
00050 #define MAF_NEW_STATUS                  'N'
00051 #define MAF_NEW_NESTED_STATUS           'n'
00052 #define MAF_MAYBE_NEW_STATUS            'S'
00053 #define MAF_MAYBE_NEW_NESTED_STATUS     's'
00054 #define MAF_MISSING_STATUS              'M'
00055 
00056 struct mafComp
00057 /* A component of a multiple alignment. */
00058     {
00059     struct mafComp *next;
00060     char *src;   /* Name of sequence source.  */
00061     int srcSize; /* Size of sequence source.  */
00062     char strand; /* Strand of sequence.  Either '+' or '-'. */
00063     int start;   /* Start within sequence. Zero based. If strand is '-', start is relative to src end. */
00064     int size;    /* Size in sequence (does not include dashes).  */
00065     char *text;  /* The sequence including dashes. */
00066     char *quality;  /* The quality data (same length as text, or NULL). */
00067     char leftStatus; /* the syntenic status of the alignment before us vis a vis ourselves */
00068     int leftLen;     /* length related information for the previous alignment for the species */
00069     char rightStatus; /* the syntenic status of the alignment after us vis a vis ourselves */
00070     int rightLen;     /* length related information for the following alignment for the species */
00071     };
00072 
00073 void mafCompFree(struct mafComp **pObj);
00074 /* Free up a maf component. */
00075 
00076 void mafCompFreeList(struct mafComp **pList);
00077 /* Free up a list of maf components. */
00078 
00079 int mafPlusStart(struct mafComp *comp);
00080 /* Return start relative to plus strand of src. */
00081 
00082 struct mafFile *mafOpen(char *fileName);
00083 /* Open up a .maf file for reading.  Read header and
00084  * verify. Prepare for subsequent calls to mafNext().
00085  * Prints error message and aborts if there's a problem. */
00086 
00087 struct mafFile *mafMayOpen(char *fileName);
00088 /* Like mafOpen above, but returns NULL rather than aborting 
00089  * if file does not exist. */
00090 
00091 void mafRewind(struct mafFile *mf);
00092 /* Seek to beginning of open maf file */
00093 
00094 struct mafAli *mafNext(struct mafFile *mafFile);
00095 /* Return next alignment in file or NULL if at end. 
00096  * This will close the open file handle at end as well. */
00097 
00098 struct mafAli *mafNextWithPos(struct mafFile *mf, off_t *retOffset);
00099 /* Return next alignment in file or NULL if at end.  If retOffset is
00100  * non-NULL, return start offset of record in file. */
00101 
00102 struct mafFile *mafReadAll(char *fileName);
00103 /* Read in full maf file */
00104 
00105 void mafWriteStart(FILE *f, char *scoring);
00106 /* Write maf header and scoring scheme name (may be null) */
00107 
00108 void mafWrite(FILE *f, struct mafAli *maf);
00109 /* Write next alignment to file. */
00110 
00111 void mafWriteEnd(FILE *f);
00112 /* Write end tag of maf file. */
00113 
00114 void mafWriteAll(struct mafFile *mf, char *fileName);
00115 /* Write out full mafFile. */
00116 
00117 struct mafComp *mafMayFindComponent(struct mafAli *maf, char *src);
00118 /* Find component of given source. Return NULL if not found. */
00119 
00120 struct mafComp *mafMayFindComponentDb(struct mafAli *maf, char *db);
00121 /* Find component of given database or source. Return NULL if not found. */
00122 
00123 struct mafComp *mafFindComponent(struct mafAli *maf, char *src);
00124 /* Find component of given source or die trying. */
00125 
00126 struct mafComp *mafMayFindCompSpecies(struct mafAli *maf, char *species, char sepChar);
00127 /* Find component of given source that starts with species followed by sepChar or '\0'
00128    Return NULL if not found. */
00129 
00130 struct mafComp *mafFindCompSpecies(struct mafAli *maf, char *species, char sepChar);
00131 /* Find component of given source that starts with species followed by sepChar or '\0'
00132    or die trying. */
00133 
00134 struct mafComp *mafMayFindCompPrefix(struct mafAli *maf, char *pre, char *sep);
00135 /* Find component of given source that starts with pre followed by sep.
00136    Return NULL if not found. */
00137 
00138 struct mafComp *mafFindCompPrefix(struct mafAli *maf, char *pre, char *sep);
00139 /* Find component of given source that starts with pre followed by sep
00140    or die trying. */
00141 
00142 boolean mafMayFindAllComponents(struct mafAli *maf, struct hash *cHash);
00143 /* Check to see if all components in hash are in maf block.  Return FALSE if not found. */
00144 
00145 struct mafComp *mafMayFindComponentInHash(struct mafAli *maf, struct hash *cHash);
00146 /* Find component whose source matches any string in the cHash.
00147    Return NULL if not found. */
00148 
00149 void mafMoveComponentToTop(struct mafAli *maf, char *componentSource);
00150 /* Move given component to head of component list. */
00151 
00152 struct mafAli *mafFromAxt(struct axt *pAxt, int tSize, 
00153         char *tPrefix, int qSize, char *qPrefix);
00154 /* Make up a maf file from axt.  Slower than mafFromAxtTemp,
00155  * but the axt and maf are independent afterwards. */
00156 
00157 void mafFromAxtTemp(struct axt *axt, int tSize, int qSize,
00158         struct mafAli *temp);
00159 /* Make a maf out of axt, borrowing the memory in axt.
00160  * Do *not* free this temp with mafAliFree.  The memory it has in pointers
00161  * is still owned by the axt.  Furthermore the next call to
00162  * this function will invalidate the previous temp value.
00163  * It's sort of a kludge, but quick to run and easy to implement. */
00164 
00165 struct mafAli *mafSubset(struct mafAli *maf, char *componentSource,
00166         int newStart, int newEnd);
00167 /* See mafSubsetE below (called with getInitialDashes = FALSE). */
00168 
00169 struct mafAli *mafSubsetE(struct mafAli *maf, char *componentSource,
00170         int newStart, int newEnd, bool getInitialDashes);
00171 /* Extract subset of maf that intersects a given range
00172  * in a component sequence.  The newStart and newEnd
00173  * are given in the forward strand coordinates of the
00174  * component sequence.  The componentSource is typically
00175  * something like 'mm3.chr1'.  This will return NULL
00176  * if maf does not intersect range.  The score field
00177  * in the returned maf will not be filled in (since
00178  * we don't know which scoring scheme to use). 
00179  * If getInitialDashes is TRUE then the initial -'s
00180  * in the reference sequence are *not* removed. */
00181 
00182 boolean mafNeedSubset(struct mafAli *maf, char *componentSource,
00183         int newStart, int newEnd);
00184 /* Return TRUE if maf only partially fits between newStart/newEnd
00185  * in given component. */
00186 
00187 double mafScoreMultiz(struct mafAli *maf);
00188 /* Return score of a maf (calculated rather than what is
00189  * stored in the structure). */
00190 
00191 double mafScoreRangeMultiz(struct mafAli *maf, int start, int size);
00192 /* Return score of a subset of an alignment.  Parameters are:
00193  *    maf - the alignment
00194  *    start - the (zero based) offset to start calculating score
00195  *    size - the size of the subset
00196  * The following relationship should hold:
00197  *   scoreRange(maf,start,size) =
00198  *      scoreRange(maf,0,start+size) - scoreRange(maf,0,start)
00199  */
00200 
00201 double mafScoreMultizMaxCol(int species);
00202 /* Return maximum possible score for a column. */
00203 
00204 void mafColMinMaxScore(struct mafAli *maf, 
00205         double *retMin, double *retMax);
00206 /* Get min/max maf scores for a column. */
00207 
00208 void mafFlipStrand(struct mafAli *maf);
00209 /* Reverse complement maf. */
00210 
00211 void mafSrcDb(char *name, char *retDb, int retDbSize);
00212 /* Parse out just database part of name (up to but not including
00213  * first dot). If no dot is found, return entire name. */
00214 
00215 boolean mafColumnEmpty(struct mafAli *maf, int col);
00216 /* Return TRUE if the column is all '-' or '.' */
00217 
00218 void mafStripEmptyColumns(struct mafAli *maf);
00219 /* Remove columns that are all '-' or '.' from  maf. */
00220 
00221 #endif /* MAF_H */
00222 

Generated on Tue Dec 25 18:39:29 2007 for blat by  doxygen 1.5.2