00001 /* maf.h - Multiple alignment format. */ 00002 #ifndef MAF_H 00003 #define MAF_H 00004 00005 #ifndef COMMON_H 00006 #include "common.h" 00007 #endif 00008 00009 #ifndef AXT_H 00010 #include "axt.h" 00011 #endif 00012 00013 struct mafFile 00014 /* A file full of multiple alignments. */ 00015 { 00016 struct mafFile *next; 00017 int version; /* Required */ 00018 char *scoring; /* Optional (may be NULL). Name of scoring scheme. */ 00019 struct mafAli *alignments; /* Possibly empty list of alignments. */ 00020 struct lineFile *lf; /* Open line file if any. NULL except while parsing. */ 00021 }; 00022 00023 void mafFileFree(struct mafFile **pObj); 00024 /* Free up a maf file including closing file handle if necessary. */ 00025 00026 void mafFileFreeList(struct mafFile **pList); 00027 /* Free up a list of maf files. */ 00028 00029 struct mafAli 00030 /* A multiple alignment. */ 00031 { 00032 struct mafAli *next; 00033 double score; /* Score. Meaning depends on mafFile.scoring. 0.0 if no scoring. */ 00034 struct mafComp *components; /* List of components of alignment */ 00035 int textSize; /* Size of text in each component. */ 00036 }; 00037 00038 void mafAliFree(struct mafAli **pObj); 00039 /* Free up a maf alignment. */ 00040 00041 void mafAliFreeList(struct mafAli **pList); 00042 /* Free up a list of maf alignmentx. */ 00043 00044 /* the set of syntenic relationships that the previous and 00045 * following alignments have with the current one */ 00046 #define MAF_INVERSE_STATUS 'V' 00047 #define MAF_INSERT_STATUS 'I' 00048 #define MAF_CONTIG_STATUS 'C' 00049 #define MAF_CONTIG_NESTED_STATUS 'c' 00050 #define MAF_NEW_STATUS 'N' 00051 #define MAF_NEW_NESTED_STATUS 'n' 00052 #define MAF_MAYBE_NEW_STATUS 'S' 00053 #define MAF_MAYBE_NEW_NESTED_STATUS 's' 00054 #define MAF_MISSING_STATUS 'M' 00055 00056 struct mafComp 00057 /* A component of a multiple alignment. */ 00058 { 00059 struct mafComp *next; 00060 char *src; /* Name of sequence source. */ 00061 int srcSize; /* Size of sequence source. */ 00062 char strand; /* Strand of sequence. Either + or -*/ 00063 int start; /* Start within sequence. Zero based. If strand is - is relative to src end. */ 00064 int size; /* Size in sequence (does not include dashes). */ 00065 char *text; /* The sequence including dashes. */ 00066 char *quality; /* The quality data (same length as text, or NULL). */ 00067 char leftStatus; /* the syntenic status of the alignment before us vis a vis ourselves */ 00068 int leftLen; /* length related information for the previous alignment for the species */ 00069 char rightStatus; /* the syntenic status of the alignment after us vis a vis ourselves */ 00070 int rightLen; /* length related information for the following alignment for the species */ 00071 }; 00072 00073 void mafCompFree(struct mafComp **pObj); 00074 /* Free up a maf component. */ 00075 00076 void mafCompFreeList(struct mafComp **pList); 00077 /* Free up a list of maf components. */ 00078 00079 int mafPlusStart(struct mafComp *comp); 00080 /* Return start relative to plus strand of src. */ 00081 00082 struct mafFile *mafOpen(char *fileName); 00083 /* Open up a .maf file for reading. Read header and 00084 * verify. Prepare for subsequent calls to mafNext(). 00085 * Prints error message and aborts if there's a problem. */ 00086 00087 struct mafFile *mafMayOpen(char *fileName); 00088 /* Like mafOpen above, but returns NULL rather than aborting 00089 * if file does not exist. */ 00090 00091 void mafRewind(struct mafFile *mf); 00092 /* Seek to beginning of open maf file */ 00093 00094 struct mafAli *mafNext(struct mafFile *mafFile); 00095 /* Return next alignment in file or NULL if at end. 00096 * This will close the open file handle at end as well. */ 00097 00098 struct mafAli *mafNextWithPos(struct mafFile *mf, off_t *retOffset); 00099 /* Return next alignment in FILE or NULL if at end. If retOffset is 00100 * nonNULL, return start offset of record in file. */ 00101 00102 struct mafFile *mafReadAll(char *fileName); 00103 /* Read in full maf file */ 00104 00105 void mafWriteStart(FILE *f, char *scoring); 00106 /* Write maf header and scoring scheme name (may be null) */ 00107 00108 void mafWrite(FILE *f, struct mafAli *maf); 00109 /* Write next alignment to file. */ 00110 00111 void mafWriteEnd(FILE *f); 00112 /* Write end tag of maf file. */ 00113 00114 void mafWriteAll(struct mafFile *mf, char *fileName); 00115 /* Write out full mafFile. */ 00116 00117 struct mafComp *mafMayFindComponent(struct mafAli *maf, char *src); 00118 /* Find component of given source. Return NULL if not found. */ 00119 00120 struct mafComp *mafMayFindComponentDb(struct mafAli *maf, char *db); 00121 /* Find component of given database or source. Return NULL if not found. */ 00122 00123 struct mafComp *mafFindComponent(struct mafAli *maf, char *src); 00124 /* Find component of given source or die trying. */ 00125 00126 struct mafComp *mafMayFindCompSpecies(struct mafAli *maf, char *species, char sepChar); 00127 /* Find component of given source that starts with species followed by sepChar or '\0' 00128 Return NULL if not found. */ 00129 00130 struct mafComp *mafFindCompSpecies(struct mafAli *maf, char *species, char sepChar); 00131 /* Find component of given source that starts with species followed by sepChar or '\0' 00132 or die trying. */ 00133 00134 struct mafComp *mafMayFindCompPrefix(struct mafAli *maf, char *pre, char *sep); 00135 /* Find component of given source that starts with pre followed by sep. 00136 Return NULL if not found. */ 00137 00138 struct mafComp *mafFindCompPrefix(struct mafAli *maf, char *pre, char *sep); 00139 /* Find component of given source that starts with pre followed by sep 00140 or die trying. */ 00141 00142 boolean mafMayFindAllComponents(struct mafAli *maf, struct hash *cHash); 00143 /* Check to see if all components in hash are in maf block. Return FALSE if not found. */ 00144 00145 struct mafComp *mafMayFindComponentInHash(struct mafAli *maf, struct hash *cHash); 00146 /* Find component of given source that starts matches any string in the cHash. 00147 Return NULL if not found. */ 00148 00149 void mafMoveComponentToTop(struct mafAli *maf, char *componentSource); 00150 /* Move given component to head of component list. */ 00151 00152 struct mafAli *mafFromAxt(struct axt *pAxt, int tSize, 00153 char *tPrefix, int qSize, char *qPrefix); 00154 /* Make up a maf file from axt. Slower than mafFromAxtTemp, 00155 * but the axt and maf are independent afterwards. */ 00156 00157 void mafFromAxtTemp(struct axt *axt, int tSize, int qSize, 00158 struct mafAli *temp); 00159 /* Make a maf out of axt, parasiting on the memory in axt. 00160 * Do *not* mafFree this temp. The memory it has in pointers 00161 * is still owned by the axt. Furthermore the next call to 00162 * this function will invalidate the previous temp value. 00163 * It's sort of a kludge, but quick to run and easy to implement. */ 00164 00165 struct mafAli *mafSubset(struct mafAli *maf, char *componentSource, 00166 int newStart, int newEnd); 00167 /* see mafSubsetE below (called with getInitialDases = FALSE */ 00168 00169 struct mafAli *mafSubsetE(struct mafAli *maf, char *componentSource, 00170 int newStart, int newEnd, bool getInitialDashes); 00171 /* Extract subset of maf that intersects a given range 00172 * in a component sequence. The newStart and newEnd 00173 * are given in the forward strand coordinates of the 00174 * component sequence. The componentSource is typically 00175 * something like 'mm3.chr1'. This will return NULL 00176 * if maf does not intersect range. The score field 00177 * in the returned maf will not be filled in (since 00178 * we don't know which scoring scheme to use). 00179 * If getInitialDashes is TRUE then the initial -'s 00180 * in the reference sequence are *not* removed*/ 00181 00182 boolean mafNeedSubset(struct mafAli *maf, char *componentSource, 00183 int newStart, int newEnd); 00184 /* Return TRUE if maf only partially fits between newStart/newEnd 00185 * in given component. */ 00186 00187 double mafScoreMultiz(struct mafAli *maf); 00188 /* Return score of a maf (calculated rather than what is 00189 * stored in the structure. */ 00190 00191 double mafScoreRangeMultiz(struct mafAli *maf, int start, int size); 00192 /* Return score of a subset of an alignment. Parameters are: 00193 * maf - the alignment 00194 * start - the (zero based) offset to start calculating score 00195 * size - the size of the subset 00196 * The following relationship should hold: 00197 * scoreRange(maf,start,size) = 00198 * scoreRange(maf,0,start+size) - scoreRange(maf,0,start) 00199 */ 00200 00201 double mafScoreMultizMaxCol(int species); 00202 /* Return maximum possible score for a column. */ 00203 00204 void mafColMinMaxScore(struct mafAli *maf, 00205 double *retMin, double *retMax); 00206 /* Get min/max maf scores for a column. */ 00207 00208 void mafFlipStrand(struct mafAli *maf); 00209 /* Reverse complement maf. */ 00210 00211 void mafSrcDb(char *name, char *retDb, int retDbSize); 00212 /* Parse out just database part of name (up to but not including 00213 * first dot). If dot found, return entire name */ 00214 00215 boolean mafColumnEmpty(struct mafAli *maf, int col); 00216 /* Return TRUE if the column is all '-' or '.' */ 00217 00218 void mafStripEmptyColumns(struct mafAli *maf); 00219 /* Remove columns that are all '-' or '.' from maf. */ 00220 00221 #endif /* MAF_H */ 00222
1.5.2