00001
00002
00003
00004
00005
00006
00007 #include "common.h"
00008 #include "axt.h"
00009 #include "pairHmm.h"
00010
00011 static char const rcsid[] = "$Id: pairHmm.c,v 1.5 2005/04/11 06:43:58 kent Exp $";
00012
00013 UBYTE phmmNullMommy = 0;
00014
00015 void phmmUnpackMommy(UBYTE mommy, int *retStateIx, int *retQoff,
00016 int *retToff)
00017
00018 {
00019 *retStateIx = (mommy&31);
00020 *retQoff = -((mommy&32)>>5);
00021 *retToff = -((mommy&64)>>6);
00022 }
00023
00024 struct phmmMatrix *phmmMatrixNew(int stateCount,
00025 char *query, int querySize, char *target, int targetSize)
00026
00027 {
00028 int i;
00029 struct phmmMommy *allCells;
00030 int allCellSize;
00031 int rowSize;
00032 int *allScores;
00033 struct phmmMatrix *am;
00034
00035 AllocVar(am);
00036 am->query = query;
00037 am->target = target;
00038 am->querySize = querySize;
00039 am->targetSize = targetSize;
00040 am->qDim = rowSize = am->querySize + 1;
00041 am->tDim = am->targetSize + 1;
00042 am->stateCount = stateCount;
00043 am->stateSize = rowSize * am->tDim;
00044 am->stateByteSize = am->stateSize * sizeof(struct phmmMommy);
00045 am->states = needMem(stateCount * sizeof(struct phmmState));
00046
00047
00048 allCellSize = stateCount * am->stateByteSize;
00049 am->allCells = allCells = needLargeMem(allCellSize);
00050 memset(allCells, 0, allCellSize);
00051 for (i=0; i<stateCount; ++i)
00052 {
00053 am->states[i].cells = allCells;
00054 allCells += am->stateSize;
00055 am->states[i].stateIx = i;
00056 }
00057
00058
00059 allScores = am->allScores = needMem(rowSize * 2 * stateCount * sizeof(int) );
00060 for (i=0; i<stateCount; ++i)
00061 {
00062 am->states[i].scores = allScores;
00063 allScores += rowSize;
00064 am->states[i].lastScores = allScores;
00065 allScores += rowSize;
00066 }
00067 return am;
00068 }
00069
00070 void phmmMatrixFree(struct phmmMatrix **pAm)
00071
00072
00073 {
00074 struct phmmMatrix *am = *pAm;
00075 if (am != NULL)
00076 {
00077 freeMem(am->states);
00078 freeMem(am->allCells);
00079 freeMem(am->allScores);
00080 freez(pAm);
00081 }
00082 }
00083
00084 struct phmmState *phmmNameState(struct phmmMatrix *am, int stateIx,
00085 char *name, char emitLetter)
00086
00087 {
00088 struct phmmState *state;
00089 assert(stateIx < am->stateCount);
00090 state = &am->states[stateIx];
00091 state->name = name;
00092 state->emitLetter = emitLetter;
00093 return state;
00094 }
00095
00096 static void phmmFindMatrixIx(struct phmmMatrix *am, struct phmmMommy *cell,
00097 int *retStateIx, int *retQix, int *retTix)
00098
00099 {
00100 int cellIx = cell - am->allCells;
00101 *retStateIx = cellIx/am->stateSize;
00102 cellIx %= am->stateSize;
00103 *retTix = cellIx / am->qDim;
00104 *retQix = cellIx % am->qDim;
00105 }
00106
00107 static struct phmmMommy *phmmFindMommy(struct phmmMatrix *am,
00108 struct phmmMommy *baby)
00109
00110 {
00111 int momStateIx, qOff, tOff;
00112 int babyStateIx, qIx, tIx;
00113 UBYTE mommy;
00114
00115 if ((mommy = baby->mommy) == phmmNullMommy)
00116 return NULL;
00117 phmmUnpackMommy(mommy, &momStateIx, &qOff, &tOff);
00118 phmmFindMatrixIx(am, baby, &babyStateIx, &qIx, &tIx);
00119 return am->states[momStateIx].cells + (tOff + tIx) * am->qDim + (qOff + qIx);
00120 }
00121
00122 struct phmmAliPair
00123
00124 {
00125 struct phmmAliPair *next;
00126 int queryIx;
00127 int targetIx;
00128 UBYTE hiddenIx;
00129 char querySym;
00130 char targetSym;
00131 char hiddenSym;
00132 };
00133
00134 struct phmmAliPair *phmmTraceBack(struct phmmMatrix *am, struct phmmMommy *end)
00135
00136
00137 {
00138 struct phmmAliPair *pairList = NULL, *pair;
00139 struct phmmMommy *cell, *parent = end;
00140 int parentSix, parentTix, parentQix;
00141 int sIx, tIx, qIx;
00142
00143 phmmFindMatrixIx(am, parent, &parentSix, &parentQix, &parentTix);
00144 for (;;)
00145 {
00146 cell = parent;
00147 sIx = parentSix;
00148 tIx = parentTix;
00149 qIx = parentQix;
00150
00151 if ((parent = phmmFindMommy(am, cell)) == NULL)
00152 break;
00153 phmmFindMatrixIx(am, parent, &parentSix, &parentQix, &parentTix);
00154
00155 cell->mommy |= phmmMommyTraceBit;
00156
00157 AllocVar(pair);
00158 pair->hiddenIx = (UBYTE)sIx;
00159 pair->hiddenSym = am->states[sIx].emitLetter;
00160
00161 if (parentQix == qIx - 1 && parentTix == tIx - 1 )
00162 {
00163 pair->queryIx = qIx-1;
00164 pair->querySym = am->query[qIx-1];
00165 pair->targetIx = tIx - 1;
00166 pair->targetSym = am->target[tIx-1];
00167 }
00168 else if (parentQix == qIx)
00169 {
00170 pair->queryIx = -1;
00171 pair->querySym = '-';
00172 pair->targetIx = tIx-1;
00173 pair->targetSym = am->target[tIx-1];
00174 }
00175 else
00176 {
00177 pair->queryIx = qIx-1;
00178 pair->querySym = am->query[qIx-1];
00179 pair->targetIx = -1;
00180 pair->targetSym = '-';
00181 }
00182 slAddHead(&pairList, pair);
00183 }
00184 return pairList;
00185 }
00186
00187 void phmmPrintTrace(struct phmmMatrix *am, struct phmmAliPair *pairList,
00188 boolean showStates, FILE *f, boolean extraAtEnds)
00189
00190 {
00191 struct phmmAliPair *pair;
00192 #define lineLen 50
00193 char asyms[lineLen+1];
00194 char qsyms[lineLen+1];
00195 char tsyms[lineLen+1];
00196 char hsyms[lineLen+1];
00197 int lineIx = 0;
00198 int qs, ts;
00199 boolean gotQs = FALSE;
00200
00201 if ((pair = pairList) == NULL)
00202 {
00203 fprintf(f, "Empty pair list\n");
00204 return;
00205 }
00206
00207 qs = pair->queryIx;
00208 ts = pair->targetIx;
00209
00210
00211 if (extraAtEnds)
00212 {
00213 int qStart = qs;
00214 int tStart = ts;
00215 int i;
00216
00217 for (i= -25; i < 0; ++i)
00218 {
00219 int qIx = qStart + i;
00220 int tIx = tStart + i;
00221 qsyms[lineIx] = (qIx >= 0 ? am->query[qIx] : ' ');
00222 tsyms[lineIx] = (tIx >= 0 ? am->target[tIx] : ' ' );
00223 asyms[lineIx] = hsyms[lineIx] = ' ';
00224 if (qIx >= 0 || tIx >= 0)
00225 {
00226 ++lineIx;
00227 if (!gotQs)
00228 {
00229 gotQs = TRUE;
00230 qs = qIx;
00231 ts = tIx;
00232 }
00233 }
00234 }
00235 }
00236
00237
00238 for (pair = pairList; pair != NULL; pair = pair->next)
00239 {
00240 qsyms[lineIx] = pair->querySym;
00241 tsyms[lineIx] = pair->targetSym;
00242 asyms[lineIx] = (pair->querySym == pair->targetSym ? '|' : ' ');
00243 hsyms[lineIx] = pair->hiddenSym;
00244 if (++lineIx == lineLen)
00245 {
00246 qsyms[lineIx] = asyms[lineIx] = tsyms[lineIx] = hsyms[lineIx] = 0;
00247 fprintf(f, "%5d %s\n", qs, qsyms);
00248 fprintf(f, "%5s %s\n", "", asyms);
00249 fprintf(f, "%5d %s\n", ts, tsyms);
00250 if (showStates)
00251 fprintf(f, "%5s %s\n", "", hsyms);
00252 fputc('\n', f);
00253 lineIx = 0;
00254 if (pair->next)
00255 {
00256 qs = pair->next->queryIx;
00257 ts = pair->next->targetIx;
00258 }
00259 }
00260 }
00261
00262
00263 if (extraAtEnds)
00264 {
00265 for (pair = pairList; pair->next != NULL; pair = pair->next)
00266 ;
00267
00268 {
00269 int qIx, tIx, i;
00270 int residue = lineLen - lineIx;
00271 for (i=1; i<=residue; i++)
00272 {
00273 if ((qIx = pair->queryIx+i) >= am->querySize)
00274 qsyms[lineIx] = ' ';
00275 else
00276 qsyms[lineIx] = am->query[qIx];
00277 if ((tIx = pair->targetIx+i) >= am->targetSize)
00278 tsyms[lineIx] = ' ';
00279 else
00280 tsyms[lineIx] = am->target[tIx];
00281 asyms[lineIx] = ' ';
00282 hsyms[lineIx] = ' ';
00283 ++lineIx;
00284 assert(lineIx <= lineLen);
00285 }
00286 }
00287 }
00288
00289
00290 if (lineIx != 0)
00291 {
00292 qsyms[lineIx] = asyms[lineIx] = tsyms[lineIx] = hsyms[lineIx] = 0;
00293 fprintf(f, "%5d %s\n", qs, qsyms);
00294 fprintf(f, "%5s %s\n", "", asyms);
00295 fprintf(f, "%5d %s\n", ts, tsyms);
00296 if (showStates)
00297 fprintf(f, "%5s %s\n", "", hsyms);
00298 fputc('\n', f);
00299 lineIx = 0;
00300 }
00301 #undef lineLen
00302 }
00303
00304 struct axt *phhmTraceToAxt(struct phmmMatrix *am, struct phmmAliPair *pairList,
00305 int score, char *qName, char *tName)
00306
00307 {
00308 struct axt *axt;
00309 struct phmmAliPair *pair;
00310 int qEnd, tEnd;
00311 char *qSym, *tSym;
00312 int i;
00313
00314
00315 if ((pair = pairList) == NULL)
00316 return NULL;
00317
00318
00319 AllocVar(axt);
00320 axt->symCount = slCount(pairList);
00321 axt->qSym = AllocArray(qSym, axt->symCount+1);
00322 axt->tSym = AllocArray(tSym, axt->symCount+1);
00323
00324
00325 axt->qName = cloneString(qName);
00326 axt->tName = cloneString(tName);
00327 axt->qStrand = axt->tStrand = '+';
00328 axt->qStart = qEnd = pair->queryIx;
00329 axt->tStart = tEnd = pair->targetIx;
00330 axt->score = score;
00331
00332
00333 for (i=0, pair = pairList; pair != NULL; pair = pair->next, ++i)
00334 {
00335 qSym[i] = pair->querySym;
00336 tSym[i] = pair->targetSym;
00337 qEnd = pair->queryIx;
00338 tEnd = pair->targetIx;
00339 }
00340
00341
00342 axt->qEnd = qEnd + 1;
00343 axt->tEnd = tEnd + 1;
00344 return axt;
00345 }
00346
00347