00001
00002
00003
00004
00005 #include "common.h"
00006 #include "fuzzyFind.h"
00007 #include "dnaseq.h"
00008
00009 static char const rcsid[] = "$Id: ffAliHelp.c,v 1.10 2005/11/20 19:24:57 kent Exp $";
00010
00011 void ffCat(struct ffAli **pA, struct ffAli **pB)
00012
00013
00014 {
00015 struct ffAli *a = *pA;
00016 struct ffAli *b = *pB;
00017
00018
00019 if (b == NULL)
00020 return;
00021
00022
00023
00024 if (a == NULL)
00025 {
00026 *pA = *pB;
00027 *pB = NULL;
00028 return;
00029 }
00030
00031
00032
00033 while (a->right != NULL) a = a->right;
00034 b->left = a;
00035 a->right = b;
00036 *pB = NULL;
00037 }
00038
00039 void ffAliSort(struct ffAli **pList, int (*compare )(const void *elem1, const void *elem2))
00040
00041 {
00042
00043 struct ffAli *r = *pList;
00044 if (r == NULL) return;
00045
00046
00047
00048
00049 while (r->right) r = r->right;
00050 slSort(&r, compare);
00051
00052
00053 slReverse(&r);
00054 r = ffMakeRightLinks(r);
00055 *pList = r;
00056 }
00057
00058 int ffCmpHitsHayFirst(const void *va, const void *vb)
00059
00060
00061 {
00062 const struct ffAli *a = *((struct ffAli **)va);
00063 const struct ffAli *b = *((struct ffAli **)vb);
00064 int diff;
00065 if ((diff = a->hStart - b->hStart) != 0)
00066 return diff;
00067 return a->nStart - b->nStart;
00068 }
00069
00070 int ffCmpHitsNeedleFirst(const void *va, const void *vb)
00071
00072
00073 {
00074 const struct ffAli *a = *((struct ffAli **)va);
00075 const struct ffAli *b = *((struct ffAli **)vb);
00076 int diff;
00077 if ((diff = a->nStart - b->nStart) != 0)
00078 return diff;
00079 return a->hStart - b->hStart;
00080 }
00081
00082 void ffExpandExactRight(struct ffAli *ali, DNA *needleEnd, DNA *hayEnd)
00083
00084 {
00085 DNA *nEnd = ali->nEnd;
00086 DNA *hEnd = ali->hEnd;
00087 while (nEnd < needleEnd && hEnd < hayEnd)
00088 {
00089 if (*nEnd != *hEnd)
00090 break;
00091 nEnd += 1;
00092 hEnd += 1;
00093 }
00094 ali->nEnd = nEnd;
00095 ali->hEnd = hEnd;
00096 return;
00097 }
00098
00099 void ffExpandExactLeft(struct ffAli *ali, DNA *needleStart, DNA *hayStart)
00100
00101 {
00102 DNA *nStart = ali->nStart-1;
00103 DNA *hStart = ali->hStart-1;
00104 while (nStart >= needleStart && hStart >= hayStart)
00105 {
00106 if (*nStart != *hStart)
00107 break;
00108 nStart -= 1;
00109 hStart -= 1;
00110 }
00111 ali->nStart = nStart + 1;
00112 ali->hStart = hStart + 1;
00113 return;
00114 }
00115
00116 struct ffAli *ffMergeClose(struct ffAli *aliList,
00117 DNA *needleStart, DNA *hayStart)
00118
00119
00120
00121 {
00122 struct ffAli *mid, *ali;
00123 int closeEnough = -3;
00124
00125 if (aliList == NULL)
00126 return NULL;
00127 for (mid = aliList->right; mid != NULL; mid = mid->right)
00128 {
00129 for (ali = aliList; ali != mid; ali = ali->right)
00130 {
00131 char *nStart, *nEnd;
00132 int nOverlap;
00133 nStart = max(ali->nStart, mid->nStart);
00134 nEnd = min(ali->nEnd, mid->nStart);
00135 nOverlap = nEnd - nStart;
00136
00137
00138 if (nOverlap >= closeEnough)
00139 {
00140 int aliDiag = (ali->nStart - needleStart) - (ali->hStart - hayStart);
00141 int midDiag = (mid->nStart - needleStart) - (mid->hStart - hayStart);
00142 if (aliDiag == midDiag)
00143 {
00144
00145 mid->nStart = min(ali->nStart, mid->nStart);
00146 mid->nEnd = max(ali->nEnd, mid->nEnd);
00147 mid->hStart = min(ali->hStart, mid->hStart);
00148 mid->hEnd = max(ali->hEnd, mid->hEnd);
00149 ali->hStart = ali->hEnd = mid->hStart;
00150 ali->nEnd = ali->nStart = mid->nStart;;
00151 }
00152 }
00153 }
00154 }
00155 aliList = ffRemoveEmptyAlis(aliList, TRUE);
00156 return aliList;
00157 }
00158
00159
00160 int ffScoreIntron(DNA a, DNA b, DNA y, DNA z, int orientation)
00161
00162
00163 {
00164 int score = 0;
00165 int revScore = 0;
00166
00167 if (orientation >= 0)
00168 {
00169 if (a == 'g' || a == 'G') ++score;
00170 if (b == 't' || b == 'T') ++score;
00171 if (y == 'a' || y == 'A') ++score;
00172 if (z == 'g' || z == 'G') ++score;
00173 }
00174
00175 if (orientation <= 0)
00176 {
00177 if (a == 'c' || a == 'C') ++revScore;
00178 if (b == 't' || b == 'T') ++revScore;
00179 if (y == 'a' || y == 'A') ++revScore;
00180 if (z == 'c' || z == 'C') ++revScore;
00181 }
00182
00183 return score > revScore ? score : revScore;
00184 }
00185
00186
00187 static int slideIntron(struct ffAli *left, struct ffAli *right, int orientation)
00188
00189
00190 {
00191 DNA *nLeft = left->nEnd;
00192 DNA *hLeft = left->hEnd;
00193 DNA *nRight = right->nStart;
00194 DNA *hRight = right->hStart;
00195 DNA nl, nr, hl, hr;
00196 DNA *nLeftEnd = left->nStart;
00197 DNA *nRightEnd = right->nEnd;
00198 DNA *nBestLeft = NULL;
00199 int bestScore = -0x7fffffff;
00200 int curScore;
00201 int offset;
00202
00203 if (hRight-hLeft < 4)
00204 return 0;
00205 if (nRight-nLeft > 2)
00206 return 0;
00207
00208
00209 while (nLeft > nLeftEnd)
00210 {
00211 nl = nLeft[-1];
00212 hl = hLeft[-1];
00213 nr = nRight[-1];
00214 hr = hRight[-1];
00215 if (!(nl == 'n' && nr == 'n'))
00216 {
00217 if (nr != hr)
00218 break;
00219 }
00220 nLeft -= 1;
00221 hLeft -= 1;
00222 nRight -= 1;
00223 hRight -= 1;
00224 }
00225
00226
00227 while (nRight < nRightEnd)
00228 {
00229 curScore = ffScoreIntron(hLeft[0], hLeft[1], hRight[-2], hRight[-1], orientation);
00230 if (curScore > bestScore)
00231 {
00232 bestScore = curScore;
00233 nBestLeft = nLeft;
00234 }
00235 nl = nLeft[0];
00236 hl = hLeft[0];
00237 if (nl != 'n' && nl != hl)
00238 break;
00239 nr = nRight[0];
00240 hr = hRight[0];
00241 nLeft += 1;
00242 hLeft += 1;
00243 nRight += 1;
00244 hRight += 1;
00245 }
00246 if (nBestLeft == NULL)
00247 return 0;
00248 offset = nBestLeft - left->nEnd;
00249 if (offset == 0)
00250 return offset;
00251 left->nEnd += offset;
00252 left->hEnd += offset;
00253 right->nStart += offset;
00254 right->hStart += offset;
00255 return offset;
00256 }
00257
00258
00259 boolean ffSlideOrientedIntrons(struct ffAli *ali, int orient)
00260
00261
00262 {
00263 struct ffAli *left = ali, *right;
00264 boolean slid = FALSE;
00265 if (left == NULL)
00266 return FALSE;
00267 while((right = left->right) != NULL)
00268 {
00269 if (slideIntron(left, right, orient))
00270 slid = TRUE;
00271 left = right;
00272 }
00273 return slid;
00274 }
00275
00276 boolean ffSlideIntrons(struct ffAli *ali)
00277
00278
00279 {
00280 int orient = ffIntronOrientation(ali);
00281 return ffSlideOrientedIntrons(ali, orient);
00282 }
00283
00284 struct ffAli *ffRemoveEmptyAlis(struct ffAli *ali, boolean doFree)
00285
00286 {
00287 struct ffAli *leftAli;
00288 struct ffAli *startAli;
00289 struct ffAli *rightAli;
00290
00291
00292 while (ali->left)
00293 ali = ali->left;
00294 while (ali)
00295 {
00296
00297 if (ali->nEnd <= ali->nStart || ali->hEnd <= ali->hStart)
00298 {
00299 struct ffAli *empty = ali;
00300 ali = ali->right;
00301 if (doFree) freeMem(empty);
00302 }
00303 else
00304 break;
00305 }
00306 ali->left = NULL;
00307
00308
00309 startAli = leftAli = ali;
00310 ali = ali->right;
00311 while (ali)
00312 {
00313 rightAli = ali->right;
00314 if (ali->nEnd <= ali->nStart || ali->hEnd <= ali->hStart)
00315 {
00316 leftAli->right = rightAli;
00317 if (rightAli != NULL)
00318 rightAli->left = leftAli;
00319 if (doFree) freeMem(ali);
00320 }
00321 else
00322 {
00323 leftAli = ali;
00324 }
00325 ali = rightAli;
00326 }
00327 return startAli;
00328 }
00329
00330 struct ffAli *ffMergeHayOverlaps(struct ffAli *ali)
00331
00332
00333
00334 {
00335 struct ffAli *a = NULL;
00336 struct ffAli *leftA = NULL;
00337
00338 if (ali == NULL)
00339 return NULL;
00340 a = ali;
00341 for (;;)
00342 {
00343 int nOverlap;
00344 int hOverlap;
00345 int aSize;
00346
00347
00348 leftA = a;
00349 a = a->right;
00350 if (a == NULL)
00351 break;
00352
00353 nOverlap = leftA->nEnd - a->nStart;
00354 hOverlap = leftA->hEnd - a->hStart;
00355 aSize = a->nEnd - a->nStart;
00356 if (hOverlap > 0 && hOverlap < aSize && nOverlap <= 0)
00357 {
00358 a->hStart += hOverlap;
00359 a->nStart += hOverlap;
00360 }
00361 }
00362 return ali;
00363 }
00364
00365 struct ffAli *ffMergeNeedleAlis(struct ffAli *ali, boolean doFree)
00366
00367
00368 {
00369 struct ffAli *a = NULL;
00370 struct ffAli *leftA = NULL;
00371 struct ffAli *rightA;
00372
00373 if (ali == NULL)
00374 return NULL;
00375 rightA = ali;
00376 for (;;)
00377 {
00378
00379 leftA = a;
00380 a = rightA;
00381 if (a == NULL)
00382 break;
00383 rightA = a->right;
00384
00385
00386
00387 if (leftA != NULL)
00388 {
00389 int overlap = leftA->nEnd - a->nStart;
00390
00391
00392 if (overlap > 0)
00393 {
00394
00395 if (leftA->nStart <= a->nStart && leftA->nEnd >= a->nEnd)
00396 {
00397
00398 leftA->right = rightA;
00399 if (rightA != NULL)
00400 rightA->left = leftA;
00401 if (doFree) freeMem(a);
00402 a = leftA;
00403 }
00404 else
00405 {
00406
00407
00408 a->hStart += overlap;
00409 a->nStart += overlap;
00410 }
00411 }
00412 else if (overlap == 0 && leftA->hEnd == a->hStart)
00413 {
00414
00415 leftA->right = rightA;
00416 if (rightA != NULL)
00417 rightA->left = leftA;
00418
00419 leftA->nEnd = a->nEnd;
00420 leftA->hEnd = a->hEnd;
00421 if (doFree) freeMem(a);
00422 a = leftA;
00423 }
00424 }
00425 }
00426 return ali;
00427 }
00428