jkOwnLib/ffAliHelp.c

Go to the documentation of this file.
00001 /* ffAliHelp - Helper routines for things that produce (rather than just
00002  * consume) ffAli type alignments. */
00003 /* Copyright 2000-2003 Jim Kent.  All rights reserved. */
00004 
00005 #include "common.h"
00006 #include "fuzzyFind.h"
00007 #include "dnaseq.h"
00008 
00009 static char const rcsid[] = "$Id: ffAliHelp.c,v 1.10 2005/11/20 19:24:57 kent Exp $";
00010 
00011 void ffCat(struct ffAli **pA, struct ffAli **pB)
00012 /* Concatenate B to the end of A. Eat up second list
00013  * in process. */
00014 {
00015 struct ffAli *a = *pA;
00016 struct ffAli *b = *pB;
00017 
00018 /* If list to add is empty our job is real easy. */
00019 if (b == NULL)
00020     return;
00021 
00022 /* If list to add into is empty, then just switch in the
00023  * second list. */
00024 if (a == NULL)
00025     {
00026     *pA = *pB;
00027     *pB = NULL;
00028     return;
00029     }
00030 
00031 /* Neither list empty.  Find rightmost element of first list
00032  * and cross-link it with leftmost element of second list. */
00033 while (a->right != NULL) a = a->right;
00034 b->left = a;
00035 a->right = b;
00036 *pB = NULL;
00037 }
00038 
00039 void ffAliSort(struct ffAli **pList, int (*compare )(const void *elem1,  const void *elem2))
00040 /* Sort a doubly linked list of ffAlis. */
00041 {
00042 /* Get head of list and handle easy special empty case. */
00043 struct ffAli *r = *pList;
00044 if (r == NULL) return;
00045 
00046 /* Since first pointer is "left", in order to reuse slSort, have
00047  * to jump through some minor hoops. First go to right end of list,
00048  * then sort it. */
00049 while (r->right) r = r->right;
00050 slSort(&r, compare);
00051 
00052 /* We're sorted, but our right links are all broken.  Fix this. */
00053 slReverse(&r);
00054 r = ffMakeRightLinks(r);
00055 *pList = r;
00056 }
00057 
00058 int ffCmpHitsHayFirst(const void *va, const void *vb)
00059 /* Compare function to sort hit array by ascending
00060  * target offset followed by ascending query offset. */
00061 {
00062 const struct ffAli *a = *((struct ffAli **)va);
00063 const struct ffAli *b = *((struct ffAli **)vb);
00064 int diff;
00065 if ((diff = a->hStart - b->hStart) != 0)
00066     return diff;
00067 return a->nStart - b->nStart;
00068 }
00069 
00070 int ffCmpHitsNeedleFirst(const void *va, const void *vb)
00071 /* Compare function to sort hit array by ascending
00072  * query offset followed by ascending target offset. */
00073 {
00074 const struct ffAli *a = *((struct ffAli **)va);
00075 const struct ffAli *b = *((struct ffAli **)vb);
00076 int diff;
00077 if ((diff = a->nStart - b->nStart) != 0)
00078     return diff;
00079 return a->hStart - b->hStart;
00080 }
00081 
00082 void ffExpandExactRight(struct ffAli *ali, DNA *needleEnd, DNA *hayEnd)
00083 /* Expand aligned segment to right as far as can exactly. */
00084 {
00085 DNA *nEnd = ali->nEnd;
00086 DNA *hEnd = ali->hEnd;
00087 while (nEnd < needleEnd && hEnd < hayEnd)
00088     {
00089     if (*nEnd != *hEnd)
00090         break;
00091     nEnd += 1;
00092     hEnd += 1;
00093     }
00094 ali->nEnd = nEnd;
00095 ali->hEnd = hEnd;
00096 return;
00097 }
00098 
00099 void ffExpandExactLeft(struct ffAli *ali, DNA *needleStart, DNA *hayStart)
00100 /* Expand aligned segment to left as far as can exactly. */
00101 {
00102 DNA *nStart = ali->nStart-1;
00103 DNA *hStart = ali->hStart-1;
00104 while (nStart >= needleStart && hStart >= hayStart)
00105     {
00106     if (*nStart != *hStart)
00107         break;
00108     nStart -= 1;
00109     hStart -= 1;
00110     }
00111 ali->nStart = nStart + 1;
00112 ali->hStart = hStart + 1;
00113 return;
00114 }
00115 
00116 struct ffAli *ffMergeClose(struct ffAli *aliList, 
00117         DNA *needleStart, DNA *hayStart)
00118 /* Remove overlapping areas needle in alignment. Assumes ali is sorted on
00119  * ascending nStart field. Also merge perfectly abutting neighbors or
00120  * ones that could be merged at the expense of just a few mismatches.*/
00121 {
00122 struct ffAli *mid, *ali;
00123 int closeEnough = -3;
00124 
00125 if (aliList == NULL)
00126     return NULL;
00127 for (mid = aliList->right; mid != NULL; mid = mid->right)
00128     {
00129     for (ali = aliList; ali != mid; ali = ali->right)
00130         {
00131         char *nStart, *nEnd;
00132         int nOverlap;
00133         nStart = max(ali->nStart, mid->nStart);
00134         nEnd = min(ali->nEnd, mid->nStart);
00135         nOverlap = nEnd - nStart;
00136         /* Overlap or perfectly abut in needle, and needle/hay
00137          * offset the same. */
00138         if (nOverlap >= closeEnough)
00139             {
00140             int aliDiag = (ali->nStart - needleStart) - (ali->hStart - hayStart);
00141             int midDiag = (mid->nStart - needleStart) - (mid->hStart - hayStart);
00142             if (aliDiag == midDiag)
00143                 {
00144                 /* Make mid encompass both, and make ali empty. */
00145                 mid->nStart = min(ali->nStart, mid->nStart);
00146                 mid->nEnd = max(ali->nEnd, mid->nEnd);
00147                 mid->hStart = min(ali->hStart, mid->hStart);
00148                 mid->hEnd = max(ali->hEnd, mid->hEnd);
00149                 ali->hStart = ali->hEnd = mid->hStart;
00150                 ali->nEnd = ali->nStart = mid->nStart;;
00151                 }
00152             }
00153         }
00154     }
00155 aliList = ffRemoveEmptyAlis(aliList, TRUE);
00156 return aliList;
00157 }
00158 
00159 
00160 int ffScoreIntron(DNA a, DNA b, DNA y, DNA z, int orientation)
00161 /* Return a better score the closer an intron is to
00162  * consensus. */
00163 {
00164 int score = 0;
00165 int revScore = 0;
00166 
00167 if (orientation >= 0)
00168     {
00169     if (a == 'g' || a == 'G') ++score;
00170     if (b == 't' || b == 'T') ++score;
00171     if (y == 'a' || y == 'A') ++score;
00172     if (z == 'g' || z == 'G') ++score;
00173     }
00174 
00175 if (orientation <= 0)
00176     {
00177     if (a == 'c' || a == 'C') ++revScore;
00178     if (b == 't' || b == 'T') ++revScore;
00179     if (y == 'a' || y == 'A') ++revScore;
00180     if (z == 'c' || z == 'C') ++revScore;
00181     }
00182 
00183 return score > revScore ? score : revScore;
00184 }
00185 
00186 
00187 static int slideIntron(struct ffAli *left, struct ffAli *right, int orientation)
00188 /* Slides space between alignments if possible to match
00189  * intron consensus better.  Returns how much it slid intron. */
00190 {
00191 DNA *nLeft = left->nEnd;
00192 DNA *hLeft = left->hEnd;
00193 DNA *nRight = right->nStart;
00194 DNA *hRight = right->hStart;
00195 DNA nl, nr, hl, hr;
00196 DNA *nLeftEnd = left->nStart;
00197 DNA *nRightEnd = right->nEnd;
00198 DNA *nBestLeft = NULL;
00199 int bestScore = -0x7fffffff;
00200 int curScore;
00201 int offset;
00202 
00203 if (hRight-hLeft < 4)   /* Too short to be an intron. */
00204     return 0;
00205 if (nRight-nLeft > 2)   /* Too big of a gap to be an intron. */
00206     return 0;
00207 
00208 /* Slide as far to the left as possible without inserting mismatches. */
00209 while (nLeft > nLeftEnd)
00210     {
00211     nl = nLeft[-1];
00212     hl = hLeft[-1];
00213     nr = nRight[-1];
00214     hr = hRight[-1];
00215     if (!(nl == 'n' && nr == 'n'))  /* N's in needle freely slide. */
00216         {
00217         if (nr != hr)
00218             break;
00219         }
00220     nLeft -= 1;
00221     hLeft -= 1;
00222     nRight -= 1;
00223     hRight -= 1;
00224     }
00225 /* Slide as far to the right as possible computing
00226    intron score as you go. */
00227 while (nRight < nRightEnd)
00228     {
00229     curScore = ffScoreIntron(hLeft[0], hLeft[1], hRight[-2], hRight[-1], orientation);
00230     if (curScore > bestScore)
00231         {
00232         bestScore = curScore;
00233         nBestLeft = nLeft;
00234         }
00235     nl = nLeft[0];
00236     hl = hLeft[0];
00237     if (nl != 'n' && nl != hl)
00238         break;
00239     nr = nRight[0];
00240     hr = hRight[0];
00241     nLeft += 1;
00242     hLeft += 1;
00243     nRight += 1;
00244     hRight += 1;
00245     }
00246 if (nBestLeft == NULL)
00247     return 0;
00248 offset = nBestLeft - left->nEnd;
00249 if (offset == 0)
00250     return offset;
00251 left->nEnd += offset;
00252 left->hEnd += offset;
00253 right->nStart += offset;
00254 right->hStart += offset;
00255 return offset;
00256 }
00257 
00258 
00259 boolean ffSlideOrientedIntrons(struct ffAli *ali, int orient)
00260 /* Slide introns (or spaces between aligned blocks)
00261  * to match consensus on given strand. */
00262 {
00263 struct ffAli *left = ali, *right;
00264 boolean slid = FALSE;
00265 if (left == NULL)
00266     return FALSE;
00267 while((right = left->right) != NULL)
00268     {
00269     if (slideIntron(left, right, orient))
00270         slid = TRUE;
00271     left = right;
00272     }
00273 return slid;
00274 }
00275 
00276 boolean ffSlideIntrons(struct ffAli *ali)
00277 /* Slide introns (or spaces between aligned blocks)
00278  * to match consensus.  Return TRUE if any slid. */
00279 {
00280 int orient = ffIntronOrientation(ali);
00281 return ffSlideOrientedIntrons(ali, orient);
00282 }
00283 
00284 struct ffAli *ffRemoveEmptyAlis(struct ffAli *ali, boolean doFree)
00285 /* Remove empty blocks from list. Optionally free empties too. */
00286 {
00287 struct ffAli *leftAli;
00288 struct ffAli *startAli;
00289 struct ffAli *rightAli;
00290 
00291 /* Figure out left most non-empty ali. */
00292 while (ali->left)
00293     ali = ali->left;
00294 while (ali)
00295     {
00296     /* If current ali is empty, chuck it out. */
00297     if (ali->nEnd <= ali->nStart || ali->hEnd <= ali->hStart)
00298         {
00299         struct ffAli *empty = ali;
00300         ali = ali->right;
00301         if (doFree) freeMem(empty);
00302         }
00303     else
00304         break;
00305     }
00306 ali->left = NULL;
00307 
00308 /* Get rid of empty middle alis. */
00309 startAli = leftAli = ali;
00310 ali = ali->right;
00311 while (ali)
00312     {
00313     rightAli = ali->right;
00314     if (ali->nEnd <= ali->nStart || ali->hEnd <= ali->hStart)
00315         {
00316         leftAli->right = rightAli;
00317         if (rightAli != NULL)
00318             rightAli->left = leftAli;
00319         if (doFree) freeMem(ali);
00320         }
00321     else
00322         {
00323         leftAli = ali;
00324         }
00325     ali = rightAli;
00326     }
00327 return startAli;
00328 }
00329 
00330 struct ffAli *ffMergeHayOverlaps(struct ffAli *ali)
00331 /* Remove overlaps in haystack that perfectly abut in needle.
00332  * These are transformed into perfectly abutting haystacks
00333  * that have a gap in the needle. */
00334 {
00335 struct ffAli *a = NULL;
00336 struct ffAli *leftA = NULL;
00337 
00338 if (ali == NULL)
00339     return NULL;
00340 a = ali;
00341 for (;;)
00342     {
00343     int nOverlap;
00344     int hOverlap;
00345     int aSize;
00346 
00347     /* Advance to next ali */
00348     leftA = a;
00349     a = a->right;
00350     if (a == NULL)
00351         break;
00352 
00353     nOverlap = leftA->nEnd - a->nStart;
00354     hOverlap = leftA->hEnd - a->hStart;
00355     aSize = a->nEnd - a->nStart;
00356     if (hOverlap > 0 && hOverlap < aSize && nOverlap <= 0)
00357         {
00358         a->hStart += hOverlap;
00359         a->nStart += hOverlap;
00360         }
00361     }
00362 return ali;
00363 }
00364 
00365 struct ffAli *ffMergeNeedleAlis(struct ffAli *ali, boolean doFree)
00366 /* Remove overlapping areas needle in alignment. Assumes ali is sorted on
00367  * ascending nStart field. Also merge perfectly abutting neighbors.*/
00368 {
00369 struct ffAli *a = NULL;
00370 struct ffAli *leftA = NULL;
00371 struct ffAli *rightA;
00372 
00373 if (ali == NULL)
00374     return NULL;
00375 rightA = ali;
00376 for (;;)
00377     {
00378     /* Advance to next ali */
00379     leftA = a;
00380     a = rightA;
00381     if (a == NULL)
00382         break;
00383     rightA = a->right;
00384     
00385 
00386     /* See if can merge current alignment into left one. */
00387     if (leftA != NULL)
00388         {
00389         int overlap = leftA->nEnd - a->nStart;
00390 
00391         /* Deal with overlaps in needle */
00392         if (overlap > 0)
00393             {
00394             /* See if left encompasses current segment. */
00395             if (leftA->nStart <= a->nStart && leftA->nEnd >= a->nEnd)
00396                 {
00397                 /* Eliminate current segment. */
00398                 leftA->right = rightA;
00399                 if (rightA != NULL)
00400                     rightA->left = leftA;
00401                 if (doFree) freeMem(a);
00402                 a = leftA;
00403                 }
00404             else
00405                 {
00406                 /* Remove overlapping area from current segment, leave
00407                  * it in left segment. */
00408                 a->hStart += overlap;
00409                 a->nStart += overlap;
00410                 }
00411             }
00412         else if (overlap == 0 && leftA->hEnd == a->hStart)
00413             {
00414             /* Remove current segment from list. */
00415             leftA->right = rightA;
00416             if (rightA != NULL)
00417                 rightA->left = leftA;
00418             /* Fold data from current segment into left segment */
00419             leftA->nEnd = a->nEnd;
00420             leftA->hEnd = a->hEnd;
00421             if (doFree) freeMem(a); 
00422             a = leftA;
00423             }
00424         }
00425     }
00426 return ali;
00427 }
00428 

Generated on Tue Dec 25 18:39:29 2007 for blat by  doxygen 1.5.2