lib/xa.c

Go to the documentation of this file.
00001 /* xao.c - Manage cross-species alignments in Intronerator database. 
00002  *
00003  * This file is copyright 2002 Jim Kent, but license is hereby
00004  * granted for all use - public, private or commercial. */
00005 #include "common.h"
00006 #include "sig.h"
00007 #include "xa.h"
00008 
00009 static char const rcsid[] = "$Id: xa.c,v 1.5 2003/05/06 07:33:44 kate Exp $";
00010 
00011 void xaAliFree(struct xaAli *xa)
00012 /* Free up a single xaAli. */
00013 {
00014 freeMem(xa->name);
00015 freeMem(xa->query);
00016 freeMem(xa->target);
00017 freeMem(xa->qSym);
00018 freeMem(xa->tSym);
00019 freeMem(xa->hSym);
00020 freeMem(xa);
00021 }
00022 
00023 void xaAliFreeList(struct xaAli **pXa)
00024 /* Free up a list of xaAlis. */
00025 {
00026 struct xaAli *xa, *next;
00027 for (xa = *pXa; xa != NULL; xa = next)
00028     {
00029     next = xa->next;
00030     xaAliFree(xa);
00031     }
00032 *pXa = NULL;
00033 }
00034 
00035 int xaAliCmpTarget(const void *va, const void *vb)
00036 /* Compare two xaAli's to sort by ascending target positions. */
00037 {
00038 const struct xaAli *a = *((struct xaAli **)va);
00039 const struct xaAli *b = *((struct xaAli **)vb);
00040 int diff;
00041 if ((diff = strcmp(a->target, b->target)) == 0)
00042     diff = a->tStart - b->tStart;
00043 return diff;
00044 }
00045 
00046 
00047 FILE *xaOpenVerify(char *fileName)
00048 /* Open file, verify it's the right type, and
00049  * position file pointer for first xaReadNext(). */
00050 {
00051 FILE *f = mustOpen(fileName, "rb");
00052 return f;
00053 }
00054 
00055 FILE *xaIxOpenVerify(char *fileName)
00056 /* Open file, verify that it's a good xa index. */
00057 {
00058 FILE *f;
00059 bits32 sig;
00060 f = mustOpen(fileName, "rb");
00061 mustReadOne(f, sig);
00062 if (sig != xaoSig)
00063     errAbort("Bad signature on %s", fileName);
00064 return f;
00065 }
00066 
00067 static void eatLf(FILE *f)
00068 /* Read next char and make sure it's a lf. */
00069 {
00070 int c;
00071 c = fgetc(f);
00072 if (c == '\r')
00073     c = fgetc(f);
00074 if (c != '\n')
00075     errAbort("Expecting new line in cross-species alignment file.");
00076 }
00077 
00078 static void eatThroughLf(FILE *f)
00079 /* Read through next lf (discarding results). */
00080 {
00081 int c;
00082 while ((c = fgetc(f)) != EOF)
00083     if (c == '\n')
00084         break;
00085 }
00086 
00087 /* An example line from .st file. 
00088    G11A11.SEQ.c1 align 53.9% of 6096 ACTIN2~1\G11A11.SEQ:0-4999 - v:9730780-9736763 +
00089          0         1     2    3   4             5               6        7          8
00090  */
00091 
00092 struct xaAli *xaReadNext(FILE *f, boolean condensed)
00093 /* Read next xaAli from file. If condensed
00094  * don't fill int query, target, qSym, tSym, or hSym. */
00095 {
00096 char line[512];
00097 char *words[16];
00098 int wordCount;
00099 struct xaAli *xa;
00100 char *parts[5];
00101 int partCount;
00102 double percentScore;
00103 int symCount;
00104 int newOffset = 0;
00105 char *s, *e;
00106 
00107 /* Get first line and parse out everything but the sym lines. */
00108 if (fgets(line, sizeof(line), f) == NULL)
00109     return NULL;
00110 wordCount = chopLine(line, words);
00111 if (wordCount < 9)
00112     errAbort("Short line in cross-species alignment file");
00113 if (wordCount == 10)
00114     newOffset = 1;
00115 if (!sameString(words[1], "align"))
00116     errAbort("Bad line in cross-species alignment file");
00117 AllocVar(xa);
00118 xa->name = cloneString(words[0]);
00119 s = words[5+newOffset];
00120 e = strrchr(s, ':');
00121 if (e == NULL)
00122     errAbort("Bad line (no colon) in cross-species alignment file");
00123 *e++ = 0;
00124 partCount = chopString(e, "-", parts, ArraySize(parts));
00125 if (partCount != 2)
00126     errAbort("Bad range format in cross-species alignment file");
00127 if (!condensed)
00128     xa->query = cloneString(s);
00129 xa->qStart = atoi(parts[0]);
00130 xa->qEnd = atoi(parts[1]);
00131 xa->qStrand = words[6+newOffset][0];
00132 partCount = chopString(words[7+newOffset], ":-", parts, ArraySize(parts));
00133 if (!condensed)
00134     xa->target = cloneString(parts[0]);
00135 xa->tStart = atoi(parts[1]);
00136 xa->tEnd = atoi(parts[2]);
00137 xa->tStrand = words[8+newOffset][0];
00138 percentScore = atof(words[2]);
00139 xa->milliScore = round(percentScore*10);    
00140 xa->symCount = symCount = atoi(words[4]);
00141 
00142 /* Get symbol lines. */
00143 if (condensed)
00144     {
00145     eatThroughLf(f);
00146     eatThroughLf(f);
00147     eatThroughLf(f);
00148     }
00149 else
00150     {
00151     xa->qSym = needMem(symCount+1);
00152     mustRead(f, xa->qSym, symCount);
00153     eatLf(f);
00154 
00155     xa->tSym = needMem(symCount+1);
00156     mustRead(f, xa->tSym, symCount);
00157     eatLf(f);
00158 
00159     xa->hSym = needMem(symCount+1);
00160     mustRead(f, xa->hSym, symCount);
00161     eatLf(f);
00162     }
00163 return xa;
00164 }
00165 
00166 struct xaAli *xaRdRange(FILE *ix, FILE *data, 
00167     int start, int end, boolean condensed)
00168 /* Return list of all xaAlis that range from start to end.  
00169  * Assumes that ix and data files are open. If condensed
00170  * don't fill int query, target, qSym, tSym, or hSym. */
00171 {
00172 int s, e;
00173 int maxS, minE;
00174 long offset;
00175 struct xaAli *list = NULL, *xa;
00176 
00177 
00178 /* Scan through index file looking for things in range.
00179  * When find one read it from data file and add it to list. */
00180 fseek(ix, sizeof(bits32), SEEK_SET);
00181 for (;;)
00182     {
00183     if (!readOne(ix, s))
00184         break;
00185     mustReadOne(ix, e);
00186     mustReadOne(ix, offset);
00187     if (s >= end)
00188         break;
00189     maxS = max(s, start);
00190     minE = min(e, end);
00191     if (minE - maxS > 0)
00192         {
00193         fseek(data, offset, SEEK_SET);
00194         xa = xaReadNext(data, condensed);
00195         slAddHead(&list, xa);
00196         }
00197     }
00198 
00199 slReverse(&list);
00200 return list;
00201 }
00202 
00203 struct xaAli *xaReadRange(char *rangeIndexFileName, char *dataFileName, 
00204     int start, int end, boolean condensed)
00205 /* Return list of all xaAlis that range from start to end.  If condensed
00206  * don't fill int query, target, qSym, tSym, or hSym. */
00207 {
00208 FILE *ix = xaIxOpenVerify(rangeIndexFileName);
00209 FILE *data = xaOpenVerify(dataFileName);
00210 struct xaAli *xa = xaRdRange(ix, data, start, end, condensed);
00211 fclose(data);
00212 fclose(ix);
00213 return xa;
00214 }
00215 
00216 
00217 char *xaAlignSuffix()
00218 /* Return suffix of file with actual alignments. */
00219 {
00220 return ".st";
00221 }
00222 
00223 char *xaChromIxSuffix()
00224 /* Return suffix of files that index xa's by chromosome position. */
00225 {
00226 return ".xao";
00227 }
00228 
00229 
00230 

Generated on Tue Dec 25 18:39:32 2007 for blat by  doxygen 1.5.2