00001
00002
00003
00004
00005 #include "common.h"
00006 #include "sig.h"
00007 #include "xa.h"
00008
00009 static char const rcsid[] = "$Id: xa.c,v 1.5 2003/05/06 07:33:44 kate Exp $";
00010
00011 void xaAliFree(struct xaAli *xa)
00012
00013 {
00014 freeMem(xa->name);
00015 freeMem(xa->query);
00016 freeMem(xa->target);
00017 freeMem(xa->qSym);
00018 freeMem(xa->tSym);
00019 freeMem(xa->hSym);
00020 freeMem(xa);
00021 }
00022
00023 void xaAliFreeList(struct xaAli **pXa)
00024
00025 {
00026 struct xaAli *xa, *next;
00027 for (xa = *pXa; xa != NULL; xa = next)
00028 {
00029 next = xa->next;
00030 xaAliFree(xa);
00031 }
00032 *pXa = NULL;
00033 }
00034
00035 int xaAliCmpTarget(const void *va, const void *vb)
00036
00037 {
00038 const struct xaAli *a = *((struct xaAli **)va);
00039 const struct xaAli *b = *((struct xaAli **)vb);
00040 int diff;
00041 if ((diff = strcmp(a->target, b->target)) == 0)
00042 diff = a->tStart - b->tStart;
00043 return diff;
00044 }
00045
00046
00047 FILE *xaOpenVerify(char *fileName)
00048
00049
00050 {
00051 FILE *f = mustOpen(fileName, "rb");
00052 return f;
00053 }
00054
00055 FILE *xaIxOpenVerify(char *fileName)
00056
00057 {
00058 FILE *f;
00059 bits32 sig;
00060 f = mustOpen(fileName, "rb");
00061 mustReadOne(f, sig);
00062 if (sig != xaoSig)
00063 errAbort("Bad signature on %s", fileName);
00064 return f;
00065 }
00066
00067 static void eatLf(FILE *f)
00068
00069 {
00070 int c;
00071 c = fgetc(f);
00072 if (c == '\r')
00073 c = fgetc(f);
00074 if (c != '\n')
00075 errAbort("Expecting new line in cross-species alignment file.");
00076 }
00077
00078 static void eatThroughLf(FILE *f)
00079
00080 {
00081 int c;
00082 while ((c = fgetc(f)) != EOF)
00083 if (c == '\n')
00084 break;
00085 }
00086
00087
00088
00089
00090
00091
00092 struct xaAli *xaReadNext(FILE *f, boolean condensed)
00093
00094
00095 {
00096 char line[512];
00097 char *words[16];
00098 int wordCount;
00099 struct xaAli *xa;
00100 char *parts[5];
00101 int partCount;
00102 double percentScore;
00103 int symCount;
00104 int newOffset = 0;
00105 char *s, *e;
00106
00107
00108 if (fgets(line, sizeof(line), f) == NULL)
00109 return NULL;
00110 wordCount = chopLine(line, words);
00111 if (wordCount < 9)
00112 errAbort("Short line in cross-species alignment file");
00113 if (wordCount == 10)
00114 newOffset = 1;
00115 if (!sameString(words[1], "align"))
00116 errAbort("Bad line in cross-species alignment file");
00117 AllocVar(xa);
00118 xa->name = cloneString(words[0]);
00119 s = words[5+newOffset];
00120 e = strrchr(s, ':');
00121 if (e == NULL)
00122 errAbort("Bad line (no colon) in cross-species alignment file");
00123 *e++ = 0;
00124 partCount = chopString(e, "-", parts, ArraySize(parts));
00125 if (partCount != 2)
00126 errAbort("Bad range format in cross-species alignment file");
00127 if (!condensed)
00128 xa->query = cloneString(s);
00129 xa->qStart = atoi(parts[0]);
00130 xa->qEnd = atoi(parts[1]);
00131 xa->qStrand = words[6+newOffset][0];
00132 partCount = chopString(words[7+newOffset], ":-", parts, ArraySize(parts));
00133 if (!condensed)
00134 xa->target = cloneString(parts[0]);
00135 xa->tStart = atoi(parts[1]);
00136 xa->tEnd = atoi(parts[2]);
00137 xa->tStrand = words[8+newOffset][0];
00138 percentScore = atof(words[2]);
00139 xa->milliScore = round(percentScore*10);
00140 xa->symCount = symCount = atoi(words[4]);
00141
00142
00143 if (condensed)
00144 {
00145 eatThroughLf(f);
00146 eatThroughLf(f);
00147 eatThroughLf(f);
00148 }
00149 else
00150 {
00151 xa->qSym = needMem(symCount+1);
00152 mustRead(f, xa->qSym, symCount);
00153 eatLf(f);
00154
00155 xa->tSym = needMem(symCount+1);
00156 mustRead(f, xa->tSym, symCount);
00157 eatLf(f);
00158
00159 xa->hSym = needMem(symCount+1);
00160 mustRead(f, xa->hSym, symCount);
00161 eatLf(f);
00162 }
00163 return xa;
00164 }
00165
00166 struct xaAli *xaRdRange(FILE *ix, FILE *data,
00167 int start, int end, boolean condensed)
00168
00169
00170
00171 {
00172 int s, e;
00173 int maxS, minE;
00174 long offset;
00175 struct xaAli *list = NULL, *xa;
00176
00177
00178
00179
00180 fseek(ix, sizeof(bits32), SEEK_SET);
00181 for (;;)
00182 {
00183 if (!readOne(ix, s))
00184 break;
00185 mustReadOne(ix, e);
00186 mustReadOne(ix, offset);
00187 if (s >= end)
00188 break;
00189 maxS = max(s, start);
00190 minE = min(e, end);
00191 if (minE - maxS > 0)
00192 {
00193 fseek(data, offset, SEEK_SET);
00194 xa = xaReadNext(data, condensed);
00195 slAddHead(&list, xa);
00196 }
00197 }
00198
00199 slReverse(&list);
00200 return list;
00201 }
00202
00203 struct xaAli *xaReadRange(char *rangeIndexFileName, char *dataFileName,
00204 int start, int end, boolean condensed)
00205
00206
00207 {
00208 FILE *ix = xaIxOpenVerify(rangeIndexFileName);
00209 FILE *data = xaOpenVerify(dataFileName);
00210 struct xaAli *xa = xaRdRange(ix, data, start, end, condensed);
00211 fclose(data);
00212 fclose(ix);
00213 return xa;
00214 }
00215
00216
00217 char *xaAlignSuffix()
00218
00219 {
00220 return ".st";
00221 }
00222
00223 char *xaChromIxSuffix()
00224
00225 {
00226 return ".xao";
00227 }
00228
00229
00230