00001
00002
00003 #include "common.h"
00004 #include "linefile.h"
00005 #include "errabort.h"
00006 #include "spacedColumn.h"
00007 #include "hmmPfamParse.h"
00008
/* RCS/CVS $Id$ keyword string identifying the revision of this file. */
static const char rcsid[] = "$Id: hmmPfamParse.c,v 1.3 2007/03/23 16:38:40 kent Exp $";
00010
00011 void hpfModelFree(struct hpfModel **pMod)
00012
00013 {
00014 struct hpfModel *mod = *pMod;
00015 if (mod != NULL)
00016 {
00017 freeMem(mod->name);
00018 freeMem(mod->description);
00019 slFreeList(&mod->domainList);
00020 freez(pMod);
00021 }
00022 }
00023
00024 void hpfModelFreeList(struct hpfModel **pList)
00025
00026 {
00027 struct hpfModel *el, *next;
00028 for (el = *pList; el != NULL; el = next)
00029 {
00030 next = el->next;
00031 hpfModelFree(&el);
00032 }
00033 *pList = NULL;
00034 }
00035
00036
00037 void hpfResultFree(struct hpfResult **pHr)
00038
00039 {
00040 struct hpfResult *hr = *pHr;
00041 if (hr != NULL)
00042 {
00043 freeMem(hr->name);
00044 hpfModelFreeList(&hr->modelList);
00045 freez(pHr);
00046 }
00047 }
00048
00049 void hpfResultFreeList(struct hpfResult **pList)
00050
00051 {
00052 struct hpfResult *el, *next;
00053 for (el = *pList; el != NULL; el = next)
00054 {
00055 next = el->next;
00056 hpfResultFree(&el);
00057 }
00058 *pList = NULL;
00059 }
00060
00061 void parseErr(struct lineFile *lf, char *format, ...)
00062
00063 {
00064 va_list args;
00065 va_start(args, format);
00066 vaWarn(format, args);
00067 va_end(args);
00068 errAbort("line %d of %s", lf->lineIx, lf->fileName);
00069 }
00070
char *needLineStartingWith(struct lineFile *lf, char *start, int maxCount)
/* Wrapper around lineFileSkipToLineStartingWith(lf, start, maxCount) that
 * calls parseErr when that function returns NULL.  Returns whatever the
 * wrapped call returned. */
{
char *found = lineFileSkipToLineStartingWith(lf, start, maxCount);
if (!found)
    parseErr(lf, "Missing line starting with \"%s\"", start);
return found;
}
00079
00080 void spacedColumnFatten(struct spacedColumn *colList)
00081
00082 {
00083 struct spacedColumn *col, *nextCol;
00084 for (col = colList; col != NULL; col = nextCol)
00085 {
00086 nextCol = col->next;
00087 if (nextCol == NULL)
00088 break;
00089 col->size = nextCol->start - col->start - 1;
00090 }
00091 }
00092
00093 struct hpfModel *hpfFindResultInModel(struct hpfResult *hr, char *modName)
00094
00095 {
00096 struct hpfModel *mod;
00097 for (mod = hr->modelList; mod != NULL; mod = mod->next)
00098 if (sameString(mod->name, modName))
00099 break;
00100 return mod;
00101 }
00102
00103 struct hpfResult *hpfNext(struct lineFile *lf)
00104
00105 {
00106
00107 char *queryPat = "Query sequence: ";
00108 char *line = lineFileSkipToLineStartingWith(lf, queryPat, 100);
00109 if (line == NULL)
00110 return NULL;
00111 line += strlen(queryPat);
00112 char *query = cloneString(nextWord(&line));
00113 if (query == NULL)
00114 parseErr(lf, "Missing sequence name");
00115
00116
00117 needLineStartingWith(lf, "Scores for sequence family", 10);
00118 needLineStartingWith(lf, "Model ", 2);
00119 char *template = needLineStartingWith(lf, "----", 1);
00120 struct spacedColumn *colList = spacedColumnFromSample(template);
00121 spacedColumnFatten(colList);
00122 int colCount = slCount(colList);
00123 if (colCount < 5)
00124 parseErr(lf, "Expecting at least 5 columns");
00125
00126
00127 struct hpfResult *hr;
00128 AllocVar(hr);
00129 hr->name = query;
00130 for (;;)
00131 {
00132 lineFileNeedNext(lf, &line, NULL);
00133 line = skipLeadingSpaces(line);
00134 if (line[0] == 0)
00135 break;
00136 if (startsWith("[no hits above thresholds]", line))
00137 break;
00138 char *row[colCount];
00139 if (!spacedColumnParseLine(colList, line, row))
00140 parseErr(lf, "short line");
00141 struct hpfModel *mod;
00142 AllocVar(mod);
00143 mod->name = cloneString(row[0]);
00144 mod->description = cloneString(row[1]);
00145 mod->score = lineFileNeedDouble(lf, row, 2);
00146 mod->eVal = lineFileNeedDouble(lf, row, 3);
00147 slAddTail(&hr->modelList, mod);
00148 }
00149 slFreeList(&colList);
00150
00151
00152 needLineStartingWith(lf, "Parsed for domains:", 10);
00153 needLineStartingWith(lf, "Model ", 2);
00154 template = needLineStartingWith(lf, "----", 1);
00155 colList = spacedColumnFromSample(template);
00156 colCount = slCount(colList);
00157 if (colCount < 8)
00158 parseErr(lf, "Expecting at least 8 columns.");
00159 struct spacedColumn *col2 = colList->next;
00160 colList->size = col2->start - 1;
00161
00162
00163 for (;;)
00164 {
00165 lineFileNeedNext(lf, &line, NULL);
00166 line = skipLeadingSpaces(line);
00167 if (line[0] == 0)
00168 break;
00169 if (startsWith("[no hits above thresholds]", line))
00170 break;
00171 char *row[colCount];
00172 if (!spacedColumnParseLine(colList, line, row))
00173 parseErr(lf, "short line");
00174 struct hpfModel *mod = hpfFindResultInModel(hr, row[0]);
00175 if (mod == NULL)
00176 parseErr(lf, "Model %s in domain section but not model section", row[0]);
00177 struct hpfDomain *dom;
00178 AllocVar(dom);
00179 dom->qStart = lineFileNeedNum(lf, row, 2) - 1;
00180 dom->qEnd = lineFileNeedNum(lf, row, 3);
00181 dom->hmmStart = lineFileNeedNum(lf, row, 4) - 1;
00182 dom->hmmEnd = lineFileNeedNum(lf, row, 5);
00183 dom->score = lineFileNeedDouble(lf, row, 6);
00184 dom->eVal = lineFileNeedDouble(lf, row, 7);
00185 slAddTail(&mod->domainList, dom);
00186 }
00187 slFreeList(&colList);
00188 if (!lineFileSkipToLineStartingWith(lf, "//", 10000000))
00189 parseErr(lf, "Expecting //");
00190 return hr;
00191 }
00192