lib/hmmPfamParse.c

Go to the documentation of this file.
00001 /* hmmPfamParse - Parse hmmpfam files. */
00002 
00003 #include "common.h"
00004 #include "linefile.h"
00005 #include "errabort.h"
00006 #include "spacedColumn.h"
00007 #include "hmmPfamParse.h"
00008 
/* RCS revision identifier string for this source file. */
static const char rcsid[] = "$Id: hmmPfamParse.c,v 1.3 2007/03/23 16:38:40 kent Exp $";
00010 
00011 void hpfModelFree(struct hpfModel **pMod)
00012 /* Free memory associated with hpfModel */
00013 {
00014 struct hpfModel *mod = *pMod;
00015 if (mod != NULL)
00016     {
00017     freeMem(mod->name);
00018     freeMem(mod->description);
00019     slFreeList(&mod->domainList);
00020     freez(pMod);
00021     }
00022 }
00023 
00024 void hpfModelFreeList(struct hpfModel **pList)
00025 /* Free a list of dynamically allocated hpfModel's */
00026 {
00027 struct hpfModel *el, *next;
00028 for (el = *pList; el != NULL; el = next)
00029     {
00030     next = el->next;
00031     hpfModelFree(&el);
00032     }
00033 *pList = NULL;
00034 }
00035 
00036 
00037 void hpfResultFree(struct hpfResult **pHr)
00038 /* Free memory associated with hpfResult */
00039 {
00040 struct hpfResult *hr = *pHr;
00041 if (hr != NULL)
00042     {
00043     freeMem(hr->name);
00044     hpfModelFreeList(&hr->modelList);
00045     freez(pHr);
00046     }
00047 }
00048 
00049 void hpfResultFreeList(struct hpfResult **pList)
00050 /* Free a list of dynamically allocated hpfResult's */
00051 {
00052 struct hpfResult *el, *next;
00053 for (el = *pList; el != NULL; el = next)
00054     {
00055     next = el->next;
00056     hpfResultFree(&el);
00057     }
00058 *pList = NULL;
00059 }
00060 
00061 void parseErr(struct lineFile *lf, char *format, ...)
00062 /* Print out a parse error message. */
00063 {
00064 va_list args;
00065 va_start(args, format);
00066 vaWarn(format, args);
00067 va_end(args);
00068 errAbort("line %d of %s", lf->lineIx, lf->fileName);
00069 }
00070 
char *needLineStartingWith(struct lineFile *lf, char *start, int maxCount)
/* Call lineFileSkipToLineStartingWith(lf, start, maxCount) and hand back its
 * result.  When that result is NULL, a parse error naming the missing prefix
 * is raised through parseErr before returning. */
{
char *found = lineFileSkipToLineStartingWith(lf, start, maxCount);
if (!found)
    parseErr(lf, "Missing line starting with \"%s\"", start);
return found;
}
00079 
00080 void spacedColumnFatten(struct spacedColumn *colList)
00081 /* Make columns extend all the way to the next column. */
00082 {
00083 struct spacedColumn *col, *nextCol;
00084 for (col = colList; col != NULL; col = nextCol)
00085     {
00086     nextCol = col->next;
00087     if (nextCol == NULL)
00088         break;
00089     col->size = nextCol->start - col->start - 1;
00090     }
00091 }
00092 
00093 struct hpfModel *hpfFindResultInModel(struct hpfResult *hr, char *modName)
00094 /* Look for named result in model. */
00095 {
00096 struct hpfModel *mod;
00097 for (mod = hr->modelList; mod != NULL; mod = mod->next)
00098     if (sameString(mod->name, modName))
00099         break;
00100 return mod;
00101 }
00102 
00103 struct hpfResult *hpfNext(struct lineFile *lf)
00104 /* Parse out next record in hmmpfam result file. */
00105 {
00106 /* Seek to first line that starts with "Query sequence:" and parse name out of it. */
00107 char *queryPat = "Query sequence: ";
00108 char *line = lineFileSkipToLineStartingWith(lf, queryPat, 100);
00109 if (line == NULL)
00110     return NULL;
00111 line += strlen(queryPat);
00112 char *query = cloneString(nextWord(&line));
00113 if (query == NULL)
00114     parseErr(lf, "Missing sequence name");
00115 
00116 /* Seek to start of model list, figuring out width of fields we need in the process. */
00117 needLineStartingWith(lf, "Scores for sequence family", 10);
00118 needLineStartingWith(lf, "Model ", 2);
00119 char *template = needLineStartingWith(lf, "----", 1);
00120 struct spacedColumn *colList = spacedColumnFromSample(template);
00121 spacedColumnFatten(colList);
00122 int colCount = slCount(colList);
00123 if (colCount < 5)
00124     parseErr(lf, "Expecting at least 5 columns");
00125 
00126 /* Parse out all the models. */
00127 struct hpfResult *hr;
00128 AllocVar(hr);
00129 hr->name = query;
00130 for (;;)
00131     {
00132     lineFileNeedNext(lf, &line, NULL);
00133     line = skipLeadingSpaces(line);
00134     if (line[0] == 0)
00135         break;
00136     if (startsWith("[no hits above thresholds]", line))
00137         break;
00138     char *row[colCount];
00139     if (!spacedColumnParseLine(colList, line, row))
00140         parseErr(lf, "short line");
00141     struct hpfModel *mod;
00142     AllocVar(mod);
00143     mod->name = cloneString(row[0]);
00144     mod->description = cloneString(row[1]);
00145     mod->score = lineFileNeedDouble(lf, row, 2);
00146     mod->eVal = lineFileNeedDouble(lf, row, 3);
00147     slAddTail(&hr->modelList, mod);
00148     }
00149 slFreeList(&colList);
00150 
00151 /* Skip over to the section on domains, figuriong out column widths while we're at it. */
00152 needLineStartingWith(lf, "Parsed for domains:", 10);
00153 needLineStartingWith(lf, "Model ", 2);
00154 template = needLineStartingWith(lf, "----", 1);
00155 colList = spacedColumnFromSample(template);
00156 colCount = slCount(colList);
00157 if (colCount < 8)
00158     parseErr(lf, "Expecting at least 8 columns.");
00159 struct spacedColumn *col2 = colList->next;
00160 colList->size = col2->start - 1;
00161 
00162 /* Parse out all the domains. */
00163 for (;;)
00164     {
00165     lineFileNeedNext(lf, &line, NULL);
00166     line = skipLeadingSpaces(line);
00167     if (line[0] == 0)
00168         break;
00169     if (startsWith("[no hits above thresholds]", line))
00170         break;
00171     char *row[colCount];
00172     if (!spacedColumnParseLine(colList, line, row))
00173         parseErr(lf, "short line");
00174     struct hpfModel *mod = hpfFindResultInModel(hr, row[0]);
00175     if (mod == NULL)
00176         parseErr(lf, "Model %s in domain section but not model section", row[0]);
00177     struct hpfDomain *dom;
00178     AllocVar(dom);
00179     dom->qStart = lineFileNeedNum(lf, row, 2) - 1;
00180     dom->qEnd = lineFileNeedNum(lf, row, 3);
00181     dom->hmmStart = lineFileNeedNum(lf, row, 4) - 1;
00182     dom->hmmEnd = lineFileNeedNum(lf, row, 5);
00183     dom->score = lineFileNeedDouble(lf, row, 6);
00184     dom->eVal = lineFileNeedDouble(lf, row, 7);
00185     slAddTail(&mod->domainList, dom);
00186     }
00187 slFreeList(&colList);
00188 if (!lineFileSkipToLineStartingWith(lf, "//", 10000000))
00189     parseErr(lf, "Expecting //");
00190 return hr;
00191 }
00192 

Generated on Tue Dec 25 18:39:31 2007 for blat by  doxygen 1.5.2