00001 #include "common.h"
00002 #include "linefile.h"
00003 #include "dystring.h"
00004 #include "phyloTree.h"
00005
00006 struct phyloTree *phyloReadTree(struct lineFile *lf)
00007
00008 {
00009 struct phyloTree *tree = NULL;
00010 char *ptr;
00011 int len;
00012
00013 if (lineFileNext(lf, &ptr, &len) && (len > 0))
00014 tree = phyloParseString(ptr);
00015
00016 return tree;
00017 }
00018
00019 struct phyloTree *phyloOpenTree(char *fileName)
00020 {
00021 struct lineFile *lf = lineFileOpen(fileName, TRUE);
00022 struct phyloTree *tree = phyloReadTree(lf);
00023
00024 lineFileClose(&lf);
00025
00026 return tree;
00027 }
00028
00029
00030 static struct phyloName *parseIdent(char **ptrPtr)
00031
00032 {
00033 struct phyloName *pName;
00034 char *start = *ptrPtr;
00035 char *ptr = *ptrPtr;
00036
00037 AllocVar(pName);
00038
00039 while(isalpha(*ptr) || isdigit(*ptr) || (*ptr == '/')
00040 || (*ptr == '\'')
00041 || (*ptr == '.') || (*ptr == '_'))
00042 ptr++;
00043
00044
00045 if(ptr > start)
00046 {
00047 char val;
00048
00049 val = *ptr;
00050 *ptr = 0;
00051 pName->name = cloneString(start);
00052 *ptr = val;
00053 }
00054
00055
00056 if (*ptr == ':')
00057 {
00058 ptr++;
00059 sscanf(ptr, "%lg", &pName->length);
00060 while ((*ptr != '[') && (*ptr != ')') && (*ptr != ',') && (*ptr != ';'))
00061 ptr++;
00062 }
00063
00064 *ptrPtr = ptr;
00065
00066 return pName;
00067 }
00068
00069 static struct phyloTree *newEdge(struct phyloTree *parent, struct phyloTree *child)
00070 {
00071 parent->numEdges++;
00072
00073 if (parent->numEdges > parent->allocedEdges)
00074 {
00075 int oldSize = parent->allocedEdges * sizeof (struct phyloTree *);
00076 int newSize;
00077
00078 parent->allocedEdges += 5;
00079 newSize = parent->allocedEdges * sizeof (struct phyloTree *);
00080 parent->edges = needMoreMem(parent->edges, oldSize, newSize);
00081 }
00082
00083 child->parent = parent;
00084 return parent->edges[parent->numEdges -1 ] = child;
00085 }
00086
00087 struct phyloTree *phyloAddEdge(struct phyloTree *parent, struct phyloTree *child)
00088
00089 {
00090 return newEdge(parent, child);
00091 }
00092
00093 static struct phyloTree *parseSubTree(char **ptrPtr)
00094
00095 {
00096 struct phyloTree *node = NULL;
00097 char *ptr = *ptrPtr;
00098
00099
00100 if ((*ptr == ';') || (*ptr == ',') || (*ptr == ')') )
00101 return NULL;
00102
00103 AllocVar(node);
00104 if (*ptr == '(')
00105 {
00106 struct phyloTree *edge;
00107
00108 ptr++;
00109
00110 do
00111 {
00112 edge = newEdge(node,parseSubTree(&ptr));
00113 edge->parent = node;
00114 } while (*ptr++ == ',');
00115 --ptr;
00116 if (*ptr++ != ')')
00117 errAbort("unbalanced parenthesis at (%s)",ptr-1);
00118 node->ident = parseIdent(&ptr);
00119 }
00120 else
00121 if ((*ptr == ':') || (isalpha(*ptr))|| (isdigit(*ptr))
00122 || (*ptr == '\'') || (*ptr == '.'))
00123 node->ident = parseIdent(&ptr);
00124 else
00125 errAbort("illegal char '%c' in phyloString",*ptr);
00126
00127 if (*ptr == '[')
00128 {
00129 if (startsWith("[&&NHX:D=Y]",ptr))
00130 node->isDup = TRUE;
00131
00132 while(*ptr != ']')
00133 ptr++;
00134
00135 ptr++;
00136
00137 }
00138
00139 *ptrPtr = ptr;
00140
00141
00142 return node;
00143 }
00144
00145 struct phyloTree *phyloParseString(char *string)
00146
00147 {
00148 struct phyloTree *tree = NULL;
00149 char *ptr = string;
00150
00151 eraseWhiteSpace(string);
00152
00153 tree = parseSubTree(&ptr);
00154
00155 if (*ptr != ';')
00156 errAbort("trees must terminated by ';'");
00157
00158 return tree;
00159 }
00160
00161
00162
00163 static int recurseCount = 0;
00164
00165 static void tabOut(FILE *f)
00166 {
00167 int i;
00168
00169 for(i=0; i < recurseCount; i++)
00170 fputc(' ',f);
00171 }
00172
00173 static void pTree( struct phyloTree *tree,FILE *f, boolean noDups)
00174
00175 {
00176 if (tree)
00177 {
00178 int ii;
00179 if (noDups && (tree->numEdges == 1))
00180 pTree(tree->edges[0], f, noDups);
00181 else
00182 {
00183 if (tree->numEdges)
00184 {
00185 fprintf(f,"(");
00186 for (ii= 0; ii < tree->numEdges; ii++)
00187 {
00188 pTree(tree->edges[ii], f, noDups);
00189 if (ii + 1 < tree->numEdges)
00190 fprintf(f,",");
00191 }
00192 fprintf(f,")");
00193 }
00194 if (tree->ident->name)
00195 fprintf(f,"%s",tree->ident->name);
00196
00197 fprintf(f,":%0.04g", tree->ident->length);
00198 if (tree->isDup)
00199 fprintf(f,"[&&NHX:D=Y]");
00200 }
00201 }
00202 }
00203
00204 void phyloPrintTreeNoDups( struct phyloTree *tree,FILE *f)
00205
00206 {
00207 pTree(tree, f, TRUE);
00208 fprintf(f, ";\n");
00209 }
00210
00211 void phyloPrintTree( struct phyloTree *tree,FILE *f)
00212
00213 {
00214 pTree(tree, f, FALSE);
00215 fprintf(f, ";\n");
00216 }
00217
00218 void phyloDebugTree( struct phyloTree *tree,FILE *f)
00219
00220 {
00221 if (tree)
00222 {
00223 int ii;
00224 fprintf(f,"%s:%g numEdges %d\n",tree->ident->name, tree->ident->length, tree->numEdges);
00225 recurseCount++;
00226 for (ii= 0; ii < tree->numEdges; ii++)
00227 {
00228 tabOut(f);
00229 phyloDebugTree(tree->edges[ii], f);
00230 }
00231 recurseCount--;
00232 }
00233 }
00234
00235 struct phyloTree *phyloFindName( struct phyloTree *tree,char *name )
00236
00237 {
00238 struct phyloTree *subTree = NULL;
00239 int ii;
00240
00241 if (tree->ident->name && sameString(tree->ident->name, name))
00242 return tree;
00243
00244 for (ii=0; ii < tree->numEdges; ii++)
00245 {
00246 if ((subTree = phyloFindName( tree->edges[ii], name)) != NULL)
00247 break;
00248 }
00249
00250 return subTree;
00251 }
00252
00253 void phyloClearTreeMarks(struct phyloTree *tree)
00254
00255 {
00256 int ii;
00257
00258 tree->mark = 0;
00259
00260 for (ii=0; ii < tree->numEdges; ii++)
00261 phyloClearTreeMarks(tree->edges[ii]);
00262 }
00263
00264 struct phyloTree *phyloFindMarkUpTree(struct phyloTree *tree)
00265
00266 {
00267 do
00268 {
00269 if (tree->mark)
00270 return tree;
00271 tree = tree->parent;
00272 } while (tree);
00273
00274 return NULL;
00275 }
00276
00277 void phyloMarkUpTree(struct phyloTree *tree)
00278
00279 {
00280 tree->mark = tree;
00281 for(;tree->parent; tree = tree->parent)
00282 tree->parent->mark = tree;
00283 }
00284
00285 char *phyloFindPath(struct phyloTree *tree, char *ref, char *cross)
00286
00287
00288 {
00289 struct phyloTree *treeRef, *treeCross, *parent;
00290 struct dyString *ds = newDyString(0);
00291
00292 if ((treeRef = phyloFindName(tree,ref)) == NULL)
00293 return NULL;
00294
00295 if ((treeCross = phyloFindName(tree,cross)) == NULL)
00296 return NULL;
00297
00298 phyloClearTreeMarks(tree);
00299 phyloMarkUpTree(treeCross);
00300 if ((parent = phyloFindMarkUpTree(treeRef)) == NULL)
00301 return NULL;
00302
00303
00304 while(treeRef != parent)
00305 {
00306 treeRef = treeRef->parent;
00307 if (ds->stringSize)
00308 dyStringAppendC(ds, ' ');
00309 if (treeRef->ident->name)
00310 dyStringAppendN(ds, treeRef->ident->name, strlen(treeRef->ident->name));
00311 }
00312
00313
00314 while (parent != treeCross)
00315 {
00316 parent = parent->mark;
00317 dyStringAppendC(ds, ' ');
00318 if (parent->ident->name)
00319 dyStringAppendN(ds, parent->ident->name, strlen(parent->ident->name));
00320 }
00321
00322 return ds->string;
00323 }
00324
00325
00326 static void nodeNames(struct phyloTree *tree, struct dyString *ds)
00327
00328 {
00329 int ii;
00330
00331 if (tree->ident->name)
00332 {
00333 dyStringAppendN(ds, tree->ident->name, strlen(tree->ident->name));
00334 dyStringAppendC(ds, ' ');
00335 }
00336
00337 for (ii=0; ii < tree->numEdges; ii++)
00338 nodeNames(tree->edges[ii],ds);
00339 }
00340
00341 char *phyloNodeNames(struct phyloTree *tree)
00342
00343 {
00344 struct dyString *ds = newDyString(0);
00345
00346 nodeNames(tree, ds);
00347
00348 ds->string[ds->stringSize-1]=0;
00349
00350 return ds->string;
00351 }
00352
00353
00354 static void reParent(struct phyloTree *tree)
00355 {
00356 if (tree->parent)
00357 {
00358 struct phyloTree *edge, *saveParent = tree->parent;
00359
00360 reParent(saveParent);
00361 phyloDeleteEdge(saveParent, tree);
00362 edge = newEdge(tree, saveParent);
00363 edge->parent = tree;
00364 edge->ident->length = tree->ident->length;
00365 }
00366 }
00367
00368 struct phyloTree *phyloReRoot(struct phyloTree *tree)
00369
00370 {
00371 reParent(tree);
00372 tree->ident->length = 0;
00373
00374 return tree;
00375 }
00376
00377 void phyloDeleteEdge(struct phyloTree *tree, struct phyloTree *edge)
00378
00379 {
00380 int ii;
00381
00382 for (ii=0; ii < tree->numEdges; ii++)
00383 if (tree->edges[ii] == edge)
00384 {
00385 memcpy(&tree->edges[ii], &tree->edges[ii+1], sizeof(tree) * (tree->numEdges - ii - 1));
00386 tree->numEdges--;
00387
00388 return;
00389 }
00390
00391 errAbort("tried to delete non-existant edge");
00392 }
00393
00394 int phyloCountLeaves(struct phyloTree *tree)
00395 {
00396 int ii, count = 0;
00397
00398 if (tree->numEdges == 0)
00399 return 1;
00400
00401 for (ii=0; ii < tree->numEdges; ii++)
00402 count += phyloCountLeaves(tree->edges[ii]);
00403
00404 return count;
00405 }