lib/diGraph.c

Go to the documentation of this file.
00001 /* diGraph - Directed graph routines. */
00002 #include "common.h"
00003 #include "hash.h"
00004 #include "dlist.h"
00005 #include "diGraph.h"
00006 
00007 static char const rcsid[] = "$Id: diGraph.c,v 1.9 2005/08/17 03:01:29 galt Exp $";
00008 
00009 struct diGraph *dgNew()
00010 /* Return a new directed graph object. */
00011 {
00012 struct diGraph *dg;
00013 AllocVar(dg);
00014 dg->nodeHash = newHash(0);
00015 dg->edgeList = newDlList();
00016 return dg;
00017 }
00018 
00019 static void dgNodeFree(struct dgNode **pNode)
00020 /* Free a diGraph node. */
00021 {
00022 struct dgNode *node = *pNode;
00023 if (node == NULL)
00024     return;
00025 slFreeList(&node->nextList);
00026 slFreeList(&node->prevList);
00027 freez(pNode);
00028 }
00029 
00030 static void dgNodeFreeList(struct dgNode **pList)
00031 /* Free list of diGraph nodes. */
00032 {
00033 struct dgNode *el, *next;
00034 
00035 for (el = *pList; el != NULL; el = next)
00036     {
00037     next = el->next;
00038     dgNodeFree(&el);
00039     }
00040 *pList = NULL;
00041 }
00042 
00043 void dgFree(struct diGraph **pGraph)
00044 /* Free a directed graph. */
00045 {
00046 struct diGraph *dg = *pGraph;
00047 if (dg == NULL)
00048     return;
00049 freeHash(&dg->nodeHash);
00050 dgNodeFreeList(&dg->nodeList);
00051 freeDlListAndVals(&dg->edgeList);
00052 freez(pGraph);
00053 }
00054 
00055 
00056 struct dgNode *dgAddNode(struct diGraph *dg, char *name, void *val)
00057 /* Create new node in graph. It's legal (but not efficient) to add
00058  * a node with the same name and value twice.  It's not legal to
00059  * add a node with the same name and a different value.  
00060  * You can pass in NULL for the name in which case the 
00061  * hexadecimal representation of val will become the name. */
00062 {
00063 struct dgNode *node;
00064 struct hashEl *hel;
00065 struct hash *hash = dg->nodeHash;
00066 char nbuf[17];
00067 
00068 if (name == NULL)
00069     {
00070     sprintf(nbuf, "%p", val);
00071     name = nbuf;
00072     }
00073 hel = hashLookup(hash, name);
00074 if (hel != NULL)
00075     {
00076     node = hel->val;
00077     if (node->val != val)
00078         {
00079         errAbort("Trying to add node %s with a new value (old 0x%llx new 0x%llx)",
00080             name, ptrToLL(node->val), ptrToLL(val));
00081         }
00082     return node;
00083     }
00084 AllocVar(node);
00085 hel = hashAdd(hash, name, node);
00086 node->name = hel->name;
00087 node->val = val;
00088 slAddHead(&dg->nodeList, node);
00089 return node;
00090 }
00091 
00092 struct dgNode *dgAddNumberedNode(struct diGraph *dg, int id, void *val)
00093 /* Create new node with a number instead of a name. */
00094 {
00095 char buf[16];
00096 sprintf(buf, "%d", id);
00097 return dgAddNode(dg, buf, val);
00098 }
00099 
00100 struct dgNode *dgFindNode(struct diGraph *dg, char *name)
00101 /* Find existing node in graph. Return NULL if not in graph. */
00102 {
00103 struct hashEl *hel;
00104 if ((hel = hashLookup(dg->nodeHash, name)) == NULL)
00105     return NULL;
00106 return hel->val;
00107 }
00108 
00109 struct dgNode *dgFindNumberedNode(struct diGraph *dg, int id)
00110 /* Find node given number. */
00111 {
00112 char buf[16];
00113 sprintf(buf, "%d", id);
00114 return dgFindNode(dg, buf);
00115 }
00116 
00117 
00118 void *dgNodeVal(struct dgNode *node)
00119 /* Return value associated with node. */
00120 {
00121 return node->val;
00122 }
00123 
00124 void *dgNodeName(struct dgNode *node)
00125 /* Return name associated with node. */
00126 {
00127 return node->name;
00128 }
00129 
00130 int dgNodeNumber(struct dgNode *node)
00131 /* Return number of node.  (Will likely return 0 if node
00132  * was added with a name rather than a number). */
00133 {
00134 return atoi(node->name);
00135 }
00136 
00137 struct dgNodeRef *dgFindNodeInRefList(struct dgNodeRef *refList, struct dgNode *node)
00138 /* Return reference to node if in list, or NULL if not. */
00139 {
00140 struct dgNodeRef *ref;
00141 for (ref = refList; ref != NULL; ref = ref->next)
00142     if (ref->node == node)
00143         return ref;
00144 return NULL;
00145 }
00146 
00147 struct dgConnection *dgFindNodeInConList(struct dgConnection *conList, struct dgNode *node)
00148 /* Return connection to node if in list, or NULL if not. */
00149 {
00150 struct dgConnection *con;
00151 for (con = conList; con != NULL; con = con->next)
00152     if (con->node == node)
00153         return con;
00154 return NULL;
00155 }
00156 
00157 
00158 struct dgEdge *dgConnect(struct diGraph *dg, struct dgNode *a, struct dgNode *b)
00159 /* Connect node a to node b.  Returns connecting edge. 
00160  * Not an error to reconnect.  However all connects can 
00161  * be broken with a single disconnect. */
00162 {
00163 return dgConnectWithVal(dg, a, b, NULL);
00164 }
00165 
00166 struct dgEdge *dgConnectWithVal(struct diGraph *dg, struct dgNode *a, 
00167      struct dgNode *b, void *val)
00168 /* Connect node a to node b and put val on edge.  An error to
00169  * reconnect with a different val. */
00170 {
00171 struct dgConnection *con;
00172 struct dgEdge *edge;
00173 struct dlNode *edgeOnList;
00174 
00175 /* Check to see if it's already there. */
00176 if ((con = dgFindNodeInConList(a->nextList, b)) != NULL)
00177     {
00178     edge =  con->edgeOnList->val;
00179     if (val != edge->val)
00180         warn("Trying to add new value to edge between %s and %s, ignoring",
00181            a->name, b->name);
00182     return edge;
00183     }
00184 /* Allocate edge and put on list. */
00185 AllocVar(edge);
00186 edge->a = a;
00187 edge->b = b;
00188 edge->val = val;
00189 edgeOnList = dlAddValTail(dg->edgeList, edge);
00190 
00191 /* Connect nodes to each other. */
00192 AllocVar(con);
00193 con->node = b;
00194 con->edgeOnList = edgeOnList;
00195 slAddHead(&a->nextList, con);
00196 AllocVar(con);
00197 con->node = a;
00198 con->edgeOnList = edgeOnList;
00199 slAddHead(&b->prevList, con);
00200 
00201 return edge;
00202 }
00203 
00204 static struct dlNode *dgRemoveFromConList(struct dgConnection **pConList, 
00205         struct dgNode *node, struct dgConnection **retCon)
00206 /* Remove reference to node from list. */
00207 {
00208 struct dgConnection *newList = NULL;
00209 struct dgConnection *con, *next;
00210 struct dlNode *edgeOnList = NULL;
00211 
00212 for (con = *pConList; con != NULL; con = next)
00213     {
00214     next = con->next;
00215     if (con->node == node)
00216         {
00217         edgeOnList = con->edgeOnList;
00218         if (retCon != NULL)
00219             *retCon = con;
00220         else
00221             freeMem(con);
00222         }
00223     else
00224         {
00225         slAddHead(&newList, con);
00226         }
00227     }
00228 slReverse(&newList);
00229 *pConList = newList;
00230 return edgeOnList;
00231 }
00232 
00233 void dgDisconnect(struct diGraph *dg, struct dgNode *a, struct dgNode *b)
00234 /* Disconnect nodes a and b. */
00235 {
00236 struct dlNode *edgeInList;
00237 struct dgEdge *edge;
00238 
00239 dgRemoveFromConList(&a->nextList, b, NULL);
00240 edgeInList = dgRemoveFromConList(&b->prevList, a, NULL);
00241 if (edgeInList != NULL)
00242     {
00243     edge = edgeInList->val;
00244     dlRemove(edgeInList);
00245     freeMem(edgeInList);
00246     freeMem(edge);
00247     }
00248 }
00249 
00250 
00251 void dgClearVisitFlags(struct diGraph *dg)
00252 /* Clear out visit flags. */
00253 {
00254 struct dgNode *node;
00255 for (node = dg->nodeList; node != NULL; node = node->next)
00256     node->visited = FALSE;
00257 }
00258 
00259 void dgClearConnFlags(struct diGraph *dg)
00260 /* Clear out connect flags. */
00261 {
00262 struct dgNode *node;
00263 for (node = dg->nodeList; node != NULL; node = node->next)
00264     node->conn = FALSE;
00265 }
00266 
00267 static struct dgNode *rTarget;
00268 
00269 static boolean rPathExists(struct dgNode *a)
00270 /* Recursively find if path from a to b exists. */
00271 {
00272 struct dgConnection *ref;
00273 if (a == rTarget)
00274     return TRUE;
00275 a->visited = TRUE;
00276 
00277 for (ref = a->nextList; ref != NULL; ref = ref->next)
00278     {
00279     if (!ref->node->visited && rPathExists(ref->node))
00280         return TRUE;
00281     }
00282 return FALSE;
00283 }
00284 
00285 boolean dgPathExists(struct diGraph *dg, struct dgNode *a, struct dgNode *b)
00286 /* Return TRUE if there's a path from a to b. */
00287 {
00288 rTarget = b;
00289 dgClearVisitFlags(dg);
00290 return rPathExists(a);
00291 }
00292 
00293 static int topoIx;
00294 
00295 static void rTopoSort(struct dgNode *node)
00296 {
00297 struct dgConnection *ref;
00298 node->visited = TRUE;
00299 for (ref = node->nextList; ref != NULL; ref = ref->next)
00300     {
00301     if (!ref->node->visited)
00302         rTopoSort(ref->node);
00303     }
00304 node->topoOrder = ++topoIx;
00305 }
00306 
00307 void dgTopoSort(struct diGraph *dg)
00308 /* Fill in topological order of nodes. */
00309 {
00310 struct dgNode *node;
00311 
00312 topoIx = 0;
00313 dgClearVisitFlags(dg);
00314 for (node = dg->nodeList; node != NULL; node = node->next)
00315     {
00316     if (!node->visited)
00317         rTopoSort(node);
00318     }
00319 }
00320 
00321 static boolean  rHasCycles(struct dgNode *node)
00322 /* Recursively see if has cycles by looking for
00323  * backwards topoOrder. */
00324 {
00325 struct dgConnection *ref;
00326 struct dgNode *child;
00327 
00328 node->visited = TRUE;
00329 for (ref = node->nextList; ref != NULL; ref = ref->next)
00330     {
00331     child = ref->node;
00332     if (child->topoOrder > node->topoOrder)
00333         return TRUE;
00334     if (!child->visited)
00335         if (rHasCycles(child))
00336             return TRUE;
00337     }
00338 return FALSE;
00339 }
00340 
00341 boolean dgHasCycles(struct diGraph *dg)
00342 /* Return TRUE if directed graph has cycles. */
00343 {
00344 struct dgNode *node;
00345 
00346 dgTopoSort(dg);
00347 dgClearVisitFlags(dg);
00348 for (node = dg->nodeList; node != NULL; node = node->next)
00349     {
00350     if (!node->visited)
00351         if (rHasCycles(node))
00352             return TRUE;
00353     }
00354 return FALSE;
00355 }
00356 
00357 struct dgNodeRef *rRefList;
00358 bool rMustHaveVal;                      /* Set to TRUE if rFindConnected only to
00359                                          * consider nodes with values in connections. */
00360 
00361 static void rFindConnected(struct dgNode *a)
00362 /* Find all things connected to a directly or not that haven't
00363  * already been visited and put them on rRefList. */
00364 {
00365 if (!a->conn && (!rMustHaveVal || a->val))
00366     {
00367     struct dgNodeRef *ref;
00368     struct dgConnection *con;
00369     AllocVar(ref);
00370     ref->node = a;
00371     slAddHead(&rRefList, ref);
00372     a->conn = TRUE;
00373     for (con = a->nextList; con != NULL; con = con->next)
00374         rFindConnected(con->node);
00375     for (con = a->prevList; con != NULL; con = con->next)
00376         rFindConnected(con->node);
00377     }
00378 }
00379 
00380 struct dgNodeRef *dgFindNextConnected(struct diGraph *dg)
00381 /* Return list of nodes that make up next connected component,
00382  * or NULL if no more components.  slFreeList this when
00383  * done.  Call "dgClearConnFlags" before first call to this.
00384  * Do not call dgFindConnectedToNode between dgFindFirstConnected 
00385  * and this as they use the same flag variables to keep track of
00386  * what vertices are used. */
00387 {
00388 struct dgNode *a;
00389 
00390 for (a=dg->nodeList; a != NULL; a = a->next)
00391     {
00392     if (!a->conn)
00393         break;
00394     }
00395 if (a == NULL)
00396     return NULL;
00397 rRefList = NULL;
00398 rMustHaveVal = FALSE;
00399 rFindConnected(a);
00400 return rRefList;
00401 }
00402 
00403 struct dgNodeRef *dgFindNextConnectedWithVals(struct diGraph *dg)
00404 /* Like dgFindConnected, but only considers graph nodes that
00405  * have a val attached. */
00406 {
00407 struct dgNode *a;
00408 
00409 for (a=dg->nodeList; a != NULL; a = a->next)
00410     {
00411     if (!a->conn && a->val != NULL)
00412         break;
00413     }
00414 if (a == NULL)
00415     return NULL;
00416 rRefList = NULL;
00417 rMustHaveVal = TRUE;
00418 rFindConnected(a);
00419 return rRefList;
00420 }
00421 
00422 
00423 static int connectedComponents(struct diGraph *dg)
00424 /* Count number of connected components and set component field
00425  * of each node to reflect which component it is in. */
00426 {
00427 struct dgNodeRef *ref;
00428 int conCount = 0;
00429 struct dgNode *a = dg->nodeList;
00430 
00431 dgClearConnFlags(dg);
00432 for (;;)
00433     {
00434     for (; a != NULL; a = a->next)
00435         {
00436         if (!a->conn && (!rMustHaveVal || a->val))
00437             break;
00438         }
00439     if (a == NULL)
00440         break;
00441     rRefList = NULL;
00442     rFindConnected(a);
00443     ++conCount;
00444     for (ref = rRefList; ref != NULL; ref = ref->next)
00445         {
00446         ref->node->component = conCount;
00447         }
00448     slFreeList(&rRefList);
00449     }
00450 return conCount;
00451 }
00452 
00453 int dgConnectedComponents(struct diGraph *dg)
00454 /* Count number of connected components and set component field
00455  * of each node to reflect which component it is in. */
00456 {
00457 rMustHaveVal = FALSE;
00458 return connectedComponents(dg);
00459 }
00460 
00461 int dgConnectedComponentsWithVals(struct diGraph *dg)
00462 /* Count number of connected components and set component field
00463  * of each node to reflect which component it is in. Only
00464  * consider components with values. */
00465 {
00466 rMustHaveVal = TRUE;
00467 return connectedComponents(dg);
00468 }
00469 
00470 struct dgNodeRef *dgFindNewConnected(struct diGraph *dg, struct dgNode *a)
00471 /* Find a connected component guaranteed not to be covered before 
00472  * including a. */
00473 {
00474 rRefList = NULL;
00475 rMustHaveVal = FALSE;
00476 rFindConnected(a);
00477 return rRefList;
00478 }
00479 
00480 struct dgNodeRef *dgFindNewConnectedWithVals(struct diGraph *dg, struct dgNode *a)
00481 /* Find a connected component guaranteed not to be covered before 
00482  * that includes a.  Connected components must have values*/
00483 {
00484 rRefList = NULL;
00485 rMustHaveVal = TRUE;
00486 rFindConnected(a);
00487 return rRefList;
00488 }
00489 
00490 
00491 struct dgNodeRef *dgFindConnectedToNode(struct diGraph *dg, struct dgNode *a)
00492 /* Return reference list of all nodes connected to a, including a.
00493  * slFreeList this list when done. */
00494 {
00495 dgClearConnFlags(dg);
00496 return dgFindNewConnected(dg, a);
00497 }
00498 
00499 struct dgEdge *dgDirectlyFollows(struct diGraph *dg, struct dgNode *a, struct dgNode *b)
00500 /* Return TRUE if b directly follows a. */
00501 {
00502 struct dgConnection *con = dgFindNodeInConList(a->nextList, b);
00503 if (con == NULL)
00504     return NULL;
00505 return con->edgeOnList->val;
00506 }
00507 
00508 struct dgNodeRef *dgFindPath(struct diGraph *dg, struct dgNode *a, struct dgNode *b)
00509 /* Find shortest path from a to b.  Return NULL if can't be found. */
00510 {
00511 struct dgNodeRef *refList  = NULL, *ref;
00512 struct dgConnection *con;
00513 struct dgNode *node, *nNode;
00514 struct dlList *fifo;
00515 struct dlNode *ffNode;
00516 struct dgNode endNode;
00517 int fifoSize = 1;
00518 
00519 /* Do some quick and easy tests first to return if have no way out
00520  * of node A, or if B directly follows A. */
00521 if (a->nextList == NULL)
00522     return NULL;
00523 if (a == b)
00524     {
00525     AllocVar(ref);
00526     ref->node = a;
00527     return ref;
00528     }
00529 if ((con = dgFindNodeInConList(a->nextList, b)) != NULL)
00530     {
00531     AllocVar(refList);
00532     refList->node = a;
00533     node = con->node;
00534     AllocVar(ref);
00535     ref->node = node;
00536     slAddTail(&refList, ref);
00537     return refList;
00538     }
00539 
00540 /* Set up for breadth first traversal.  Will use a doubly linked
00541  * list as a fifo. */
00542 for (node = dg->nodeList; node != NULL; node = node->next)
00543     node->tempEntry = NULL;
00544 fifo = newDlList();
00545 dlAddValTail(fifo, a);
00546 a->tempEntry = &endNode;
00547 
00548 while ((ffNode = dlPopHead(fifo)) != NULL)
00549     {
00550     --fifoSize;
00551     node = ffNode->val;
00552     freeMem(ffNode);
00553     for (con = node->nextList; con != NULL; con = con->next)
00554         {
00555         nNode = con->node;
00556         if (nNode->tempEntry == NULL)
00557             {
00558             nNode->tempEntry = node;
00559             if (nNode == b)
00560                 {
00561                 while (nNode != &endNode && nNode != NULL)
00562                     {
00563                     AllocVar(ref);
00564                     ref->node = nNode;
00565                     slAddHead(&refList, ref);
00566                     nNode = nNode->tempEntry;
00567                     }
00568                 break;
00569                 }
00570             else
00571                 {
00572                 dlAddValTail(fifo, nNode);
00573                 ++fifoSize;
00574                 if (fifoSize > 100000)
00575                     errAbort("Internal error in dgFindPath");
00576                 }
00577             }
00578         }
00579     }
00580 freeDlList(&fifo);
00581 return refList;
00582 }
00583 
00584 static int cmpPriority(const void *va, const void *vb)
00585 /* Sort smallest offset into needle first. */
00586 {
00587 const struct dgNode *a = *((struct dgNode **)va);
00588 const struct dgNode *b = *((struct dgNode **)vb);
00589 return (a->priority - b->priority);
00590 }
00591 
00592 boolean dgParentsAllVisited(struct dgNode *node)
00593 /* Return TRUE if all parents of node have  been visited. */
00594 {
00595 struct dgConnection *con;
00596 for (con = node->prevList; con != NULL; con = con->next)
00597     {
00598     if (con->node->visited == FALSE)
00599         return FALSE;
00600     }
00601 return TRUE;
00602 }
00603 
00604 struct dgNodeRef *dgConstrainedPriorityOrder(struct diGraph *dg)
00605 /* Return traversal of graph in priority order subject to
00606  * constraint that all parents must be output before
00607  * their children regardless of node priority. 
00608  * Graph must be cycle free. */
00609 {
00610 struct dlList *sortedList = newDlList();
00611 struct dgNode *graphNode;
00612 struct dlNode *listNode;
00613 struct dgNodeRef *refList = NULL, *ref;
00614 
00615 if (dgHasCycles(dg))
00616     errAbort("Call to dgConstrainedPriorityOrder on graph with cycles.");
00617 
00618 /* Make up list sorted by priority. */
00619 for (graphNode = dg->nodeList; graphNode != NULL; graphNode = graphNode->next)
00620     {
00621     dlAddValTail(sortedList, graphNode);
00622     graphNode->visited = FALSE;
00623     }
00624 dlSort(sortedList, cmpPriority);
00625 
00626 /* Loop taking first member of list with no untraversed parents. */
00627 while (!dlEmpty(sortedList))
00628     {
00629     for (listNode = sortedList->head; listNode->next != NULL; listNode = listNode->next)
00630         {
00631         graphNode = listNode->val;
00632         if (dgParentsAllVisited(graphNode))
00633             {
00634             dlRemove(listNode);
00635             freeMem(listNode);
00636             AllocVar(ref);
00637             ref->node = graphNode;
00638             slAddHead(&refList, ref);
00639             graphNode->visited = TRUE;
00640             break;
00641             }
00642         }
00643     }
00644 freeDlList(&sortedList);
00645 slReverse(&refList);
00646 return refList;
00647 }
00648 
00649 struct dgEdgeRef *dgFindSubEdges(struct diGraph *dg, struct dgNodeRef *subGraph)
00650 /* Return list of edges in graph that connected together nodes in subGraph. */
00651 {
00652 struct hash *hash = newHash(0);
00653 struct dgNodeRef *nr;
00654 struct dgConnection *con;
00655 struct dgEdgeRef *erList = NULL, *er;
00656 struct dgNode *node;
00657 
00658 /* Build up hash of nodes in subGraph. */
00659 for (nr = subGraph; nr != NULL; nr = nr->next)
00660     {
00661     node = nr->node;
00662     hashAdd(hash, node->name, node);
00663     }
00664 
00665 for (nr = subGraph; nr != NULL; nr = nr->next)
00666     {
00667     node = nr->node;
00668     for (con = node->nextList; con != NULL; con = con->next)
00669         {
00670         if (hashLookup(hash, con->node->name))
00671             {
00672             AllocVar(er);
00673             er->edge = con->edgeOnList->val;
00674             slAddHead(&erList, er);
00675             }
00676         }
00677     }
00678 freeHash(&hash);
00679 return erList;
00680 }
00681 
00682 void dgSwapEdges(struct diGraph *dg, struct dgEdgeRef *erList)
00683 /* Swap polarity of all edges in erList.  (Assumes you don't have
00684  * edges going both directions in graph.) */
00685 {
00686 struct dgEdgeRef *er;
00687 struct dgEdge *edge;
00688 struct dgNode *a, *b;
00689 struct dgConnection *con1, *con2;
00690 
00691 /* Remove edges from next and previous list of all
00692  * involved nodes and swap nodes in edge itself. */
00693 for (er = erList; er != NULL; er = er->next)
00694     {
00695     edge = er->edge;
00696     a = edge->a;
00697     b = edge->b;
00698     dgRemoveFromConList(&a->nextList, b, &con1);
00699     dgRemoveFromConList(&b->prevList, a, &con2);
00700     edge->a = b;
00701     edge->b = a;
00702     con1->node = a;
00703     slAddHead(&b->nextList, con1);
00704     con2->node = b;
00705     slAddHead(&a->prevList, con2);
00706     }
00707 }
00708 
00709 
00710 struct dgConnection *dgNextList(struct dgNode *node)
00711 /* Return list of nodes that follow node. */
00712 {
00713 return node->nextList;
00714 }
00715 
00716 struct dgConnection *dgPrevList(struct dgNode *node)
00717 /* Return list of nodes that precede node. */
00718 {
00719 return node->prevList;
00720 }
00721 
00722 void dgDumpGraph(struct diGraph *dg, FILE *out, boolean hideIsolated)
00723 /* Dump info on graph to output. */
00724 {
00725 struct dgNode *node;
00726 struct dgConnection *con;
00727 
00728 for (node = dg->nodeList; node != NULL; node = node->next)
00729     {
00730     if (hideIsolated  && node->nextList == NULL)
00731         continue;
00732     fprintf(out, "%s:", node->name);
00733     for (con = node->nextList; con != NULL; con = con->next)
00734         fprintf(out, " %s", con->node->name);
00735     fprintf(out, "\n");
00736     }
00737 }
00738 
00739 

Generated on Tue Dec 25 18:39:30 2007 for blat by  doxygen 1.5.2