gfServer/gfServer.c

Go to the documentation of this file.
00001 /* gfServer - set up an index of the genome in memory and
00002  * respond to search requests. */
00003 /* Copyright 2001-2003 Jim Kent.  All rights reserved. */
00004 #include "common.h"
00005 #include <signal.h>
00006 #include <sys/socket.h>
00007 #include <netinet/in.h>
00008 #include <netdb.h>
00009 #include "portable.h"
00010 #include "net.h"
00011 #include "dnautil.h"
00012 #include "dnaseq.h"
00013 #include "nib.h"
00014 #include "twoBit.h"
00015 #include "fa.h"
00016 #include "dystring.h"
00017 #include "errabort.h"
00018 #include "memalloc.h"
00019 #include "genoFind.h"
00020 #include "options.h"
00021 #include "trans3.h"
00022 #include "log.h"
00023 
00024 static char const rcsid[] = "$Id: gfServer.c,v 1.54 2007/03/31 19:38:13 markd Exp $";
00025 
/* Command-line options understood by this program.  Each entry pairs an
 * option name with the kind of value it takes; the table is passed to
 * optionInit() in main() and is terminated by a {NULL, 0} entry. */
00026 static struct optionSpec optionSpecs[] = {
00027     {"canStop", OPTION_BOOLEAN},
00028     {"log", OPTION_STRING},
00029     {"logFacility", OPTION_STRING},
00030     {"mask", OPTION_BOOLEAN},
00031     {"maxAaSize", OPTION_INT},
00032     {"maxDnaHits", OPTION_INT},
00033     {"maxGap", OPTION_INT},
00034     {"maxNtSize", OPTION_INT},
00035     {"maxTransHits", OPTION_INT},
00036     {"minMatch", OPTION_INT},
00037     {"repMatch", OPTION_INT},
00038     {"seqLog", OPTION_BOOLEAN},
00039     {"stepSize", OPTION_INT},
00040     {"tileSize", OPTION_INT},
00041     {"trans", OPTION_BOOLEAN},
00042     {"syslog", OPTION_BOOLEAN},
00043     {NULL, 0}
00044 };
00045 
00046 
/* Settings shared by the functions in this file.  The initial values here are
 * the defaults reported by usage(). */
00047 int maxNtSize = 40000;  /* Query size cap when not translating; startServer trims longer queries. */
00048 int maxAaSize = 8000;   /* Query size cap when doTrans is set; startServer trims longer queries. */
00049 
00050 int minMatch = gfMinMatch;      /* Can be overridden from command line. */
00051 int tileSize = gfTileSize;      /* Can be overridden from command line. */
00052 int stepSize = 0;               /* Can be overridden from command line. */
00053 boolean doTrans = FALSE;        /* Do translation? */
00054 boolean allowOneMismatch = FALSE;  /* Passed through to the index builders; NOTE(review): no option sets it in the code visible here. */
00055 int repMatch = 1024;    /* Can be overridden from command line. */
00056 int maxDnaHits = 100;   /* Can be overridden from command line. */
00057 int maxTransHits = 200; /* Can be overridden from command line. */
00058 int maxGap = gfMaxGap;  /* Passed to the index builders; main() sets it to 0 for -trans. */
00059 boolean seqLog = FALSE; /* When set, startServer writes each query sequence to the log file. */
00060 boolean doMask = FALSE; /* Passed to gfIndexTransNibsAndTwoBits when building the translated index. */
00061 boolean canStop = FALSE;  /* When set, a "quit" request makes startServer leave its loop. */
00062 
00063 void usage()
00064 /* Explain usage and exit. */
00065 {
00066 errAbort(
00067   "gfServer v %s - Make a server to quickly find where DNA occurs in genome.\n"
00068   "To set up a server:\n"
00069   "   gfServer start host port file(s)\n"
00070   "   Where the files are in .nib or .2bit format\n"
00071   "To remove a server:\n"
00072   "   gfServer stop host port\n"
00073   "To query a server with DNA sequence:\n"
00074   "   gfServer query host port probe.fa\n"
00075   "To query a server with protein sequence:\n"
00076   "   gfServer protQuery host port probe.fa\n"
00077   "To query a server with translated dna sequence:\n"
00078   "   gfServer transQuery host port probe.fa\n"
00079   "To query server with PCR primers\n"
00080   "   gfServer pcr host port fPrimer rPrimer maxDistance\n"
00081   "To process one probe fa file against a .nib format genome (not starting server):\n"
00082   "   gfServer direct probe.fa file(s).nib\n"
00083   "To test pcr without starting server:\n"
00084   "   gfServer pcrDirect fPrimer rPrimer file(s).nib\n"
00085   "To figure out usage level\n"
00086   "   gfServer status host port\n"
00087   "To get input file list\n"
00088   "   gfServer files host port\n"
00089   "Options:\n"
00090   "   -tileSize=N size of n-mers to index.  Default is 11 for nucleotides, 4 for\n"
00091   "               proteins (or translated nucleotides).\n"
00092   "   -stepSize=N spacing between tiles. Default is tileSize.\n"
00093   "   -minMatch=N Number of n-mer matches that trigger detailed alignment\n"
00094   "               Default is 2 for nucleotides, 3 for protiens.\n"
00095   "   -maxGap=N   Number of insertions or deletions allowed between n-mers.\n"
00096   "               Default is 2 for nucleotides, 0 for protiens.\n"
00097   "   -trans  Translate database to protein in 6 frames.  Note: it is best\n"
00098   "           to run this on RepeatMasked data in this case.\n"
00099   "   -log=logFile keep a log file that records server requests.\n"
00100   "   -seqLog    Include sequences in log file (not logged with -syslog)\n"
00101   "   -syslog    Log to syslog\n"
00102   "   -logFacility=facility log to the specified syslog facility - default local0.\n"
00103   "   -mask      Use masking from nib file.\n"
00104   "   -repMatch=N Number of occurrences of a tile (nmer) that trigger repeat masking the tile.\n"
00105   "               Default is %d.\n"
00106   "   -maxDnaHits=N Maximum number of hits for a dna query that are sent from the server.\n"
00107   "               Default is %d.\n"
00108   "   -maxTransHits=N Maximum number of hits for a translated query that are sent from the server.\n"
00109   "               Default is %d.\n"
00110   "   -maxNtSize=N Maximum size of untranslated DNA query sequence\n"
00111   "               Default is %d\n"
00112   "   -maxAsSize=N Maximum size of protein or translated DNA queries\n"
00113   "               Default is %d\n"
00114   "   -canStop If set then a quit message will actually take down the\n"
00115   "            server\n"
00116   ,     gfVersion, repMatch, maxDnaHits, maxTransHits, maxNtSize, maxAaSize
00117   );
00118 
00119 }
00120 
00121 void genoFindDirect(char *probeName, int fileCount, char *seqFiles[])
00122 /* Don't set up server - just directly look for matches. */
00123 {
00124 struct genoFind *gf = NULL;
00125 struct lineFile *lf = lineFileOpen(probeName, TRUE);
00126 struct dnaSeq seq;
00127 int hitCount = 0, clumpCount = 0, oneHit;
00128 ZeroVar(&seq);
00129 
00130 if (doTrans)
00131     errAbort("Don't support translated direct stuff currently, sorry");
00132 
00133 gf = gfIndexNibsAndTwoBits(fileCount, seqFiles, minMatch, maxGap, 
00134         tileSize, repMatch, FALSE,
00135         allowOneMismatch, stepSize);
00136 
00137 while (faSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
00138     {
00139     struct lm *lm = lmInit(0);
00140     struct gfClump *clumpList = gfFindClumps(gf, &seq, lm, &oneHit), *clump;
00141     hitCount += oneHit;
00142     for (clump = clumpList; clump != NULL; clump = clump->next)
00143         {
00144         ++clumpCount;
00145         printf("%s ", seq.name);
00146         gfClumpDump(gf, clump, stdout);
00147         }
00148     gfClumpFreeList(&clumpList);
00149     lmCleanup(&lm);
00150     }
00151 lineFileClose(&lf);
00152 genoFindFree(&gf);
00153 }
00154 
00155 void genoPcrDirect(char *fPrimer, char *rPrimer, int fileCount, char *seqFiles[])
00156 /* Do direct PCR for testing purposes. */
00157 {
00158 struct genoFind *gf = NULL;
00159 int fPrimerSize = strlen(fPrimer);
00160 int rPrimerSize = strlen(rPrimer);
00161 struct gfClump *clumpList, *clump;
00162 time_t startTime, endTime;
00163 
00164 startTime = clock1000();
00165 gf = gfIndexNibsAndTwoBits(fileCount, seqFiles, minMatch, maxGap, 
00166         tileSize, repMatch, FALSE,
00167         allowOneMismatch, stepSize);
00168 endTime = clock1000();
00169 printf("Index built in %4.3f seconds\n", 0.001 * (endTime - startTime));
00170 
00171 printf("plus strand:\n");
00172 startTime = clock1000();
00173 clumpList = gfPcrClumps(gf, fPrimer, fPrimerSize, rPrimer, rPrimerSize, 0, 4*1024);
00174 endTime = clock1000();
00175 printf("Index searched in %4.3f seconds\n", 0.001 * (endTime - startTime));
00176 for (clump = clumpList; clump != NULL; clump = clump->next)
00177     {
00178     /* Clumps from gfPcrClumps have already had target->start subtracted out 
00179      * of their coords, but gfClumpDump assumes they have not and does the 
00180      * subtraction; rather than write a new gfClumpDump, tweak here: */
00181     clump->tStart += clump->target->start;
00182     clump->tEnd += clump->target->start;
00183     gfClumpDump(gf, clump, stdout);
00184     }
00185 printf("minus strand:\n");
00186 startTime = clock1000();
00187 clumpList = gfPcrClumps(gf, rPrimer, rPrimerSize, fPrimer, fPrimerSize, 0, 4*1024);
00188 endTime = clock1000();
00189 printf("Index searched in %4.3f seconds\n", 0.001 * (endTime - startTime));
00190 for (clump = clumpList; clump != NULL; clump = clump->next)
00191     {
00192     /* Same as above, tweak before gfClumpDump: */
00193     clump->tStart += clump->target->start;
00194     clump->tEnd += clump->target->start;
00195     gfClumpDump(gf, clump, stdout);
00196     }
00197 
00198 genoFindFree(&gf);
00199 }
00200 
int getPortIx(char *portName)
/* Convert from ascii to integer.  Aborts with a message unless portName
 * starts with a digit; the conversion itself is done by atoi(). */
{
/* The <ctype.h> functions require a value representable as unsigned char
 * (or EOF).  Plain char may be negative, so cast before classifying. */
if (!isdigit((unsigned char)portName[0]))
    errAbort("Expecting a port number got %s", portName);
return atoi(portName);
}
00208 
00209 struct sockaddr_in sai;         /* Some system socket info. */
/* NOTE(review): sai is not referenced by any code visible in this part of the file. */
00210 
00211 /* Some variables to gather statistics on usage. */
/* These counters are updated while serving requests and reported by the
 * "status" command in startServer. */
00212 long baseCount = 0, blatCount = 0, aaCount = 0, pcrCount = 0;
00213 int warnCount = 0;   /* Incremented alongside each warn() issued by startServer. */
00214 int noSigCount = 0;  /* Requests that did not start with gfSignature(). */
00215 int missCount = 0;   /* Queries that produced no clumps. */
00216 int trimCount = 0;   /* Queries cut down to the maximum allowed size. */
00217 
00218 void dnaQuery(struct genoFind *gf, struct dnaSeq *seq, 
00219         int connectionHandle, char buf[256])    
00220 /* Handle a query for DNA/DNA match. */
00221 {
00222 struct gfClump *clumpList = NULL, *clump;
00223 int limit = 1000;
00224 int clumpCount = 0, hitCount = -1;
00225 struct lm *lm = lmInit(0);
00226 
00227 if (seq->size > gf->tileSize + gf->stepSize + gf->stepSize)
00228      limit = maxDnaHits;
00229 clumpList = gfFindClumps(gf, seq, lm, &hitCount);
00230 if (clumpList == NULL)
00231     ++missCount;
00232 for (clump = clumpList; clump != NULL; clump = clump->next)
00233     {
00234     struct gfSeqSource *ss = clump->target;
00235     sprintf(buf, "%d\t%d\t%s\t%d\t%d\t%d", 
00236         clump->qStart, clump->qEnd, ss->fileName,
00237         clump->tStart-ss->start, clump->tEnd-ss->start, clump->hitCount);
00238     netSendString(connectionHandle, buf);
00239     ++clumpCount;
00240     if (--limit < 0)
00241         break;
00242     }
00243 gfClumpFreeList(&clumpList);
00244 lmCleanup(&lm);
00245 logDebug("%lu %d clumps, %d hits", clock1000(), clumpCount, hitCount);
00246 }
00247 
00248 void transQuery(struct genoFind *transGf[2][3], aaSeq *seq, 
00249         int connectionHandle, char buf[256])    
00250 /* Handle a query for protein/translated DNA match. */
00251 {
00252 struct gfClump *clumps[3], *clump;
00253 int isRc, frame;
00254 char strand;
00255 struct dyString *dy  = newDyString(1024);
00256 struct gfHit *hit;
00257 int clumpCount = 0, hitCount = 0, oneHit;
00258 struct lm *lm = lmInit(0);
00259 
00260 sprintf(buf, "tileSize %d", tileSize);
00261 netSendString(connectionHandle, buf);
00262 for (frame = 0; frame < 3; ++frame)
00263     clumps[frame] = NULL;
00264 for (isRc = 0; isRc <= 1; ++isRc)
00265     {
00266     strand = (isRc ? '-' : '+');
00267     gfTransFindClumps(transGf[isRc], seq, clumps, lm, &oneHit);
00268     hitCount += oneHit;
00269     for (frame = 0; frame < 3; ++frame)
00270         {
00271         int limit = maxTransHits;
00272         for (clump = clumps[frame]; clump != NULL; clump = clump->next)
00273             {
00274             struct gfSeqSource *ss = clump->target;
00275             sprintf(buf, "%d\t%d\t%s\t%d\t%d\t%d\t%c\t%d", 
00276                 clump->qStart, clump->qEnd, ss->fileName,
00277                 clump->tStart-ss->start, clump->tEnd-ss->start, clump->hitCount,
00278                 strand, frame);
00279             netSendString(connectionHandle, buf);
00280             dyStringClear(dy);
00281             for (hit = clump->hitList; hit != NULL; hit = hit->next)
00282                 dyStringPrintf(dy, " %d %d", hit->qStart, hit->tStart - ss->start);
00283             netSendLongString(connectionHandle, dy->string);
00284             ++clumpCount;
00285             if (--limit < 0)
00286                 break;
00287             }
00288         gfClumpFreeList(&clumps[frame]);
00289         }
00290     }
00291 if (clumpCount == 0)
00292     ++missCount;
00293 freeDyString(&dy);
00294 lmCleanup(&lm);
00295 logDebug("%lu %d clumps, %d hits", clock1000(), clumpCount, hitCount);
00296 }
00297 
00298 void transTransQuery(struct genoFind *transGf[2][3], struct dnaSeq *seq, 
00299         int connectionHandle, char buf[256])    
00300 /* Handle a query for protein/translated DNA match. */
00301 {
00302 struct gfClump *clumps[3][3], *clump;
00303 int isRc, qFrame, tFrame;
00304 char strand;
00305 struct trans3 *t3 = trans3New(seq);
00306 struct dyString *dy  = newDyString(1024);
00307 struct gfHit *hit;
00308 int clumpCount = 0, hitCount = 0, oneCount;
00309 
00310 sprintf(buf, "tileSize %d", tileSize);
00311 netSendString(connectionHandle, buf);
00312 for (qFrame = 0; qFrame<3; ++qFrame)
00313     for (tFrame=0; tFrame<3; ++tFrame)
00314         clumps[qFrame][tFrame] = NULL;
00315 for (isRc = 0; isRc <= 1; ++isRc)
00316     {
00317     struct lm *lm = lmInit(0);
00318     strand = (isRc ? '-' : '+');
00319     gfTransTransFindClumps(transGf[isRc], t3->trans, clumps, lm, &oneCount);
00320     hitCount += oneCount;
00321     for (qFrame = 0; qFrame<3; ++qFrame)
00322         {
00323         for (tFrame=0; tFrame<3; ++tFrame)
00324             {
00325             int limit = maxTransHits;
00326             for (clump = clumps[qFrame][tFrame]; clump != NULL; clump = clump->next)
00327                 {
00328                 struct gfSeqSource *ss = clump->target;
00329                 sprintf(buf, "%d\t%d\t%s\t%d\t%d\t%d\t%c\t%d\t%d", 
00330                     clump->qStart, clump->qEnd, ss->fileName,
00331                     clump->tStart-ss->start, clump->tEnd-ss->start, clump->hitCount,
00332                     strand, qFrame, tFrame);
00333                 netSendString(connectionHandle, buf);
00334                 dyStringClear(dy);
00335                 for (hit = clump->hitList; hit != NULL; hit = hit->next)
00336                     {
00337                     dyStringPrintf(dy, " %d %d", hit->qStart, hit->tStart - ss->start);
00338                     }
00339                 netSendLongString(connectionHandle, dy->string);
00340                 ++clumpCount;
00341                 if (--limit < 0)
00342                     break;
00343                 }
00344             gfClumpFreeList(&clumps[qFrame][tFrame]);
00345             }
00346         }
00347     lmCleanup(&lm);
00348     }
00349 trans3Free(&t3);
00350 if (clumpCount == 0)
00351     ++missCount;
00352 logDebug("%lu %d clumps, %d hits", clock1000(), clumpCount, hitCount);
00353 }
00354 
00355 static void pcrQuery(struct genoFind *gf, char *fPrimer, char *rPrimer, 
00356         int maxDistance, int connectionHandle)
00357 /* Do PCR query and report results down socket. */
00358 {
00359 int fPrimerSize = strlen(fPrimer);
00360 int rPrimerSize = strlen(rPrimer);
00361 struct gfClump *clumpList, *clump;
00362 int clumpCount = 0;
00363 char buf[256];
00364 
00365 clumpList = gfPcrClumps(gf, fPrimer, fPrimerSize, rPrimer, rPrimerSize, 0, maxDistance);
00366 for (clump = clumpList; clump != NULL; clump = clump->next)
00367     {
00368     struct gfSeqSource *ss = clump->target;
00369     safef(buf, sizeof(buf), "%s\t%d\t%d\t+", ss->fileName, 
00370         clump->tStart, clump->tEnd);
00371     netSendString(connectionHandle, buf);
00372     ++clumpCount;
00373     }
00374 gfClumpFreeList(&clumpList);
00375 
00376 clumpList = gfPcrClumps(gf, rPrimer, rPrimerSize, fPrimer, fPrimerSize, 0, maxDistance);
00377 
00378 for (clump = clumpList; clump != NULL; clump = clump->next)
00379     {
00380     struct gfSeqSource *ss = clump->target;
00381     safef(buf, sizeof(buf), "%s\t%d\t%d\t-", ss->fileName, 
00382         clump->tStart, clump->tEnd);
00383     netSendString(connectionHandle, buf);
00384     ++clumpCount;
00385     }
00386 gfClumpFreeList(&clumpList);
00387 netSendString(connectionHandle, "end");
00388 logDebug("%lu PCR %s %s %d clumps\n", clock1000(), fPrimer, rPrimer, clumpCount);
00389 }
00390 
00391 
/* Jump target for error recovery: set with setjmp() in errorSafeQuery and
 * errorSafePcr, and jumped to by gfAbort. */
00392 static jmp_buf gfRecover;
00393 static char *ripCord = NULL;    /* A little memory to give back to system
00394                                  * during error recovery. */
00395 
00396 static void gfAbort()
00397 /* Abort query. */
00398 {
00399 freez(&ripCord);
00400 longjmp(gfRecover, -1);
00401 }
00402 
00403 static void errorSafeSetup()
00404 /* Start up error safe stuff. */
00405 {
00406 memTrackerStart();
00407 pushAbortHandler(gfAbort);
00408 ripCord = needMem(64*1024); /* Memory for error recovery. memTrackerEnd frees */
00409 }
00410 
static void errorSafeCleanupMess(int connectionHandle, char *message)
/* Recovery path after gfAbort has jumped back: remove the abort handler,
 * record the recovery in the log, and send message to the client. */
{
popAbortHandler();

logError("Recovering from error via longjmp");

netSendString(connectionHandle, message);
}
00418 
00419 static void errorSafeQuery(boolean doTrans, boolean queryIsProt, 
00420         struct dnaSeq *seq, struct genoFind *gf, struct genoFind *transGf[2][3], 
00421         int connectionHandle, char *buf)
00422 /* Wrap error handling code around index query. */
00423 {
00424 int status;
00425 errorSafeSetup();
00426 status = setjmp(gfRecover);
00427 if (status == 0)    /* Always true except after long jump. */
00428     {
00429     if (doTrans)
00430        {
00431        if (queryIsProt)
00432             transQuery(transGf, seq, connectionHandle, buf);
00433        else
00434             transTransQuery(transGf, seq, 
00435                 connectionHandle, buf);
00436        }
00437     else
00438         dnaQuery(gf, seq, connectionHandle, buf);
00439     popAbortHandler();
00440     }
00441 else    /* They long jumped here because of an error. */
00442     {
00443     errorSafeCleanupMess(connectionHandle, 
00444         "Error: gfServer out of memory. Try reducing size of query.");
00445     }
00446 memTrackerEnd();
00447 }
00448 
00449 static void errorSafePcr(struct genoFind *gf, char *fPrimer, char *rPrimer, 
00450         int maxDistance, int connectionHandle)
00451 /* Wrap error handling around pcr index query. */
00452 {
00453 int status;
00454 errorSafeSetup();
00455 status = setjmp(gfRecover);
00456 if (status == 0)    /* Always true except after long jump. */
00457     {
00458     pcrQuery(gf, fPrimer, rPrimer, maxDistance, connectionHandle);
00459     popAbortHandler();
00460     }
00461 else    /* They long jumped here because of an error. */
00462     {
00463     errorSafeCleanupMess(connectionHandle, 
00464         "Error: gfServer out of memory."); 
00465     }
00466 memTrackerEnd();
00467 }
00468 
00469 boolean badPcrPrimerSeq(char *s)
00470 /* Return TRUE if have a character we can't handle in sequence. */
00471 {
00472 unsigned char c;
00473 while ((c = *s++) != 0)
00474     {
00475     if (ntVal[c] < 0)
00476         return TRUE;
00477     }
00478 return FALSE;
00479 }
00480 
00481 void startServer(char *hostName, char *portName, int fileCount, 
00482         char *seqFiles[])
00483 /* Load up index and hang out in RAM. */
00484 {
00485 struct genoFind *gf = NULL;
00486 static struct genoFind *transGf[2][3];
00487 char buf[256];
00488 char *line, *command;
00489 socklen_t fromLen;
00490 int readSize;
00491 int socketHandle = 0, connectionHandle = 0;
00492 int port = atoi(portName);
00493 time_t curtime;
00494 struct tm *loctime;
00495 char timestr[256];
00496 
00497 netBlockBrokenPipes();
00498 
00499 curtime = time (NULL);           /* Get the current time. */
00500 loctime = localtime (&curtime);  /* Convert it to local time representation. */
00501 strftime (timestr, sizeof(timestr), "%Y-%m-%d %H:%M", loctime); /* formate datetime as string */
00502                                                                 
00503 logInfo("gfServer version %s on host %s, port %s  (%s)", gfVersion, 
00504         hostName, portName, timestr);
00505 if (doTrans)
00506     {
00507     uglyf("starting translated server...\n");
00508     logInfo("setting up translated index");
00509     gfIndexTransNibsAndTwoBits(transGf, fileCount, seqFiles, 
00510         minMatch, maxGap, tileSize, repMatch, NULL, allowOneMismatch, 
00511         doMask, stepSize);
00512     }
00513 else
00514     {
00515     uglyf("starting untranslated server...\n");
00516     logInfo("setting up untranslated index");
00517     gf = gfIndexNibsAndTwoBits(fileCount, seqFiles, minMatch, 
00518         maxGap, tileSize, repMatch, NULL, allowOneMismatch, stepSize);
00519     }
00520 logInfo("indexing complete");
00521 
00522 /* Set up socket.  Get ready to listen to it. */
00523 socketHandle = netAcceptingSocket(port, 100);
00524 
00525 logInfo("Server ready for queries!");
00526 printf("Server ready for queries!\n");
00527 for (;;)
00528     {
00529     connectionHandle = accept(socketHandle, NULL, &fromLen);
00530     if (connectionHandle < 0)
00531         {
00532         warn("Error accepting the connection");
00533         ++warnCount;
00534         continue;
00535         }
00536     readSize = read(connectionHandle, buf, sizeof(buf)-1);
00537     if (readSize < 0)
00538         {
00539         warn("Error reading from socket: %s", strerror(errno));
00540         ++warnCount;
00541         close(connectionHandle);
00542         continue;
00543         }
00544     if (readSize == 0)
00545         {
00546         warn("Zero sized query");
00547         ++warnCount;
00548         close(connectionHandle);
00549         continue;
00550         }
00551     buf[readSize] = 0;
00552     logDebug("%s", buf);
00553     if (!startsWith(gfSignature(), buf))
00554         {
00555         ++noSigCount;
00556         close(connectionHandle);
00557         continue;
00558         }
00559     line = buf + strlen(gfSignature());
00560     command = nextWord(&line);
00561     if (sameString("quit", command))
00562         {
00563         if (canStop)
00564             break;
00565         else
00566             logError("Ignoring quit message");
00567         }
00568     else if (sameString("status", command))
00569         {
00570         sprintf(buf, "version %s", gfVersion);
00571         netSendString(connectionHandle, buf);
00572         sprintf(buf, "type %s", (doTrans ? "translated" : "nucleotide"));
00573         netSendString(connectionHandle, buf);
00574         sprintf(buf, "host %s", hostName);
00575         netSendString(connectionHandle, buf);
00576         sprintf(buf, "port %s", portName);
00577         netSendString(connectionHandle, buf);
00578         sprintf(buf, "tileSize %d", tileSize);
00579         netSendString(connectionHandle, buf);
00580         sprintf(buf, "stepSize %d", stepSize);
00581         netSendString(connectionHandle, buf);
00582         sprintf(buf, "minMatch %d", minMatch);
00583         netSendString(connectionHandle, buf);
00584         sprintf(buf, "pcr requests %ld", pcrCount);
00585         netSendString(connectionHandle, buf);
00586         sprintf(buf, "blat requests %ld", blatCount);
00587         netSendString(connectionHandle, buf);
00588         sprintf(buf, "bases %ld", baseCount);
00589         netSendString(connectionHandle, buf);
00590         if (doTrans)
00591             {
00592             sprintf(buf, "aa %ld", aaCount);
00593             netSendString(connectionHandle, buf);
00594             }
00595         sprintf(buf, "misses %d", missCount);
00596         netSendString(connectionHandle, buf);
00597         sprintf(buf, "noSig %d", noSigCount);
00598         netSendString(connectionHandle, buf);
00599         sprintf(buf, "trimmed %d", trimCount);
00600         netSendString(connectionHandle, buf);
00601         sprintf(buf, "warnings %d", warnCount);
00602         netSendString(connectionHandle, buf);
00603         netSendString(connectionHandle, "end");
00604         }
00605     else if (sameString("query", command) || 
00606         sameString("protQuery", command) || sameString("transQuery", command))
00607         {
00608         boolean queryIsProt = sameString(command, "protQuery");
00609         char *s = nextWord(&line);
00610         if (s == NULL || !isdigit(s[0]))
00611             {
00612             warn("Expecting query size after query command");
00613             ++warnCount;
00614             }
00615         else
00616             {
00617             struct dnaSeq seq;
00618             ZeroVar(&seq);
00619 
00620             if (queryIsProt && !doTrans)
00621                 {
00622                 warn("protein query sent to nucleotide server");
00623                 ++warnCount;
00624                 queryIsProt = FALSE;
00625                 }
00626             else
00627                 {
00628                 buf[0] = 'Y';
00629                 if (write(connectionHandle, buf, 1) == 1)
00630                     {
00631                     seq.size = atoi(s);
00632                     seq.name = NULL;
00633                     if (seq.size > 0)
00634                         {
00635                         ++blatCount;
00636                         seq.dna = needLargeMem(seq.size+1);
00637                         if (gfReadMulti(connectionHandle, seq.dna, seq.size) != seq.size)
00638                             {
00639                             warn("Didn't sockRecieveString all %d bytes of query sequence", seq.size);
00640                             ++warnCount;
00641                             }
00642                         else
00643                             {
00644                             int maxSize = (doTrans ? maxAaSize : maxNtSize);
00645 
00646                             seq.dna[seq.size] = 0;
00647                             if (queryIsProt)
00648                                 {
00649                                 seq.size = aaFilteredSize(seq.dna);
00650                                 aaFilter(seq.dna, seq.dna);
00651                                 }
00652                             else
00653                                 {
00654                                 seq.size = dnaFilteredSize(seq.dna);
00655                                 dnaFilter(seq.dna, seq.dna);
00656                                 }
00657                             if (seq.size > maxSize)
00658                                 {
00659                                 ++trimCount;
00660                                 seq.size = maxSize;
00661                                 seq.dna[maxSize] = 0;
00662                                 }
00663                             if (queryIsProt)
00664                                 aaCount += seq.size;
00665                             else
00666                                 baseCount += seq.size;
00667                             if (seqLog && (logGetFile() != NULL))
00668                                 {
00669                                 FILE *lf = logGetFile();
00670                                 faWriteNext(lf, "query", seq.dna, seq.size);
00671                                 fflush(lf);
00672                                 }
00673                             errorSafeQuery(doTrans, queryIsProt, &seq, gf, 
00674                                     transGf, connectionHandle, buf);
00675                             }
00676                         freez(&seq.dna);
00677                         }
00678                     netSendString(connectionHandle, "end");
00679                     }
00680                 }
00681             }
00682         }
00683     else if (sameString("pcr", command))
00684         {
00685         char *f = nextWord(&line);
00686         char *r = nextWord(&line);
00687         char *s = nextWord(&line);
00688         int maxDistance;
00689         ++pcrCount;
00690         if (s == NULL || !isdigit(s[0]))
00691             {
00692             warn("Badly formatted pcr command");
00693             ++warnCount;
00694             }
00695         else if (doTrans)
00696             {
00697             warn("Can't pcr on translated server");
00698             ++warnCount;
00699             }
00700         else if (badPcrPrimerSeq(f) || badPcrPrimerSeq(r))
00701             {
00702             warn("Can only handle ACGT in primer sequences.");
00703             ++warnCount;
00704             }
00705         else
00706             {
00707             maxDistance = atoi(s);
00708             errorSafePcr(gf, f, r, maxDistance, connectionHandle);
00709             }
00710         }
00711     else if (sameString("files", command))
00712         {
00713         int i;
00714         sprintf(buf, "%d", fileCount);
00715         netSendString(connectionHandle, buf);
00716         for (i=0; i<fileCount; ++i)
00717             {
00718             sprintf(buf, "%s", seqFiles[i]);
00719             netSendString(connectionHandle, buf);
00720             }
00721         }
00722     else
00723         {
00724         warn("Unknown command %s", command);
00725         ++warnCount;
00726         }
00727     close(connectionHandle);
00728     connectionHandle = 0;
00729     }
00730 close(socketHandle);
00731 }
00732 
void stopServer(char *hostName, char *portName)
/* Send stop message to server.  Reports success on stdout only if the
 * message was actually written; otherwise issues a warning. */
{
char buf[256];
int sd = 0;

sd = netMustConnectTo(hostName, portName);
sprintf(buf, "%squit", gfSignature());
/* Do not claim success when the write itself failed. */
if (write(sd, buf, strlen(buf)) < 0)
    warn("Error sending stop message to server: %s", strerror(errno));
else
    printf("sent stop message to server\n");
close(sd);
}
00745 
00746 int statusServer(char *hostName, char *portName)
00747 /* Send status message to server arnd report result. */
00748 {
00749 char buf[256];
00750 int sd = 0;
00751 int ret = 0;
00752 
00753 /* Put together command. */
00754 sd = netMustConnectTo(hostName, portName);
00755 sprintf(buf, "%sstatus", gfSignature());
00756 write(sd, buf, strlen(buf));
00757 
00758 for (;;)
00759     {
00760     if (netGetString(sd, buf) == NULL)
00761         {
00762         warn("Error reading status information from %s:%s",hostName,portName);
00763         ret = -1;
00764         break;
00765         }
00766     if (sameString(buf, "end"))
00767         break;
00768     else
00769         printf("%s\n", buf);
00770     }
00771 close(sd);
00772 return(ret); 
00773 }
00774 
00775 void queryServer(char *type, 
00776         char *hostName, char *portName, char *faName, boolean complex, boolean isProt)
00777 /* Send simple query to server and report results. */
00778 {
00779 char buf[256];
00780 int sd = 0;
00781 bioSeq *seq = faReadSeq(faName, !isProt);
00782 int matchCount = 0;
00783 
00784 /* Put together query command. */
00785 sd = netMustConnectTo(hostName, portName);
00786 sprintf(buf, "%s%s %d", gfSignature(), type, seq->size);
00787 write(sd, buf, strlen(buf));
00788 
00789 read(sd, buf, 1);
00790 if (buf[0] != 'Y')
00791     errAbort("Expecting 'Y' from server, got %c", buf[0]);
00792 write(sd, seq->dna, seq->size);
00793 
00794 if (complex)
00795     {
00796     char *s = netRecieveString(sd, buf);
00797     printf("%s\n", s);
00798     }
00799 
00800 for (;;)
00801     {
00802     if (netGetString(sd, buf) == NULL)
00803         break;
00804     if (sameString(buf, "end"))
00805         {
00806         printf("%d matches\n", matchCount);
00807         break;
00808         }
00809     else if (startsWith("Error:", buf))
00810        {
00811        errAbort(buf);
00812        break;
00813        }
00814     else
00815         {
00816         printf("%s\n", buf);
00817         if (complex)
00818             {
00819             char *s = netGetLongString(sd);
00820             if (s == NULL)
00821                 break;
00822             printf("%s\n", s);
00823             freeMem(s);
00824             }
00825         }
00826     ++matchCount;
00827     }
00828 close(sd);
00829 }
00830 
void pcrServer(char *hostName, char *portName, char *fPrimer, char *rPrimer, int maxSize)
/* Do a PCR query to server daemon: send "<signature>pcr fPrimer rPrimer
 * maxSize" and print each reply line to stdout until "end".  Aborts if the
 * server replies with a line starting "Error:". */
{
char buf[256];
int sd = 0;

/* Put together query command and send. */
sd = netMustConnectTo(hostName, portName);
/* The primers are caller-supplied and unbounded, so format with the bounded
 * safef that pcrQuery also uses.
 * NOTE(review): safef presumably aborts on overflow - confirm. */
safef(buf, sizeof(buf), "%spcr %s %s %d", gfSignature(), fPrimer, rPrimer, maxSize);
write(sd, buf, strlen(buf));

/* Fetch and display results. */
for (;;)
    {
    if (netGetString(sd, buf) == NULL)
        break;
    if (sameString(buf, "end"))
        break;
    else if (startsWith("Error:", buf))
        {
        /* buf holds text from the server; never use it as a format string. */
        errAbort("%s", buf);
        break;
        }
    else
        {
        printf("%s\n", buf);
        }
    }
close(sd);
}
00861 
00862 
void getFileList(char *hostName, char *portName)
/* Ask the server for its input file list and print one name per line.
 * The reply is a count string followed by that many name strings. */
{
char reply[256];
int conn = netMustConnectTo(hostName, portName);
int remaining;

/* Send the "files" request. */
sprintf(reply, "%sfiles", gfSignature());
write(conn, reply, strlen(reply));

/* No count came back: nothing to list. */
if (netGetString(conn, reply) == NULL)
    {
    close(conn);
    return;
    }

/* First string is the number of names that follow. */
for (remaining = atoi(reply); remaining > 0; --remaining)
    printf("%s\n", netRecieveString(conn, reply));
close(conn);
}
00887 
00888 int main(int argc, char *argv[])
00889 /* Process command line. */
00890 {
00891 char *command;
00892 
00893 gfCatchPipes();
00894 dnaUtilOpen();
00895 optionInit(&argc, argv, optionSpecs);
00896 command = argv[1];
00897 if (optionExists("trans"))
00898     {
00899     doTrans = TRUE;
00900     tileSize = 4;
00901     minMatch = 3;
00902     maxGap = 0;
00903     repMatch = gfPepMaxTileUse;
00904     }
00905 tileSize = optionInt("tileSize", tileSize);
00906 stepSize = optionInt("stepSize", stepSize);
00907 if (stepSize == 0)
00908     stepSize = tileSize;
00909 repMatch = round( (double)tileSize/(double)stepSize * repMatch);
00910 minMatch = optionInt("minMatch", minMatch);
00911 repMatch = optionInt("repMatch", repMatch);
00912 maxDnaHits = optionInt("maxDnaHits", maxDnaHits);
00913 maxTransHits = optionInt("maxTransHits", maxTransHits);
00914 maxNtSize = optionInt("maxNtSize", maxNtSize);
00915 maxAaSize = optionInt("maxAaSize", maxAaSize);
00916 seqLog = optionExists("seqLog");
00917 doMask = optionExists("mask");
00918 canStop = optionExists("canStop");
00919 if (argc < 2)
00920     usage();
00921 if (optionExists("log"))
00922     logOpenFile(argv[0], optionVal("log", NULL));
00923 if (optionExists("syslog"))
00924     logOpenSyslog(argv[0], optionVal("logFacility", NULL));
00925 
00926 if (sameWord(command, "direct"))
00927     {
00928     if (argc < 4)
00929         usage();
00930     genoFindDirect(argv[2], argc-3, argv+3);
00931     }
00932 else if (sameWord(command, "pcrDirect"))
00933     {
00934     if (argc < 5)
00935         usage();
00936     genoPcrDirect(argv[2], argv[3], argc-4, argv+4);
00937     }
00938 else if (sameWord(command, "start"))
00939     {
00940     if (argc < 5)
00941         usage();
00942     startServer(argv[2], argv[3], argc-4, argv+4);
00943     }
00944 else if (sameWord(command, "stop"))
00945     {
00946     if (argc != 4)
00947         usage();
00948     stopServer(argv[2], argv[3]);
00949     }
00950 else if (sameWord(command, "query"))
00951     {
00952     if (argc != 5)
00953         usage();
00954     queryServer(command, argv[2], argv[3], argv[4], FALSE, FALSE);
00955     }
00956 else if (sameWord(command, "protQuery"))
00957     {
00958     if (argc != 5)
00959         usage();
00960     queryServer(command, argv[2], argv[3], argv[4], TRUE, TRUE);
00961     }
00962 else if (sameWord(command, "transQuery"))
00963     {
00964     if (argc != 5)
00965         usage();
00966     queryServer(command, argv[2], argv[3], argv[4], TRUE, FALSE);
00967     }
00968 else if (sameWord(command, "pcr"))
00969     {
00970     if (argc != 7)
00971         usage();
00972     pcrServer(argv[2], argv[3], argv[4], argv[5], atoi(argv[6]));
00973     }
00974 else if (sameWord(command, "status"))
00975     {
00976     if (argc != 4)
00977         usage();
00978     if (statusServer(argv[2], argv[3]))
00979         {
00980         exit(-1);
00981         }
00982     }
00983 else if (sameWord(command, "files"))
00984     {
00985     if (argc != 4)
00986         usage();
00987     getFileList(argv[2], argv[3]);
00988     }
00989 else
00990     {
00991     usage();
00992     }
00993 return 0;
00994 }

Generated on Tue Dec 25 18:39:29 2007 for blat by  doxygen 1.5.2