00001
00002
00003
00004 #include "common.h"
00005 #include <signal.h>
00006 #include <sys/socket.h>
00007 #include <netinet/in.h>
00008 #include <netdb.h>
00009 #include "portable.h"
00010 #include "net.h"
00011 #include "dnautil.h"
00012 #include "dnaseq.h"
00013 #include "nib.h"
00014 #include "twoBit.h"
00015 #include "fa.h"
00016 #include "dystring.h"
00017 #include "errabort.h"
00018 #include "memalloc.h"
00019 #include "genoFind.h"
00020 #include "options.h"
00021 #include "trans3.h"
00022 #include "log.h"
00023
00024 static char const rcsid[] = "$Id: gfServer.c,v 1.54 2007/03/31 19:38:13 markd Exp $";
00025
00026 static struct optionSpec optionSpecs[] = {
00027 {"canStop", OPTION_BOOLEAN},
00028 {"log", OPTION_STRING},
00029 {"logFacility", OPTION_STRING},
00030 {"mask", OPTION_BOOLEAN},
00031 {"maxAaSize", OPTION_INT},
00032 {"maxDnaHits", OPTION_INT},
00033 {"maxGap", OPTION_INT},
00034 {"maxNtSize", OPTION_INT},
00035 {"maxTransHits", OPTION_INT},
00036 {"minMatch", OPTION_INT},
00037 {"repMatch", OPTION_INT},
00038 {"seqLog", OPTION_BOOLEAN},
00039 {"stepSize", OPTION_INT},
00040 {"tileSize", OPTION_INT},
00041 {"trans", OPTION_BOOLEAN},
00042 {"syslog", OPTION_BOOLEAN},
00043 {NULL, 0}
00044 };
00045
00046
00047 int maxNtSize = 40000;
00048 int maxAaSize = 8000;
00049
00050 int minMatch = gfMinMatch;
00051 int tileSize = gfTileSize;
00052 int stepSize = 0;
00053 boolean doTrans = FALSE;
00054 boolean allowOneMismatch = FALSE;
00055 int repMatch = 1024;
00056 int maxDnaHits = 100;
00057 int maxTransHits = 200;
00058 int maxGap = gfMaxGap;
00059 boolean seqLog = FALSE;
00060 boolean doMask = FALSE;
00061 boolean canStop = FALSE;
00062
00063 void usage()
00064
00065 {
00066 errAbort(
00067 "gfServer v %s - Make a server to quickly find where DNA occurs in genome.\n"
00068 "To set up a server:\n"
00069 " gfServer start host port file(s)\n"
00070 " Where the files are in .nib or .2bit format\n"
00071 "To remove a server:\n"
00072 " gfServer stop host port\n"
00073 "To query a server with DNA sequence:\n"
00074 " gfServer query host port probe.fa\n"
00075 "To query a server with protein sequence:\n"
00076 " gfServer protQuery host port probe.fa\n"
00077 "To query a server with translated dna sequence:\n"
00078 " gfServer transQuery host port probe.fa\n"
00079 "To query server with PCR primers\n"
00080 " gfServer pcr host port fPrimer rPrimer maxDistance\n"
00081 "To process one probe fa file against a .nib format genome (not starting server):\n"
00082 " gfServer direct probe.fa file(s).nib\n"
00083 "To test pcr without starting server:\n"
00084 " gfServer pcrDirect fPrimer rPrimer file(s).nib\n"
00085 "To figure out usage level\n"
00086 " gfServer status host port\n"
00087 "To get input file list\n"
00088 " gfServer files host port\n"
00089 "Options:\n"
00090 " -tileSize=N size of n-mers to index. Default is 11 for nucleotides, 4 for\n"
00091 " proteins (or translated nucleotides).\n"
00092 " -stepSize=N spacing between tiles. Default is tileSize.\n"
00093 " -minMatch=N Number of n-mer matches that trigger detailed alignment\n"
00094 " Default is 2 for nucleotides, 3 for protiens.\n"
00095 " -maxGap=N Number of insertions or deletions allowed between n-mers.\n"
00096 " Default is 2 for nucleotides, 0 for protiens.\n"
00097 " -trans Translate database to protein in 6 frames. Note: it is best\n"
00098 " to run this on RepeatMasked data in this case.\n"
00099 " -log=logFile keep a log file that records server requests.\n"
00100 " -seqLog Include sequences in log file (not logged with -syslog)\n"
00101 " -syslog Log to syslog\n"
00102 " -logFacility=facility log to the specified syslog facility - default local0.\n"
00103 " -mask Use masking from nib file.\n"
00104 " -repMatch=N Number of occurrences of a tile (nmer) that trigger repeat masking the tile.\n"
00105 " Default is %d.\n"
00106 " -maxDnaHits=N Maximum number of hits for a dna query that are sent from the server.\n"
00107 " Default is %d.\n"
00108 " -maxTransHits=N Maximum number of hits for a translated query that are sent from the server.\n"
00109 " Default is %d.\n"
00110 " -maxNtSize=N Maximum size of untranslated DNA query sequence\n"
00111 " Default is %d\n"
00112 " -maxAsSize=N Maximum size of protein or translated DNA queries\n"
00113 " Default is %d\n"
00114 " -canStop If set then a quit message will actually take down the\n"
00115 " server\n"
00116 , gfVersion, repMatch, maxDnaHits, maxTransHits, maxNtSize, maxAaSize
00117 );
00118
00119 }
00120
00121 void genoFindDirect(char *probeName, int fileCount, char *seqFiles[])
00122
00123 {
00124 struct genoFind *gf = NULL;
00125 struct lineFile *lf = lineFileOpen(probeName, TRUE);
00126 struct dnaSeq seq;
00127 int hitCount = 0, clumpCount = 0, oneHit;
00128 ZeroVar(&seq);
00129
00130 if (doTrans)
00131 errAbort("Don't support translated direct stuff currently, sorry");
00132
00133 gf = gfIndexNibsAndTwoBits(fileCount, seqFiles, minMatch, maxGap,
00134 tileSize, repMatch, FALSE,
00135 allowOneMismatch, stepSize);
00136
00137 while (faSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
00138 {
00139 struct lm *lm = lmInit(0);
00140 struct gfClump *clumpList = gfFindClumps(gf, &seq, lm, &oneHit), *clump;
00141 hitCount += oneHit;
00142 for (clump = clumpList; clump != NULL; clump = clump->next)
00143 {
00144 ++clumpCount;
00145 printf("%s ", seq.name);
00146 gfClumpDump(gf, clump, stdout);
00147 }
00148 gfClumpFreeList(&clumpList);
00149 lmCleanup(&lm);
00150 }
00151 lineFileClose(&lf);
00152 genoFindFree(&gf);
00153 }
00154
00155 void genoPcrDirect(char *fPrimer, char *rPrimer, int fileCount, char *seqFiles[])
00156
00157 {
00158 struct genoFind *gf = NULL;
00159 int fPrimerSize = strlen(fPrimer);
00160 int rPrimerSize = strlen(rPrimer);
00161 struct gfClump *clumpList, *clump;
00162 time_t startTime, endTime;
00163
00164 startTime = clock1000();
00165 gf = gfIndexNibsAndTwoBits(fileCount, seqFiles, minMatch, maxGap,
00166 tileSize, repMatch, FALSE,
00167 allowOneMismatch, stepSize);
00168 endTime = clock1000();
00169 printf("Index built in %4.3f seconds\n", 0.001 * (endTime - startTime));
00170
00171 printf("plus strand:\n");
00172 startTime = clock1000();
00173 clumpList = gfPcrClumps(gf, fPrimer, fPrimerSize, rPrimer, rPrimerSize, 0, 4*1024);
00174 endTime = clock1000();
00175 printf("Index searched in %4.3f seconds\n", 0.001 * (endTime - startTime));
00176 for (clump = clumpList; clump != NULL; clump = clump->next)
00177 {
00178
00179
00180
00181 clump->tStart += clump->target->start;
00182 clump->tEnd += clump->target->start;
00183 gfClumpDump(gf, clump, stdout);
00184 }
00185 printf("minus strand:\n");
00186 startTime = clock1000();
00187 clumpList = gfPcrClumps(gf, rPrimer, rPrimerSize, fPrimer, fPrimerSize, 0, 4*1024);
00188 endTime = clock1000();
00189 printf("Index searched in %4.3f seconds\n", 0.001 * (endTime - startTime));
00190 for (clump = clumpList; clump != NULL; clump = clump->next)
00191 {
00192
00193 clump->tStart += clump->target->start;
00194 clump->tEnd += clump->target->start;
00195 gfClumpDump(gf, clump, stdout);
00196 }
00197
00198 genoFindFree(&gf);
00199 }
00200
/* Convert a port given as text to its integer value.
 *
 * portName - text that must begin with a decimal digit.
 *
 * Returns atoi(portName).  Aborts through errAbort() when the first
 * character is not a digit. */
int getPortIx(char *portName)
{
    /* <ctype.h> functions require a value representable as unsigned char
     * (or EOF); a plain char may be negative. */
    if (!isdigit((unsigned char)portName[0]))
        errAbort("Expecting a port number got %s", portName);
    return atoi(portName);
}
00208
struct sockaddr_in sai;     /* NOTE(review): no use of this is visible in this file. */

/* Usage counters, reported by the server's "status" command. */
long baseCount = 0;         /* Sum of filtered sizes of non-protein queries. */
long blatCount = 0;         /* Query requests with a positive declared size. */
long aaCount = 0;           /* Sum of filtered sizes of protein queries. */
long pcrCount = 0;          /* "pcr" commands received. */
int warnCount = 0;          /* Incremented wherever the server warns about a request. */
int noSigCount = 0;         /* Requests that did not start with the gfServer signature. */
int missCount = 0;          /* Queries that produced no clumps. */
int trimCount = 0;          /* Queries cut down to the maximum size. */
00217
00218 void dnaQuery(struct genoFind *gf, struct dnaSeq *seq,
00219 int connectionHandle, char buf[256])
00220
00221 {
00222 struct gfClump *clumpList = NULL, *clump;
00223 int limit = 1000;
00224 int clumpCount = 0, hitCount = -1;
00225 struct lm *lm = lmInit(0);
00226
00227 if (seq->size > gf->tileSize + gf->stepSize + gf->stepSize)
00228 limit = maxDnaHits;
00229 clumpList = gfFindClumps(gf, seq, lm, &hitCount);
00230 if (clumpList == NULL)
00231 ++missCount;
00232 for (clump = clumpList; clump != NULL; clump = clump->next)
00233 {
00234 struct gfSeqSource *ss = clump->target;
00235 sprintf(buf, "%d\t%d\t%s\t%d\t%d\t%d",
00236 clump->qStart, clump->qEnd, ss->fileName,
00237 clump->tStart-ss->start, clump->tEnd-ss->start, clump->hitCount);
00238 netSendString(connectionHandle, buf);
00239 ++clumpCount;
00240 if (--limit < 0)
00241 break;
00242 }
00243 gfClumpFreeList(&clumpList);
00244 lmCleanup(&lm);
00245 logDebug("%lu %d clumps, %d hits", clock1000(), clumpCount, hitCount);
00246 }
00247
00248 void transQuery(struct genoFind *transGf[2][3], aaSeq *seq,
00249 int connectionHandle, char buf[256])
00250
00251 {
00252 struct gfClump *clumps[3], *clump;
00253 int isRc, frame;
00254 char strand;
00255 struct dyString *dy = newDyString(1024);
00256 struct gfHit *hit;
00257 int clumpCount = 0, hitCount = 0, oneHit;
00258 struct lm *lm = lmInit(0);
00259
00260 sprintf(buf, "tileSize %d", tileSize);
00261 netSendString(connectionHandle, buf);
00262 for (frame = 0; frame < 3; ++frame)
00263 clumps[frame] = NULL;
00264 for (isRc = 0; isRc <= 1; ++isRc)
00265 {
00266 strand = (isRc ? '-' : '+');
00267 gfTransFindClumps(transGf[isRc], seq, clumps, lm, &oneHit);
00268 hitCount += oneHit;
00269 for (frame = 0; frame < 3; ++frame)
00270 {
00271 int limit = maxTransHits;
00272 for (clump = clumps[frame]; clump != NULL; clump = clump->next)
00273 {
00274 struct gfSeqSource *ss = clump->target;
00275 sprintf(buf, "%d\t%d\t%s\t%d\t%d\t%d\t%c\t%d",
00276 clump->qStart, clump->qEnd, ss->fileName,
00277 clump->tStart-ss->start, clump->tEnd-ss->start, clump->hitCount,
00278 strand, frame);
00279 netSendString(connectionHandle, buf);
00280 dyStringClear(dy);
00281 for (hit = clump->hitList; hit != NULL; hit = hit->next)
00282 dyStringPrintf(dy, " %d %d", hit->qStart, hit->tStart - ss->start);
00283 netSendLongString(connectionHandle, dy->string);
00284 ++clumpCount;
00285 if (--limit < 0)
00286 break;
00287 }
00288 gfClumpFreeList(&clumps[frame]);
00289 }
00290 }
00291 if (clumpCount == 0)
00292 ++missCount;
00293 freeDyString(&dy);
00294 lmCleanup(&lm);
00295 logDebug("%lu %d clumps, %d hits", clock1000(), clumpCount, hitCount);
00296 }
00297
00298 void transTransQuery(struct genoFind *transGf[2][3], struct dnaSeq *seq,
00299 int connectionHandle, char buf[256])
00300
00301 {
00302 struct gfClump *clumps[3][3], *clump;
00303 int isRc, qFrame, tFrame;
00304 char strand;
00305 struct trans3 *t3 = trans3New(seq);
00306 struct dyString *dy = newDyString(1024);
00307 struct gfHit *hit;
00308 int clumpCount = 0, hitCount = 0, oneCount;
00309
00310 sprintf(buf, "tileSize %d", tileSize);
00311 netSendString(connectionHandle, buf);
00312 for (qFrame = 0; qFrame<3; ++qFrame)
00313 for (tFrame=0; tFrame<3; ++tFrame)
00314 clumps[qFrame][tFrame] = NULL;
00315 for (isRc = 0; isRc <= 1; ++isRc)
00316 {
00317 struct lm *lm = lmInit(0);
00318 strand = (isRc ? '-' : '+');
00319 gfTransTransFindClumps(transGf[isRc], t3->trans, clumps, lm, &oneCount);
00320 hitCount += oneCount;
00321 for (qFrame = 0; qFrame<3; ++qFrame)
00322 {
00323 for (tFrame=0; tFrame<3; ++tFrame)
00324 {
00325 int limit = maxTransHits;
00326 for (clump = clumps[qFrame][tFrame]; clump != NULL; clump = clump->next)
00327 {
00328 struct gfSeqSource *ss = clump->target;
00329 sprintf(buf, "%d\t%d\t%s\t%d\t%d\t%d\t%c\t%d\t%d",
00330 clump->qStart, clump->qEnd, ss->fileName,
00331 clump->tStart-ss->start, clump->tEnd-ss->start, clump->hitCount,
00332 strand, qFrame, tFrame);
00333 netSendString(connectionHandle, buf);
00334 dyStringClear(dy);
00335 for (hit = clump->hitList; hit != NULL; hit = hit->next)
00336 {
00337 dyStringPrintf(dy, " %d %d", hit->qStart, hit->tStart - ss->start);
00338 }
00339 netSendLongString(connectionHandle, dy->string);
00340 ++clumpCount;
00341 if (--limit < 0)
00342 break;
00343 }
00344 gfClumpFreeList(&clumps[qFrame][tFrame]);
00345 }
00346 }
00347 lmCleanup(&lm);
00348 }
00349 trans3Free(&t3);
00350 if (clumpCount == 0)
00351 ++missCount;
00352 logDebug("%lu %d clumps, %d hits", clock1000(), clumpCount, hitCount);
00353 }
00354
00355 static void pcrQuery(struct genoFind *gf, char *fPrimer, char *rPrimer,
00356 int maxDistance, int connectionHandle)
00357
00358 {
00359 int fPrimerSize = strlen(fPrimer);
00360 int rPrimerSize = strlen(rPrimer);
00361 struct gfClump *clumpList, *clump;
00362 int clumpCount = 0;
00363 char buf[256];
00364
00365 clumpList = gfPcrClumps(gf, fPrimer, fPrimerSize, rPrimer, rPrimerSize, 0, maxDistance);
00366 for (clump = clumpList; clump != NULL; clump = clump->next)
00367 {
00368 struct gfSeqSource *ss = clump->target;
00369 safef(buf, sizeof(buf), "%s\t%d\t%d\t+", ss->fileName,
00370 clump->tStart, clump->tEnd);
00371 netSendString(connectionHandle, buf);
00372 ++clumpCount;
00373 }
00374 gfClumpFreeList(&clumpList);
00375
00376 clumpList = gfPcrClumps(gf, rPrimer, rPrimerSize, fPrimer, fPrimerSize, 0, maxDistance);
00377
00378 for (clump = clumpList; clump != NULL; clump = clump->next)
00379 {
00380 struct gfSeqSource *ss = clump->target;
00381 safef(buf, sizeof(buf), "%s\t%d\t%d\t-", ss->fileName,
00382 clump->tStart, clump->tEnd);
00383 netSendString(connectionHandle, buf);
00384 ++clumpCount;
00385 }
00386 gfClumpFreeList(&clumpList);
00387 netSendString(connectionHandle, "end");
00388 logDebug("%lu PCR %s %s %d clumps\n", clock1000(), fPrimer, rPrimer, clumpCount);
00389 }
00390
00391
/* Reserve block: allocated by errorSafeSetup() and released by gfAbort()
 * just before it jumps - presumably so the recovery path has some memory
 * to work with. */
static char *ripCord = NULL;

/* Jump target armed by errorSafeQuery() and errorSafePcr(). */
static jmp_buf gfRecover;

/* Abort handler installed by errorSafeSetup(): give back the reserve block
 * and jump to gfRecover with the value -1. */
static void gfAbort()
{
    freez(&ripCord);
    longjmp(gfRecover, -1);
}
00402
00403 static void errorSafeSetup()
00404
00405 {
00406 memTrackerStart();
00407 pushAbortHandler(gfAbort);
00408 ripCord = needMem(64*1024);
00409 }
00410
/* Recovery path after gfAbort() has jumped back: remove the abort handler,
 * log the recovery, and send message to the client. */
static void errorSafeCleanupMess(int connectionHandle, char *message)
{
    popAbortHandler();
    logError("Recovering from error via longjmp");
    netSendString(connectionHandle, message);
}
00418
00419 static void errorSafeQuery(boolean doTrans, boolean queryIsProt,
00420 struct dnaSeq *seq, struct genoFind *gf, struct genoFind *transGf[2][3],
00421 int connectionHandle, char *buf)
00422
00423 {
00424 int status;
00425 errorSafeSetup();
00426 status = setjmp(gfRecover);
00427 if (status == 0)
00428 {
00429 if (doTrans)
00430 {
00431 if (queryIsProt)
00432 transQuery(transGf, seq, connectionHandle, buf);
00433 else
00434 transTransQuery(transGf, seq,
00435 connectionHandle, buf);
00436 }
00437 else
00438 dnaQuery(gf, seq, connectionHandle, buf);
00439 popAbortHandler();
00440 }
00441 else
00442 {
00443 errorSafeCleanupMess(connectionHandle,
00444 "Error: gfServer out of memory. Try reducing size of query.");
00445 }
00446 memTrackerEnd();
00447 }
00448
00449 static void errorSafePcr(struct genoFind *gf, char *fPrimer, char *rPrimer,
00450 int maxDistance, int connectionHandle)
00451
00452 {
00453 int status;
00454 errorSafeSetup();
00455 status = setjmp(gfRecover);
00456 if (status == 0)
00457 {
00458 pcrQuery(gf, fPrimer, rPrimer, maxDistance, connectionHandle);
00459 popAbortHandler();
00460 }
00461 else
00462 {
00463 errorSafeCleanupMess(connectionHandle,
00464 "Error: gfServer out of memory.");
00465 }
00466 memTrackerEnd();
00467 }
00468
00469 boolean badPcrPrimerSeq(char *s)
00470
00471 {
00472 unsigned char c;
00473 while ((c = *s++) != 0)
00474 {
00475 if (ntVal[c] < 0)
00476 return TRUE;
00477 }
00478 return FALSE;
00479 }
00480
00481 void startServer(char *hostName, char *portName, int fileCount,
00482 char *seqFiles[])
00483
00484 {
00485 struct genoFind *gf = NULL;
00486 static struct genoFind *transGf[2][3];
00487 char buf[256];
00488 char *line, *command;
00489 socklen_t fromLen;
00490 int readSize;
00491 int socketHandle = 0, connectionHandle = 0;
00492 int port = atoi(portName);
00493 time_t curtime;
00494 struct tm *loctime;
00495 char timestr[256];
00496
00497 netBlockBrokenPipes();
00498
00499 curtime = time (NULL);
00500 loctime = localtime (&curtime);
00501 strftime (timestr, sizeof(timestr), "%Y-%m-%d %H:%M", loctime);
00502
00503 logInfo("gfServer version %s on host %s, port %s (%s)", gfVersion,
00504 hostName, portName, timestr);
00505 if (doTrans)
00506 {
00507 uglyf("starting translated server...\n");
00508 logInfo("setting up translated index");
00509 gfIndexTransNibsAndTwoBits(transGf, fileCount, seqFiles,
00510 minMatch, maxGap, tileSize, repMatch, NULL, allowOneMismatch,
00511 doMask, stepSize);
00512 }
00513 else
00514 {
00515 uglyf("starting untranslated server...\n");
00516 logInfo("setting up untranslated index");
00517 gf = gfIndexNibsAndTwoBits(fileCount, seqFiles, minMatch,
00518 maxGap, tileSize, repMatch, NULL, allowOneMismatch, stepSize);
00519 }
00520 logInfo("indexing complete");
00521
00522
00523 socketHandle = netAcceptingSocket(port, 100);
00524
00525 logInfo("Server ready for queries!");
00526 printf("Server ready for queries!\n");
00527 for (;;)
00528 {
00529 connectionHandle = accept(socketHandle, NULL, &fromLen);
00530 if (connectionHandle < 0)
00531 {
00532 warn("Error accepting the connection");
00533 ++warnCount;
00534 continue;
00535 }
00536 readSize = read(connectionHandle, buf, sizeof(buf)-1);
00537 if (readSize < 0)
00538 {
00539 warn("Error reading from socket: %s", strerror(errno));
00540 ++warnCount;
00541 close(connectionHandle);
00542 continue;
00543 }
00544 if (readSize == 0)
00545 {
00546 warn("Zero sized query");
00547 ++warnCount;
00548 close(connectionHandle);
00549 continue;
00550 }
00551 buf[readSize] = 0;
00552 logDebug("%s", buf);
00553 if (!startsWith(gfSignature(), buf))
00554 {
00555 ++noSigCount;
00556 close(connectionHandle);
00557 continue;
00558 }
00559 line = buf + strlen(gfSignature());
00560 command = nextWord(&line);
00561 if (sameString("quit", command))
00562 {
00563 if (canStop)
00564 break;
00565 else
00566 logError("Ignoring quit message");
00567 }
00568 else if (sameString("status", command))
00569 {
00570 sprintf(buf, "version %s", gfVersion);
00571 netSendString(connectionHandle, buf);
00572 sprintf(buf, "type %s", (doTrans ? "translated" : "nucleotide"));
00573 netSendString(connectionHandle, buf);
00574 sprintf(buf, "host %s", hostName);
00575 netSendString(connectionHandle, buf);
00576 sprintf(buf, "port %s", portName);
00577 netSendString(connectionHandle, buf);
00578 sprintf(buf, "tileSize %d", tileSize);
00579 netSendString(connectionHandle, buf);
00580 sprintf(buf, "stepSize %d", stepSize);
00581 netSendString(connectionHandle, buf);
00582 sprintf(buf, "minMatch %d", minMatch);
00583 netSendString(connectionHandle, buf);
00584 sprintf(buf, "pcr requests %ld", pcrCount);
00585 netSendString(connectionHandle, buf);
00586 sprintf(buf, "blat requests %ld", blatCount);
00587 netSendString(connectionHandle, buf);
00588 sprintf(buf, "bases %ld", baseCount);
00589 netSendString(connectionHandle, buf);
00590 if (doTrans)
00591 {
00592 sprintf(buf, "aa %ld", aaCount);
00593 netSendString(connectionHandle, buf);
00594 }
00595 sprintf(buf, "misses %d", missCount);
00596 netSendString(connectionHandle, buf);
00597 sprintf(buf, "noSig %d", noSigCount);
00598 netSendString(connectionHandle, buf);
00599 sprintf(buf, "trimmed %d", trimCount);
00600 netSendString(connectionHandle, buf);
00601 sprintf(buf, "warnings %d", warnCount);
00602 netSendString(connectionHandle, buf);
00603 netSendString(connectionHandle, "end");
00604 }
00605 else if (sameString("query", command) ||
00606 sameString("protQuery", command) || sameString("transQuery", command))
00607 {
00608 boolean queryIsProt = sameString(command, "protQuery");
00609 char *s = nextWord(&line);
00610 if (s == NULL || !isdigit(s[0]))
00611 {
00612 warn("Expecting query size after query command");
00613 ++warnCount;
00614 }
00615 else
00616 {
00617 struct dnaSeq seq;
00618 ZeroVar(&seq);
00619
00620 if (queryIsProt && !doTrans)
00621 {
00622 warn("protein query sent to nucleotide server");
00623 ++warnCount;
00624 queryIsProt = FALSE;
00625 }
00626 else
00627 {
00628 buf[0] = 'Y';
00629 if (write(connectionHandle, buf, 1) == 1)
00630 {
00631 seq.size = atoi(s);
00632 seq.name = NULL;
00633 if (seq.size > 0)
00634 {
00635 ++blatCount;
00636 seq.dna = needLargeMem(seq.size+1);
00637 if (gfReadMulti(connectionHandle, seq.dna, seq.size) != seq.size)
00638 {
00639 warn("Didn't sockRecieveString all %d bytes of query sequence", seq.size);
00640 ++warnCount;
00641 }
00642 else
00643 {
00644 int maxSize = (doTrans ? maxAaSize : maxNtSize);
00645
00646 seq.dna[seq.size] = 0;
00647 if (queryIsProt)
00648 {
00649 seq.size = aaFilteredSize(seq.dna);
00650 aaFilter(seq.dna, seq.dna);
00651 }
00652 else
00653 {
00654 seq.size = dnaFilteredSize(seq.dna);
00655 dnaFilter(seq.dna, seq.dna);
00656 }
00657 if (seq.size > maxSize)
00658 {
00659 ++trimCount;
00660 seq.size = maxSize;
00661 seq.dna[maxSize] = 0;
00662 }
00663 if (queryIsProt)
00664 aaCount += seq.size;
00665 else
00666 baseCount += seq.size;
00667 if (seqLog && (logGetFile() != NULL))
00668 {
00669 FILE *lf = logGetFile();
00670 faWriteNext(lf, "query", seq.dna, seq.size);
00671 fflush(lf);
00672 }
00673 errorSafeQuery(doTrans, queryIsProt, &seq, gf,
00674 transGf, connectionHandle, buf);
00675 }
00676 freez(&seq.dna);
00677 }
00678 netSendString(connectionHandle, "end");
00679 }
00680 }
00681 }
00682 }
00683 else if (sameString("pcr", command))
00684 {
00685 char *f = nextWord(&line);
00686 char *r = nextWord(&line);
00687 char *s = nextWord(&line);
00688 int maxDistance;
00689 ++pcrCount;
00690 if (s == NULL || !isdigit(s[0]))
00691 {
00692 warn("Badly formatted pcr command");
00693 ++warnCount;
00694 }
00695 else if (doTrans)
00696 {
00697 warn("Can't pcr on translated server");
00698 ++warnCount;
00699 }
00700 else if (badPcrPrimerSeq(f) || badPcrPrimerSeq(r))
00701 {
00702 warn("Can only handle ACGT in primer sequences.");
00703 ++warnCount;
00704 }
00705 else
00706 {
00707 maxDistance = atoi(s);
00708 errorSafePcr(gf, f, r, maxDistance, connectionHandle);
00709 }
00710 }
00711 else if (sameString("files", command))
00712 {
00713 int i;
00714 sprintf(buf, "%d", fileCount);
00715 netSendString(connectionHandle, buf);
00716 for (i=0; i<fileCount; ++i)
00717 {
00718 sprintf(buf, "%s", seqFiles[i]);
00719 netSendString(connectionHandle, buf);
00720 }
00721 }
00722 else
00723 {
00724 warn("Unknown command %s", command);
00725 ++warnCount;
00726 }
00727 close(connectionHandle);
00728 connectionHandle = 0;
00729 }
00730 close(socketHandle);
00731 }
00732
/* Send a "quit" request to the server at hostName:portName.
 * Aborts if the request cannot be written, instead of reporting that the
 * stop message was sent regardless of the result of write(). */
void stopServer(char *hostName, char *portName)
{
    char buf[256];
    int sd = 0;

    sd = netMustConnectTo(hostName, portName);
    safef(buf, sizeof(buf), "%squit", gfSignature());
    if (write(sd, buf, strlen(buf)) < 0)
        errAbort("Couldn't send stop message to %s:%s: %s",
                 hostName, portName, strerror(errno));
    close(sd);
    printf("sent stop message to server\n");
}
00745
/* Ask the server at hostName:portName for its status and print each line
 * received to stdout until "end" arrives.
 *
 * Returns 0 on success, -1 when the request could not be sent or the reply
 * ended before "end".  The result of write() is now checked; it was
 * ignored before. */
int statusServer(char *hostName, char *portName)
{
    char buf[256];
    int sd = 0;
    int ret = 0;

    sd = netMustConnectTo(hostName, portName);
    safef(buf, sizeof(buf), "%sstatus", gfSignature());
    if (write(sd, buf, strlen(buf)) < 0)
        {
        warn("Error sending status request to %s:%s: %s",
             hostName, portName, strerror(errno));
        close(sd);
        return -1;
        }

    for (;;)
        {
        if (netGetString(sd, buf) == NULL)
            {
            warn("Error reading status information from %s:%s",hostName,portName);
            ret = -1;
            break;
            }
        if (sameString(buf, "end"))
            break;
        else
            printf("%s\n", buf);
        }
    close(sd);
    return(ret);
}
00774
00775 void queryServer(char *type,
00776 char *hostName, char *portName, char *faName, boolean complex, boolean isProt)
00777
00778 {
00779 char buf[256];
00780 int sd = 0;
00781 bioSeq *seq = faReadSeq(faName, !isProt);
00782 int matchCount = 0;
00783
00784
00785 sd = netMustConnectTo(hostName, portName);
00786 sprintf(buf, "%s%s %d", gfSignature(), type, seq->size);
00787 write(sd, buf, strlen(buf));
00788
00789 read(sd, buf, 1);
00790 if (buf[0] != 'Y')
00791 errAbort("Expecting 'Y' from server, got %c", buf[0]);
00792 write(sd, seq->dna, seq->size);
00793
00794 if (complex)
00795 {
00796 char *s = netRecieveString(sd, buf);
00797 printf("%s\n", s);
00798 }
00799
00800 for (;;)
00801 {
00802 if (netGetString(sd, buf) == NULL)
00803 break;
00804 if (sameString(buf, "end"))
00805 {
00806 printf("%d matches\n", matchCount);
00807 break;
00808 }
00809 else if (startsWith("Error:", buf))
00810 {
00811 errAbort(buf);
00812 break;
00813 }
00814 else
00815 {
00816 printf("%s\n", buf);
00817 if (complex)
00818 {
00819 char *s = netGetLongString(sd);
00820 if (s == NULL)
00821 break;
00822 printf("%s\n", s);
00823 freeMem(s);
00824 }
00825 }
00826 ++matchCount;
00827 }
00828 close(sd);
00829 }
00830
/* Send a PCR request to a server and print each result line to stdout.
 *
 * hostName, portName - server to contact.
 * fPrimer, rPrimer   - forward and reverse primers.
 * maxSize            - maximum distance sent with the request.
 *
 * Changes over the previous version:
 *  - the request is built with safef(), since the primers come from the
 *    command line and sprintf() could overrun the 256-byte buffer;
 *  - an "Error:" reply is passed to errAbort() as an argument of "%s"
 *    rather than used as the format string;
 *  - failure of write() is reported. */
void pcrServer(char *hostName, char *portName, char *fPrimer, char *rPrimer, int maxSize)
{
    char buf[256];
    int sd = 0;

    sd = netMustConnectTo(hostName, portName);
    safef(buf, sizeof(buf), "%spcr %s %s %d", gfSignature(), fPrimer, rPrimer, maxSize);
    if (write(sd, buf, strlen(buf)) < 0)
        errAbort("Couldn't send pcr request to %s:%s: %s",
                 hostName, portName, strerror(errno));

    for (;;)
        {
        if (netGetString(sd, buf) == NULL)
            break;
        if (sameString(buf, "end"))
            break;
        else if (startsWith("Error:", buf))
            {
            errAbort("%s", buf);
            break;
            }
        else
            {
            printf("%s\n", buf);
            }
        }
    close(sd);
}
00861
00862
/* Ask the server at hostName:portName for its input files and print one
 * name per line.  The reply is a count followed by that many names. */
void getFileList(char *hostName, char *portName)
{
    char buf[256];
    int sd = netMustConnectTo(hostName, portName);

    sprintf(buf, "%sfiles", gfSignature());
    write(sd, buf, strlen(buf));

    if (netGetString(sd, buf) != NULL)
        {
        int remaining = atoi(buf);
        while (remaining > 0)
            {
            char *fileName = netRecieveString(sd, buf);
            printf("%s\n", fileName);
            --remaining;
            }
        }
    close(sd);
}
00887
00888 int main(int argc, char *argv[])
00889
00890 {
00891 char *command;
00892
00893 gfCatchPipes();
00894 dnaUtilOpen();
00895 optionInit(&argc, argv, optionSpecs);
00896 command = argv[1];
00897 if (optionExists("trans"))
00898 {
00899 doTrans = TRUE;
00900 tileSize = 4;
00901 minMatch = 3;
00902 maxGap = 0;
00903 repMatch = gfPepMaxTileUse;
00904 }
00905 tileSize = optionInt("tileSize", tileSize);
00906 stepSize = optionInt("stepSize", stepSize);
00907 if (stepSize == 0)
00908 stepSize = tileSize;
00909 repMatch = round( (double)tileSize/(double)stepSize * repMatch);
00910 minMatch = optionInt("minMatch", minMatch);
00911 repMatch = optionInt("repMatch", repMatch);
00912 maxDnaHits = optionInt("maxDnaHits", maxDnaHits);
00913 maxTransHits = optionInt("maxTransHits", maxTransHits);
00914 maxNtSize = optionInt("maxNtSize", maxNtSize);
00915 maxAaSize = optionInt("maxAaSize", maxAaSize);
00916 seqLog = optionExists("seqLog");
00917 doMask = optionExists("mask");
00918 canStop = optionExists("canStop");
00919 if (argc < 2)
00920 usage();
00921 if (optionExists("log"))
00922 logOpenFile(argv[0], optionVal("log", NULL));
00923 if (optionExists("syslog"))
00924 logOpenSyslog(argv[0], optionVal("logFacility", NULL));
00925
00926 if (sameWord(command, "direct"))
00927 {
00928 if (argc < 4)
00929 usage();
00930 genoFindDirect(argv[2], argc-3, argv+3);
00931 }
00932 else if (sameWord(command, "pcrDirect"))
00933 {
00934 if (argc < 5)
00935 usage();
00936 genoPcrDirect(argv[2], argv[3], argc-4, argv+4);
00937 }
00938 else if (sameWord(command, "start"))
00939 {
00940 if (argc < 5)
00941 usage();
00942 startServer(argv[2], argv[3], argc-4, argv+4);
00943 }
00944 else if (sameWord(command, "stop"))
00945 {
00946 if (argc != 4)
00947 usage();
00948 stopServer(argv[2], argv[3]);
00949 }
00950 else if (sameWord(command, "query"))
00951 {
00952 if (argc != 5)
00953 usage();
00954 queryServer(command, argv[2], argv[3], argv[4], FALSE, FALSE);
00955 }
00956 else if (sameWord(command, "protQuery"))
00957 {
00958 if (argc != 5)
00959 usage();
00960 queryServer(command, argv[2], argv[3], argv[4], TRUE, TRUE);
00961 }
00962 else if (sameWord(command, "transQuery"))
00963 {
00964 if (argc != 5)
00965 usage();
00966 queryServer(command, argv[2], argv[3], argv[4], TRUE, FALSE);
00967 }
00968 else if (sameWord(command, "pcr"))
00969 {
00970 if (argc != 7)
00971 usage();
00972 pcrServer(argv[2], argv[3], argv[4], argv[5], atoi(argv[6]));
00973 }
00974 else if (sameWord(command, "status"))
00975 {
00976 if (argc != 4)
00977 usage();
00978 if (statusServer(argv[2], argv[3]))
00979 {
00980 exit(-1);
00981 }
00982 }
00983 else if (sameWord(command, "files"))
00984 {
00985 if (argc != 4)
00986 usage();
00987 getFileList(argv[2], argv[3]);
00988 }
00989 else
00990 {
00991 usage();
00992 }
00993 return 0;
00994 }