lib/net.c

Go to the documentation of this file.
00001 /* net.c some stuff to wrap around net communications. 
00002  *
00003  * This file is copyright 2002 Jim Kent, but license is hereby
00004  * granted for all use - public, private or commercial. */
00005 
00006 #include "common.h"
00007 #include <signal.h>
00008 #include <errno.h>
00009 #include <string.h>
00010 #include "internet.h"
00011 #include "errabort.h"
00012 #include "hash.h"
00013 #include "net.h"
00014 #include "linefile.h"
00015 #include "base64.h"
00016 
00017 static char const rcsid[] = "$Id: net.c,v 1.55 2006/10/06 22:17:38 hiram Exp $";
00018 
/* errno is used to get more useful error messages.  It is provided by
 * <errno.h> (included above), where it may be a macro rather than a plain
 * variable, so it must not be redeclared here with "extern int errno". */
00022 
static int netStreamSocket()
/* Open an AF_INET, SOCK_STREAM socket.  On failure a warning is issued
 * and the negative value returned by socket() is handed back. */
{
int fd = socket(AF_INET, SOCK_STREAM, 0);
if (fd >= 0)
    return fd;
warn("Couldn't make AF_INET socket.");
return fd;
}
00032 
00033 
int netConnect(char *hostName, int port)
/* Open a connection to hostName on the given port.  Returns the connected
 * socket descriptor.  Returns a negative value if hostName is NULL, if
 * internetFillInAddress reports failure, if the socket can't be made or
 * if connect() fails (the socket is closed again in that last case). */
{
struct sockaddr_in address;     /* Filled in by internetFillInAddress. */
int fd, status;

if (hostName == NULL)
    {
    warn("NULL hostName in netConnect");
    return -1;
    }
if (!internetFillInAddress(hostName, port, &address))
    return -1;
fd = netStreamSocket();
if (fd < 0)
    return fd;
status = connect(fd, (struct sockaddr *)&address, sizeof(address));
if (status >= 0)
    return fd;
warn("Couldn't connect to %s %d", hostName, port);
close(fd);
return status;
}
00057 
int netMustConnect(char *hostName, int port)
/* Connect to hostName:port via netConnect.  If that yields a negative
 * descriptor, noWarnAbort() is called. */
{
int fd = netConnect(hostName, port);
if (fd >= 0)
    return fd;
noWarnAbort();
return fd;
}
00066 
int netMustConnectTo(char *hostName, char *portName)
/* Start connection with a server and a port that is given as a string and
 * needs to be converted to integer.  Aborts via errAbort if portName is
 * NULL or does not start with a digit; otherwise behaves like
 * netMustConnect(hostName, atoi(portName)). */
{
/* Cast to unsigned char: passing a negative char to isdigit is undefined. */
if (portName == NULL || !isdigit((unsigned char)portName[0]))
    errAbort("netMustConnectTo: ports must be numerical, not %s",
             (portName == NULL) ? "(null)" : portName);
return netMustConnect(hostName, atoi(portName));
}
00074 
int netAcceptingSocketFrom(int port, int queueSize, char *host)
/* Create a socket that can accept connections from a
 * IP address on the current machine if the current machine
 * has multiple IP addresses.
 * Returns the listening descriptor, or a negative value on failure.  On
 * every failure after the socket was created the socket is closed again,
 * so no descriptor is leaked. */
{
struct sockaddr_in sai;
int sd;
int flag = 1;

netBlockBrokenPipes();
if ((sd = netStreamSocket()) < 0)
    return sd;
if (!internetFillInAddress(host, port, &sai))
    {
    close(sd);
    return -1;
    }
if (setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, &flag, sizeof(flag)))
    {
    warn("Couldn't set SO_REUSEADDR for port %d: %s", port, strerror(errno));
    close(sd);
    return -1;
    }
if (bind(sd, (struct sockaddr*)&sai, sizeof(sai)) == -1)
    {
    warn("Couldn't bind socket to %d: %s", port, strerror(errno));
    close(sd);
    return -1;
    }
if (listen(sd, queueSize) == -1)
    {
    warn("Couldn't listen on port %d: %s", port, strerror(errno));
    close(sd);
    return -1;
    }
return sd;
}
00100 
int netAcceptingSocket(int port, int queueSize)
/* Make a listening socket on port with no particular host given
 * (a NULL host is handed to netAcceptingSocketFrom). */
{
char *anyHost = NULL;
return netAcceptingSocketFrom(port, queueSize, anyHost);
}
00107 
int netAccept(int sd)
/* Accept incoming connection from socket descriptor.  Returns the new
 * connected descriptor, or -1 with errno set by accept() on failure.
 * The peer address is not wanted, so both the address and its length
 * pointer are NULL rather than passing an uninitialized length. */
{
return accept(sd, NULL, NULL);
}
00114 
int netAcceptFrom(int acceptor, unsigned char subnet[4])
/* Keep accepting connections on acceptor until one arrives whose IP
 * address lies in subnet, and return its descriptor.  Connections from
 * outside the subnet are closed.  With a NULL subnet the first
 * successfully accepted connection is returned.  Failed accept() calls
 * are simply retried. */
{
struct sockaddr_in peer;        /* Address of the connecting side. */
ZeroVar(&peer);
peer.sin_family = AF_INET;
for (;;)
    {
    unsigned char quad[4];
    socklen_t peerSize = sizeof(peer);
    int fd = accept(acceptor, (struct sockaddr *)&peer, &peerSize);
    if (fd < 0)
        continue;
    if (subnet == NULL)
        return fd;
    internetUnpackIp(ntohl(peer.sin_addr.s_addr), quad);
    if (internetIpInSubnet(quad, subnet))
        return fd;
    close(fd);
    }
}
00148 
FILE *netFileFromSocket(int socket)
/* Return a read/write FILE built on a dup() of socket.  Because the FILE
 * owns only the duplicate, the caller must fclose the FILE and also
 * close the original socket.  Aborts via errnoAbort if dup or fdopen
 * fails. */
{
FILE *stream;
int dupFd = dup(socket);
if (dupFd < 0)
   errnoAbort("Couldn't dupe socket in netFileFromSocket");
stream = fdopen(dupFd, "r+");
if (stream == NULL)
   errnoAbort("Couldn't fdopen socket in netFileFromSocket");
return stream;
}
00161 
00162 static boolean plumberInstalled = FALSE;
00163 
00164 void netBlockBrokenPipes()
00165 /* Make it so a broken pipe doesn't kill us. */
00166 {
00167 if (!plumberInstalled)
00168     {
00169     signal(SIGPIPE, SIG_IGN);       /* Block broken pipe signals. */
00170     plumberInstalled = TRUE;
00171     }
00172 }
00173 
00174 int netReadAll(int sd, void *vBuf, size_t size)
00175 /* Read given number of bytes into buffer.
00176  * Don't give up on first read! */
00177 {
00178 char *buf = vBuf;
00179 size_t totalRead = 0;
00180 int oneRead;
00181 
00182 if (!plumberInstalled)
00183     netBlockBrokenPipes();
00184 while (totalRead < size)
00185     {
00186     oneRead = read(sd, buf + totalRead, size - totalRead);
00187     if (oneRead < 0)
00188         return oneRead;
00189     if (oneRead == 0)
00190         break;
00191     totalRead += oneRead;
00192     }
00193 return totalRead;
00194 }
00195 
int netMustReadAll(int sd, void *vBuf, size_t size)
/* Like netReadAll, but calls errnoAbort if netReadAll reports an error
 * (negative result).  A short count caused by end of file is still
 * returned to the caller. */
{
int count = netReadAll(sd, vBuf, size);
if (count >= 0)
    return count;
errnoAbort("Couldn't finish netReadAll");
return count;
}
00205 
static void notGoodSubnet(char *sns)
/* Report (via errAbort) that sns is not an acceptable subnet string. */
{
static char *complaint =
    "'%s' is not a properly formatted subnet.  Subnets must consist of\n"
    "one to three dot-separated numbers between 0 and 255\n";
errAbort(complaint, sns);
}
00212 
void netParseSubnet(char *in, unsigned char out[4])
/* Parse subnet, which is a prefix of a normal dotted quad form.
 * Out will contain 255's for the don't care bits.  A NULL in leaves
 * all four bytes at 255.  Complains through notGoodSubnet if there are
 * not one to three words, a word does not start with a digit, or a
 * value exceeds 255. */
{
out[0] = out[1] = out[2] = out[3] = 255;
if (in != NULL)
    {
    char *snsCopy = strdup(in);   /* chopString needs a writable copy. */
    char *words[5];
    int wordCount, i;
    if (snsCopy == NULL)
        errAbort("Out of memory in netParseSubnet");
    wordCount = chopString(snsCopy, ".", words, ArraySize(words));
    if (wordCount > 3 || wordCount < 1)
        notGoodSubnet(in);
    for (i=0; i<wordCount; ++i)
        {
        char *s = words[i];
        int x;
        /* Cast: isdigit on a negative char value is undefined. */
        if (!isdigit((unsigned char)s[0]))
            notGoodSubnet(in);
        x = atoi(s);
        if (x > 255)
            notGoodSubnet(in);
        out[i] = x;
        }
    freez(&snsCopy);
    }
}
00240 
00241 void netParseUrl(char *url, struct netParsedUrl *parsed)
00242 /* Parse a URL into components.   A full URL is made up as so:
00243  *   http://user:password@hostName:port/file
00244  * This is set up so that the http:// and the port are optional. 
00245  */
00246 {
00247 char *s, *t, *u, *v, *w;
00248 char buf[1024];
00249 
00250 /* Make local copy of URL. */
00251 if (strlen(url) >= sizeof(buf))
00252     errAbort("Url too long: '%s'", url);
00253 strcpy(buf, url);
00254 url = buf;
00255 
00256 /* Find out protocol - default to http. */
00257 s = trimSpaces(url);
00258 s = stringIn("://", url);
00259 if (s == NULL)
00260     {
00261     strcpy(parsed->protocol, "http");
00262     s = url;
00263     }
00264 else
00265     {
00266     *s = 0;
00267     tolowers(url);
00268     strncpy(parsed->protocol, url, sizeof(parsed->protocol));
00269     s += 3;
00270     }
00271 
00272 /* Split off file part. */
00273 u = strchr(s, '/');
00274 if (u == NULL)
00275    strcpy(parsed->file, "/");
00276 else
00277    {
00278    /* need to encode spaces, but not ! other characters */
00279    char *t=replaceChars(u," ","%20");
00280    strncpy(parsed->file, t, sizeof(parsed->file));
00281    freeMem(t);
00282    *u = 0;
00283    }
00284 
00285 
00286 /* Split off user part */
00287 v = strchr(s, '@');
00288 if (v == NULL)
00289    {
00290    if (sameWord(parsed->protocol,"http"))
00291       {
00292       strcpy(parsed->user, "");
00293       strcpy(parsed->password, "");
00294       }
00295    if (sameWord(parsed->protocol,"ftp"))
00296       {
00297       strcpy(parsed->user, "anonymous");
00298       strcpy(parsed->password, "x@genome.ucsc.edu");
00299       }
00300    }
00301 else
00302    {
00303    *v = 0;
00304    /* split off password part */
00305    w = strchr(s, ':');
00306    if (w == NULL)
00307       {
00308       strncpy(parsed->user, s, sizeof(parsed->user));
00309       strcpy(parsed->password, "");
00310       }
00311    else
00312       {
00313       *w = 0;
00314       strncpy(parsed->user, s, sizeof(parsed->user));
00315       strncpy(parsed->password, w+1, sizeof(parsed->password));
00316       }
00317    s = v+1;
00318    }
00319 
00320 
00321 /* Save port if it's there.  If not default to 80. */
00322 t = strchr(s, ':');
00323 if (t == NULL)
00324    {
00325    if (sameWord(parsed->protocol,"http"))
00326       strcpy(parsed->port, "80");
00327    if (sameWord(parsed->protocol,"ftp"))
00328       strcpy(parsed->port, "21");
00329    }
00330 else
00331    {
00332    *t++ = 0;
00333    if (!isdigit(t[0]))
00334       errAbort("Non-numeric port name %s", t);
00335    strncpy(parsed->port, t, sizeof(parsed->port));
00336    }
00337 
00338 /* What's left is the host. */
00339 strncpy(parsed->host, s, sizeof(parsed->host));
00340 }
00341 
00342 /* this was cloned from rudp.c - move it later for sharing */
00343 static boolean readReadyWait(int sd, int microseconds)
00344 /* Wait for descriptor to have some data to read, up to
00345  * given number of microseconds. */
00346 {
00347 struct timeval tv;
00348 fd_set set;
00349 int readyCount;
00350 
00351 for (;;)
00352     {
00353     if (microseconds > 1000000)
00354         {
00355         tv.tv_sec = microseconds/1000000;
00356         tv.tv_usec = microseconds%1000000;
00357         }
00358     else
00359         {
00360         tv.tv_sec = 0;
00361         tv.tv_usec = microseconds;
00362         }
00363     FD_ZERO(&set);
00364     FD_SET(sd, &set);
00365     readyCount = select(sd+1, &set, NULL, NULL, &tv);
00366     if (readyCount < 0) 
00367         {
00368         if (errno == EINTR)     /* Select interrupted, not timed out. */
00369             continue;
00370         else 
00371             warn("select failure in rudp: %s", strerror(errno));
00372         }
00373     else
00374         {
00375         return readyCount > 0;  /* Zero readyCount indicates time out */
00376         }
00377     }
00378 }
00379 
00380 struct dyString *sendFtpCommand(int sd, char *cmd, boolean seeResult, boolean noTimeoutError)
00381 /* send command to ftp server and check resulting reply code, 
00382    give error if not desired reply */
00383 {   
00384 struct dyString *rs = NULL;
00385 int reply = 0;
00386 char buf[4*1024];
00387 int readSize;
00388 char *startLastLine = NULL;
00389 long timeOut = 1000000; /* wait in microsec */
00390 
00391 write(sd, cmd, strlen(cmd));
00392 
00393 rs = newDyString(4*1024);
00394 while (1)
00395     {
00396     while (1)
00397         {
00398         if (!readReadyWait(sd, timeOut))
00399             {
00400             if (!noTimeoutError)
00401                 errAbort("ftp server response timed out > %ld microsec",timeOut);
00402             return rs;
00403             }
00404         if ((readSize = read(sd, buf, sizeof(buf))) == 0)
00405             break;
00406 
00407         dyStringAppendN(rs, buf, readSize);
00408         if (endsWith(rs->string,"\n"))
00409             break;
00410         }
00411         
00412     /* find the start of the last line in the buffer */
00413     startLastLine = rs->string+strlen(rs->string)-1;
00414     if (startLastLine >= rs->string)
00415         if (*startLastLine == '\n') 
00416             --startLastLine;
00417     while ((startLastLine >= rs->string) && (*startLastLine != '\n'))
00418         --startLastLine;
00419     ++startLastLine;
00420         
00421     if (strlen(startLastLine)>4)
00422       if (
00423         isdigit(startLastLine[0]) &&
00424         isdigit(startLastLine[1]) &&
00425         isdigit(startLastLine[2]) &&
00426         startLastLine[3]==' ')
00427         break;
00428         
00429     /* must be some text info we can't use, ignore it till we get status code */
00430 
00431     }
00432 
00433 reply = atoi(startLastLine);
00434 
00435 if ((reply < 200) || (reply > 399))
00436     errAbort("ftp server error on cmd=[%s] response=[%s]\n",cmd,rs->string);
00437     
00438 if (!seeResult) dyStringFree(&rs);
00439 
00440 return rs;
00441 }
00442 
00443 
int parsePasvPort(char *rs)
/* Parse PASV reply to get the port and return it.  The reply is expected
 * to contain six comma separated numbers in parentheses, e.g.
 *   227 Entering Passive Mode (128,231,210,81,222,250)
 * and the port is the fifth number times 256 plus the sixth.  Aborts if
 * the parentheses are missing or there are not exactly six numbers.
 * Parsing is done on a private copy, so rs itself is left untouched. */
{
char *rsCopy = strdup(rs);
char *words[7];
int wordCount;
char *rsStart, *rsEnd;
int result = 0;

if (rsCopy == NULL)
    errAbort("Out of memory in parsePasvPort");
rsStart = strchr(rsCopy,'(');
rsEnd = (rsStart == NULL) ? NULL : strchr(rsStart,')');
if (rsStart == NULL || rsEnd == NULL)
    errAbort("PASV reply does not parse correctly");
rsStart++;
*rsEnd=0;
wordCount = chopString(rsStart, ",", words, ArraySize(words));
if (wordCount != 6)
    errAbort("PASV reply does not parse correctly");
result = atoi(words[4])*256+atoi(words[5]);
freez(&rsCopy);
return result;
}
00462 
00463 
00464 int netGetOpenFtp(char *url)
00465 /* Return a file handle that will read the url. */
00466 {
00467 struct netParsedUrl npu;
00468 struct dyString *rs = NULL;
00469 int sd;
00470 long timeOut = 1000000; /* wait in microsec */
00471 char cmd[256];
00472 
00473 /* Parse the URL and connect. */
00474 netParseUrl(url, &npu);
00475 if (!sameString(npu.protocol, "ftp"))
00476     errAbort("Sorry, can only netOpen ftp's currently");
00477 sd = netMustConnect(npu.host, atoi(npu.port));
00478 
00479 /* Ask remote ftp server for a file. */
00480 
00481 /* don't send a command, just read the welcome msg */
00482 if (readReadyWait(sd, timeOut))
00483     sendFtpCommand(sd, "", FALSE, FALSE);
00484 
00485 safef(cmd,sizeof(cmd),"USER %s\r\n",npu.user);
00486 sendFtpCommand(sd, cmd, FALSE, FALSE);
00487 
00488 safef(cmd,sizeof(cmd),"PASS %s\r\n",npu.password);
00489 sendFtpCommand(sd, cmd, FALSE, FALSE);
00490 
00491 sendFtpCommand(sd, "TYPE I\r\n", FALSE, FALSE);
00492 /* 200 Type set to I */
00493 /* (send the data as binary, so can support compressed files) */
00494 
00495 rs = sendFtpCommand(sd, "PASV\r\n", TRUE, FALSE);
00496 /* 227 Entering Passive Mode (128,231,210,81,222,250) */
00497 
00498 safef(cmd,sizeof(cmd),"RETR %s\r\n", npu.file);
00499 /* we can't wait for reply because 
00500    we need to start the next fetch connect 
00501    but then if there is an error e.g. missing file,
00502    then we don't see the err msg because we
00503    already closed the port and are waiting.
00504    And our timeout is long - indefinitely so?
00505 */
00506 sendFtpCommand(sd, cmd, FALSE, TRUE);  
00507 
00508 close(sd);
00509 
00510 sd = netMustConnect(npu.host, parsePasvPort(rs->string));
00511 
00512 /* Clean up and return handle. */
00513 dyStringFree(&rs);
00514 return sd;
00515 }
00516 
00517 int netHttpConnect(char *url, char *method, char *protocol, char *agent)
00518 /* Parse URL, connect to associated server on port,
00519  * and send most of the request to the server.  If
00520  * specified in the url send user name and password
00521  * too.  This does not send the final \r\n to finish
00522  * off the request, so that you can send cookies. 
00523  * Typically the "method" will be "GET" or "POST"
00524  * and the agent will be the name of your program or
00525  * library. */
00526 {
00527 struct netParsedUrl npu;
00528 struct dyString *dy = newDyString(512);
00529 int sd;
00530 
00531 /* Parse the URL and connect. */
00532 netParseUrl(url, &npu);
00533 if (!sameString(npu.protocol, "http"))
00534     errAbort("Sorry, can only netOpen http's currently");
00535 sd = netMustConnect(npu.host, atoi(npu.port));
00536 
00537 /* Ask remote server for a file. */
00538 dyStringPrintf(dy, "%s %s %s\r\n", method, npu.file, protocol);
00539 dyStringPrintf(dy, "User-Agent: %s\r\n", agent);
00540 /* do not need the 80 since it is the default */
00541 if (sameString("80",npu.port))
00542     dyStringPrintf(dy, "Host: %s\r\n", npu.host);
00543 else
00544     dyStringPrintf(dy, "Host: %s:%s\r\n", npu.host, npu.port);
00545 if (!sameString(npu.user,""))
00546     {
00547     char up[256];
00548     char *b64up = NULL;
00549     safef(up, sizeof(up), "%s:%s", npu.user, npu.password);
00550     b64up = base64Encode(up, strlen(up));
00551     dyStringPrintf(dy, "Authorization: Basic %s\r\n", b64up);
00552     freez(&b64up);
00553     }
00554 dyStringAppend(dy, "Accept: */*\r\n");
00555 write(sd, dy->string, dy->stringSize);
00556 
00557 /* Clean up and return handle. */
00558 dyStringFree(&dy);
00559 return sd;
00560 }
00561 
00562 int netOpenHttpExt(char *url, char *method, boolean end)
00563 /* Return a file handle that will read the url.  If end is not
00564  * set then can send cookies and other info to returned file 
00565  * handle before reading. */
00566 {
00567 int sd =  netHttpConnect(url, method, "HTTP/1.0", "genome.ucsc.edu/net.c");
00568 if (end)
00569     write(sd, "\r\n", 2);
00570 return sd;
00571 }
00572 
00573 static int netGetOpenHttp(char *url)
00574 /* Return a file handle that will read the url.  */
00575 {
00576 return netOpenHttpExt(url, "GET", TRUE);
00577 }
00578 
00579 int netUrlHead(char *url, struct hash *hash)
00580 /* Go get head and return status.  Return negative number if
00581  * can't get head. If hash is non-null, fill it with header
00582  * lines, including hopefully Content-Type: */
00583 {
00584 int sd = netOpenHttpExt(url, "HEAD", TRUE);
00585 int status = EIO;
00586 if (sd >= 0)
00587     {
00588     char *line, *word;
00589     struct lineFile *lf = lineFileAttach(url, TRUE, sd);
00590 
00591     if (lineFileNext(lf, &line, NULL))
00592         {
00593         if (startsWith("HTTP/", line))
00594             {
00595             word = nextWord(&line);
00596             word = nextWord(&line);
00597             if (word != NULL && isdigit(word[0]))
00598                 {
00599                 status = atoi(word);
00600                 if (hash != NULL)
00601                     {
00602                     while (lineFileNext(lf, &line, NULL))
00603                         {
00604                         word = nextWord(&line);
00605                         if (word == NULL)
00606                             break;
00607                         hashAdd(hash, word, cloneString(skipLeadingSpaces(line)));
00608                         }
00609                     }
00610                 }
00611             }
00612         }
00613     lineFileClose(&lf);
00614     }
00615 else
00616     status = errno;
00617 return status;
00618 }
00619 
int netUrlOpen(char *url)
/* Open url and return a unix low-level descriptor to read it from;
 * close() it when done.  ftp:// urls go through the ftp code, http://
 * urls and urls with no "://" at all go through the http code, and
 * anything else is an errAbort. */
{
if (startsWith("ftp://",url))
    return netGetOpenFtp(url);
if (startsWith("http://",url) || (stringIn("://", url) == NULL))
    return netGetOpenHttp(url);
errAbort("Sorry, can only netOpen http and ftp currently");
return -1;
}
00632 
struct dyString *netSlurpFile(int sd)
/* Read from sd until read() returns zero or less, collecting everything
 * into a newly made dynamic string which is returned. */
{
struct dyString *contents = newDyString(4*1024);
char chunk[4*1024];
int chunkSize;

for (;;)
    {
    chunkSize = read(sd, chunk, sizeof(chunk));
    if (chunkSize <= 0)
        break;
    dyStringAppendN(contents, chunk, chunkSize);
    }
return contents;
}
00645 
struct dyString *netSlurpUrl(char *url)
/* Open url, read everything that comes back into a dynamic string,
 * close the descriptor and return the string. */
{
struct dyString *contents;
int fd = netUrlOpen(url);
contents = netSlurpFile(fd);
close(fd);
return contents;
}
00654 
int netSkipHttpHeaderLinesCatch(int sd, char *url)
/* Skip http header lines. Return non zero error if there's a problem.
   The input is a standard sd or fd descriptor.
   This is meant to be able work even with a re-passable stream handle,
   e.g. can pass it to the pipes routines, which means we can't
   attach a linefile since filling its buffer reads in more than just the http header.
   Returns 0 once the blank line ending the header has been consumed.
   positive return code is http error (any status other than 200, redirects included),
   -1 = http header too long, -2 is a strange http header (no status code, or
   one that is not a positive number), -3 is error reading descriptor, which
   includes hitting end of file before the header is complete.
   url is not used here; it is kept for interface compatibility.
 */
{
char buf[2000];
int maxbuf = sizeof(buf);

for (;;)
    {
    char *line = buf;
    int i = 0;

    /* Collect one line, dropping the CR and stopping at the LF. */
    for (;;)
        {
        char c;
        int nread = read(sd, &c, 1);  /* one char at a time, but http headers are small */
        if (nread <= 0)
            return -3;  /* read error, or EOF in mid header */
        if (c == 10)
            break;
        if (c != 13)
            buf[i++] = c;
        if (i >= maxbuf)
            return -1;
        }
    buf[i] = 0;  /* add string terminator */

    if (sameString(line,""))
        break; /* End of Header found */
    if (startsWith("HTTP/", line))
        {
        char *code;
        nextWord(&line);            /* skip the version */
        code = nextWord(&line);
        if (code == NULL)
            return -2;
        if (!sameString(code, "200"))
            {
            /* Never let an unparsable code come out as 0 (success). */
            int status = atoi(code);
            return (status > 0) ? status : -2;
            }
        }
    }
return 0;
}
00730 
00731 boolean netSkipHttpHeaderLines(int sd, char *url)
00732 /* Skip http header lines. Return FALSE if there's a problem.
00733    The input is a standard sd or fd descriptor.
00734    This is meant to be able work even with a re-passable stream handle,
00735    e.g. can pass it to the pipes routines, which means we can't
00736    attach a linefile since filling its buffer reads in more than just the http header.
00737  */
00738 {
00739 char buf[2000];
00740 char *line = buf;
00741 int maxbuf = sizeof(buf);
00742 int i=0;
00743 char c = ' ';
00744 int nread = 0;
00745 struct dyString *redirectMsg = NULL;
00746 char *sep = NULL;
00747 char *headerName = NULL;
00748 char *headerVal = NULL;
00749 while(TRUE)
00750     {
00751     i = 0;
00752     while (TRUE)
00753         {
00754         nread = read(sd, &c, 1);  /* one char at a time, but http headers are small */
00755         if (nread < 0)
00756             return FALSE;  /* err reading descriptor */
00757         if (c == 10)
00758             break;
00759         if (c != 13)
00760             buf[i++] = c;
00761         if (i >= maxbuf)
00762             {
00763             warn("http header line too long > %d chars.",maxbuf);
00764             return FALSE;
00765             }
00766         }
00767     buf[i] = 0;  /* add string terminator */
00768 
00769     if (sameString(line,""))
00770         {
00771         break; /* End of Header found */
00772         }
00773     if (startsWith("HTTP/", line))
00774         {
00775         char *version, *code;
00776         version = nextWord(&line);
00777         code = nextWord(&line);
00778         if (code == NULL)
00779             {
00780             warn("Strange http header on %s\n", url);
00781             return FALSE;
00782             }
00783         if (startsWith("30", code) && isdigit(code[2]) && code[3] == 0)
00784             {
00785             redirectMsg = newDyString(256);
00786             dyStringPrintf(redirectMsg,"Your URL \"%s\" resulted in a redirect message "
00787                  "(HTTP status code %s %s).  <BR>\n"
00788                  "Sorry, redirects are not supported.",
00789                  url, code, line);
00790             }
00791         else if (!sameString(code, "200"))
00792             {
00793             warn("%s: %s %s\n", url, code, line);
00794             return FALSE;
00795             }
00796         line = buf;  /* restore it */
00797         }
00798     headerName = line;
00799     sep = strchr(line,':');
00800     if (sep)
00801         {
00802         *sep = 0;
00803         headerVal = skipLeadingSpaces(++sep);
00804         }
00805     else
00806         {
00807         headerVal = NULL;
00808         }
00809     if (sameWord(headerName,"Location"))
00810         {
00811         if (redirectMsg)
00812             {
00813             dyStringPrintf(redirectMsg, " Redirection location: <A HREF=\"%s\">%s</A>", 
00814                 headerVal, headerVal);
00815             }
00816         }
00817     }
00818 if (redirectMsg)
00819     {
00820     warn("%s", redirectMsg->string);
00821     dyStringFree(&redirectMsg);
00822     return FALSE;
00823     }           
00824 return TRUE;
00825 }
00826 
00827 struct lineFile *netLineFileMayOpenCatchError(char *url)
00828 /* Return a lineFile attached to url. http skips header.
00829  * Supports some compression formats.
00830  * Return NULL if there's a problem without printing error.  */
00831 {
00832 int sd = netUrlOpen(url);
00833 if (sd < 0)
00834     {
00835     warn("Couldn't open %s", url);
00836     return NULL;
00837     }
00838 else
00839     {
00840     struct lineFile *lf = NULL;
00841     if (startsWith("http://",url))
00842         {
00843         if (netSkipHttpHeaderLinesCatch(sd, url) != 0)
00844             return NULL;     /* url needed only for err msgs*/
00845         }
00846     if (endsWith(url, ".gz") ||
00847         endsWith(url, ".Z")  ||
00848         endsWith(url, ".bz2"))
00849         {
00850         lf = lineFileDecompressFd(url, TRUE, sd);
00851            /* url needed only for compress type determination */
00852         }
00853     else
00854         {
00855         lf = lineFileAttach(url, TRUE, sd);
00856         }
00857     return lf;
00858     }
00859 }
00860 
00861 struct lineFile *netLineFileMayOpen(char *url)
00862 /* Return a lineFile attached to url. http skips header.
00863  * Supports some compression formats.
00864  * Return NULL if there's a problem. */
00865 {
00866 int sd = netUrlOpen(url);
00867 if (sd < 0)
00868     {
00869     warn("Couldn't open %s", url);
00870     return NULL;
00871     }
00872 else
00873     {
00874     struct lineFile *lf = NULL;
00875     if (startsWith("http://",url))
00876         {
00877         if (!netSkipHttpHeaderLines(sd, url))
00878             return NULL;     /* url needed only for err msgs*/
00879         }
00880     if (endsWith(url, ".gz") ||
00881         endsWith(url, ".Z")  ||
00882         endsWith(url, ".bz2"))
00883         {
00884         lf = lineFileDecompressFd(url, TRUE, sd);
00885            /* url needed only for compress type determination */
00886         }
00887     else
00888         {
00889         lf = lineFileAttach(url, TRUE, sd);
00890         }
00891     return lf;
00892     }
00893 }
00894 
struct lineFile *netLineFileOpen(char *url)
/* Open url through netLineFileMayOpen and return the lineFile, calling
 * noWarnAbort() if that came back NULL.  Dispose of the result with
 * lineFileClose(). */
{
struct lineFile *result = netLineFileMayOpen(url);
if (result != NULL)
    return result;
noWarnAbort();
return result;
}
00905 
00906 boolean netSendString(int sd, char *s)
00907 /* Send a string down a socket - length byte first. */
00908 {
00909 int length = strlen(s);
00910 UBYTE len;
00911 
00912 if (length > 255)
00913     errAbort("Trying to send a string longer than 255 bytes (%d bytes)", length);
00914 len = length;
00915 if (write(sd, &len, 1)<0)
00916     {
00917     warn("Couldn't send string to socket");
00918     return FALSE;
00919     }
00920 if (write(sd, s, length)<0)
00921     {
00922     warn("Couldn't send string to socket");
00923     return FALSE;
00924     }
00925 return TRUE;
00926 }
00927 
00928 boolean netSendLongString(int sd, char *s)
00929 /* Send a long string down socket: two bytes for length. */
00930 {
00931 unsigned length = strlen(s);
00932 UBYTE b[2];
00933 
00934 if (length >= 64*1024)
00935     {
00936     warn("Trying to send a string longer than 64k bytes (%d bytes)", length);
00937     return FALSE;
00938     }
00939 b[0] = (length>>8);
00940 b[1] = (length&0xff);
00941 if (write(sd, b, 2) < 0)
00942     {
00943     warn("Couldn't send long string to socket");
00944     return FALSE;
00945     }
00946 if (write(sd, s, length)<0)
00947     {
00948     warn("Couldn't send long string to socket");
00949     return FALSE;
00950     }
00951 return TRUE;
00952 }
00953 
00954 boolean netSendHugeString(int sd, char *s)
00955 /* Send a long string down socket: four bytes for length. */
00956 {
00957 unsigned long length = strlen(s);
00958 unsigned long l = length;
00959 UBYTE b[4];
00960 int i;
00961 for (i=3; i>=0; --i)
00962     {
00963     b[i] = l & 0xff;
00964     l >>= 8;
00965     }
00966 if (write(sd, b, 4) < 0)
00967     {
00968     warn("Couldn't send huge string to socket");
00969     return FALSE;
00970     }
00971 if (write(sd, s, length) < 0)
00972     {
00973     warn("Couldn't send huge string to socket");
00974     return FALSE;
00975     }
00976 return TRUE;
00977 }
00978 
00979 
00980 char *netGetString(int sd, char buf[256])
00981 /* Read string into buf and return it.  If buf is NULL
00982  * an internal buffer will be used. Print warning message
00983  * and return NULL if any problem. */
00984 {
00985 static char sbuf[256];
00986 UBYTE len = 0;
00987 int length;
00988 int sz;
00989 if (buf == NULL) buf = sbuf;
00990 sz = netReadAll(sd, &len, 1);
00991 if (sz == 0)
00992     return NULL;
00993 if (sz < 0)
00994     {
00995     warn("Couldn't read string length");
00996     return NULL;
00997     }
00998 length = len;
00999 if (length > 0)
01000     if (netReadAll(sd, buf, length) < 0)
01001         {
01002         warn("Couldn't read string body");
01003         return NULL;
01004         }
01005 buf[length] = 0;
01006 return buf;
01007 }
01008 
01009 char *netGetLongString(int sd)
01010 /* Read string and return it.  freeMem
01011  * the result when done. */
01012 {
01013 UBYTE b[2];
01014 char *s = NULL;
01015 int length = 0;
01016 int sz;
01017 b[0] = b[1] = 0;
01018 sz = netReadAll(sd, b, 2);
01019 if (sz == 0)
01020     return NULL;
01021 if (sz < 0)
01022     {
01023     warn("Couldn't read long string length");
01024     return NULL;
01025     }
01026 length = (b[0]<<8) + b[1];
01027 s = needMem(length+1);
01028 if (length > 0)
01029     if (netReadAll(sd, s, length) < 0)
01030         {
01031         warn("Couldn't read long string body");
01032         return NULL;
01033         }
01034 s[length] = 0;
01035 return s;
01036 }
01037 
01038 char *netGetHugeString(int sd)
01039 /* Read string and return it.  freeMem
01040  * the result when done. */
01041 {
01042 UBYTE b[4];
01043 char *s = NULL;
01044 unsigned long length = 0;
01045 int sz, i;
01046 sz = netReadAll(sd, b, 4);
01047 if (sz == 0)
01048     return NULL;
01049 if (sz < 4)
01050     {
01051     warn("Couldn't read huge string length");
01052     return NULL;
01053     }
01054 for (i=0; i<4; ++i)
01055     {
01056     length <<= 8;
01057     length += b[i];
01058     }
01059 s = needMem(length+1);
01060 if (length > 0)
01061     {
01062     if (netReadAll(sd, s, length) < 0)
01063         {
01064         warn("Couldn't read huge string body");
01065         return NULL;
01066         }
01067     }
01068 s[length] = 0;
01069 return s;
01070 }
01071 
01072 
01073 char *netRecieveString(int sd, char buf[256])
01074 /* Read string into buf and return it.  If buf is NULL
01075  * an internal buffer will be used. Abort if any problem. */
01076 {
01077 char *s = netGetString(sd, buf);
01078 if (s == NULL)
01079      noWarnAbort();   
01080 return s;
01081 }
01082 
01083 char *netRecieveLongString(int sd)
01084 /* Read string and return it.  freeMem
01085  * the result when done. Abort if any problem*/
01086 {
01087 char *s = netGetLongString(sd);
01088 if (s == NULL)
01089      noWarnAbort();   
01090 return s;
01091 }
01092 
01093 char *netRecieveHugeString(int sd)
01094 /* Read string and return it.  freeMem
01095  * the result when done. Abort if any problem*/
01096 {
01097 char *s = netGetHugeString(sd);
01098 if (s == NULL)
01099      noWarnAbort();   
01100 return s;
01101 }
01102 
01103 
01104 struct lineFile *netHttpLineFileMayOpen(char *url, struct netParsedUrl **npu)
01105 /* Parse URL and open an HTTP socket for it but don't send a request yet. */
01106 {
01107 int sd;
01108 struct lineFile *lf;
01109 
01110 /* Parse the URL and try to connect. */
01111 AllocVar(*npu);
01112 netParseUrl(url, *npu);
01113 if (!sameString((*npu)->protocol, "http"))
01114     errAbort("Sorry, can only netOpen http's currently");
01115 sd = netConnect((*npu)->host, atoi((*npu)->port));
01116 if (sd < 0)
01117     return NULL;
01118 
01119 /* Return handle. */
01120 lf = lineFileAttach(url, TRUE, sd);
01121 return lf;
01122 } /* netHttpLineFileMayOpen */
01123 
01124 
01125 void netHttpGet(struct lineFile *lf, struct netParsedUrl *npu,
01126                 boolean keepAlive)
01127 /* Send a GET request, possibly with Keep-Alive. */
01128 {
01129 struct dyString *dy = newDyString(512);
01130 
01131 /* Ask remote server for the file/query. */
01132 dyStringPrintf(dy, "GET %s HTTP/1.1\r\n", npu->file);
01133 dyStringPrintf(dy, "User-Agent: genome.ucsc.edu/net.c\r\n");
01134 dyStringPrintf(dy, "Host: %s:%s\r\n", npu->host, npu->port);
01135 if (!sameString(npu->user,""))
01136     {
01137     char up[256];
01138     char *b64up = NULL;
01139     safef(up,sizeof(up), "%s:%s", npu->user, npu->password);
01140     b64up = base64Encode(up, strlen(up));
01141     dyStringPrintf(dy, "Authorization: Basic %s\r\n", b64up);
01142     freez(&b64up);
01143     }
01144 dyStringAppend(dy, "Accept: */*\r\n");
01145 if (keepAlive)
01146   {
01147     dyStringAppend(dy, "Connection: Keep-Alive\r\n");
01148     dyStringAppend(dy, "Connection: Persist\r\n");
01149   }
01150 else
01151     dyStringAppend(dy, "Connection: close\r\n");
01152 dyStringAppend(dy, "\r\n");
01153 write(lf->fd, dy->string, dy->stringSize);
01154 /* Clean up. */
01155 dyStringFree(&dy);
01156 } /* netHttpGet */
01157 
01158 int netHttpGetMultiple(char *url, struct slName *queries, void *userData,
01159                        void (*responseCB)(void *userData, char *req,
01160                                           char *hdr, struct dyString *body))
01161 /* Given an URL which is the base of all requests to be made, and a 
01162  * linked list of queries to be appended to that base and sent in as 
01163  * requests, send the requests as a batch and read the HTTP response 
01164  * headers and bodies.  If not all the requests get responses (i.e. if 
01165  * the server is ignoring Keep-Alive or is imposing a limit), try again 
01166  * until we can't connect or until all requests have been served. 
01167  * For each HTTP response, do a callback. */
01168 {
01169   struct slName *qStart;
01170   struct slName *qPtr;
01171   struct lineFile *lf;
01172   struct netParsedUrl *npu;
01173   struct dyString *dyQ    = newDyString(512);
01174   struct dyString *body;
01175   char *base;
01176   char *hdr;
01177   int qCount;
01178   int qTotal;
01179   int numParseFailures;
01180   int contentLength;
01181   boolean chunked;
01182   boolean done;
01183   boolean keepAlive;
01184 
01185   /* Find out how many queries we'll need to do so we know how many times 
01186    * it's OK to run into end of file in case server ignores Keep-Alive. */
01187   qTotal = 0;
01188   for (qPtr = queries;  qPtr != NULL;  qPtr = qPtr->next)
01189     {
01190       qTotal++;
01191     }
01192 
01193   done = FALSE;
01194   qCount = 0;
01195   numParseFailures = 0;
01196   qStart = queries;
01197   while ((! done) && (qStart != NULL))
01198     {
01199       lf = netHttpLineFileMayOpen(url, &npu);
01200       if (lf == NULL)
01201         {
01202           done = TRUE;
01203           break;
01204         }
01205       base = cloneString(npu->file);
01206       /* Send all remaining requests with keep-alive. */
01207       for (qPtr = qStart;  qPtr != NULL;  qPtr = qPtr->next)
01208         {
01209           dyStringClear(dyQ);
01210           dyStringAppend(dyQ, base);
01211           dyStringAppend(dyQ, qPtr->name);
01212           strcpy(npu->file, dyQ->string);
01213           keepAlive = (qPtr->next == NULL) ? FALSE : TRUE;
01214           netHttpGet(lf, npu, keepAlive);
01215         }
01216       /* Get as many responses as we can; call responseCB() and 
01217        * advance qStart for each. */
01218       for (qPtr = qStart;  qPtr != NULL;  qPtr = qPtr->next)
01219         {
01220           if (lineFileParseHttpHeader(lf, &hdr, &chunked, &contentLength))
01221             {
01222               body = lineFileSlurpHttpBody(lf, chunked, contentLength);
01223               dyStringClear(dyQ);
01224               dyStringAppend(dyQ, base);
01225               dyStringAppend(dyQ, qPtr->name);
01226               responseCB(userData, dyQ->string, hdr, body);
01227               qStart = qStart->next;
01228               qCount++;
01229             }
01230           else
01231             {
01232               if (numParseFailures++ > qTotal) {
01233                 done = TRUE;
01234               }
01235               break;
01236             }
01237         }
01238     }
01239 
01240   return qCount;
01241 } /* netHttpMultipleQueries */
01242 
01243 

Generated on Tue Dec 25 18:39:31 2007 for blat by  doxygen 1.5.2