lib/spacedColumn.c

Go to the documentation of this file.
00001 /* spacedColumn - stuff to handle parsing text files where fields are
00002  * fixed width rather than tab delimited. */
00003 
00004 #include "common.h"
00005 #include "linefile.h"
00006 #include "spacedColumn.h"
00007 #include "obscure.h"
00008 #include "sqlNum.h"
00009 
/* Revision identification string, written in version-control "Id" keyword form. */
static const char rcsid[] = "$Id: spacedColumn.c,v 1.1 2007/03/23 06:29:51 kent Exp $";
00011 
00012 struct spacedColumn *spacedColumnFromSample(char *sample)
00013 /* Return spaced column list from a sampleline , which is assumed to
00014  * have no spaces except between columns */
00015 {
00016 struct spacedColumn *col, *colList = NULL;
00017 char *dupe = cloneString(sample);
00018 char *word, *line = dupe;
00019 while ((word = nextWord(&line)) != NULL)
00020     {
00021     AllocVar(col);
00022     col->start = word - dupe;
00023     col->size = strlen(word);
00024     slAddHead(&colList, col);
00025     }
00026 freeMem(dupe);
00027 slReverse(&colList);
00028 return colList;
00029 }
00030 
struct spacedColumn *spacedColumnFromLineFile(struct lineFile *lf)
/* Scan through lineFile and figure out column spacing. Assumes
 * file contains nothing but columns.
 *
 * Every line is overlaid onto one "projection" buffer: a position stays
 * blank only if it is a space (or zero byte) in every line read.  The
 * projection is then handed to spacedColumnFromSample(), so each run of
 * non-blank positions becomes a column.
 *
 * Aborts (errAbort) if a line is longer than the 64k projection buffer
 * rather than writing past the end of it. */
{
int maxLine = 64*1024;
int lineSize, widestLine = 0;
char *projection = needMem(maxLine+1);
char *line;
struct spacedColumn *colList;
int i;

/* Create projection of all lines, starting from all blanks. */
for (i=0; i<maxLine; ++i)
    projection[i] = ' ';
while (lineFileNext(lf, &line, &lineSize))
    {
    /* projection holds maxLine+1 bytes; both the per-character writes
     * below and the final terminator at projection[widestLine] must
     * stay inside it. */
    if (lineSize > maxLine)
        errAbort("spacedColumnFromLineFile: line of %d chars exceeds limit of %d",
                 lineSize, maxLine);
    if (lineSize > widestLine)
         widestLine = lineSize;
    for (i=0; i<lineSize; ++i)
        {
        char c = line[i];
        if (c != 0 && c != ' ')
            projection[i] = c;
        }
    }
/* Cut the projection off at the widest line seen. */
projection[widestLine] = 0;
colList = spacedColumnFromSample(projection);
freeMem(projection);
return colList;
}
00061 
00062 struct spacedColumn *spacedColumnFromFile(char *fileName)
00063 /* Read file and figure out where columns are. */
00064 {
00065 struct lineFile *lf = lineFileOpen(fileName, TRUE);
00066 struct spacedColumn *colList = spacedColumnFromLineFile(lf);
00067 lineFileClose(&lf);
00068 return colList;
00069 }
00070 
00071 int spacedColumnBiggestSize(struct spacedColumn *colList)
00072 /* Return size of biggest column. */
00073 {
00074 int maxSize = 0;
00075 struct spacedColumn *col;
00076 for (col = colList; col != NULL; col = col->next)
00077     if (maxSize < col->size)
00078         maxSize = col->size;
00079 return maxSize;
00080 }
00081 
00082 boolean spacedColumnParseLine(struct spacedColumn *colList, 
00083         char *line, char *row[])
00084 /* Parse line into row according to colList.  This will
00085  * trim leading and trailing spaces. It will write 0's
00086  * into line.  Returns FALSE if there's a problem (like
00087  * line too short.) */
00088 {
00089 struct spacedColumn *col;
00090 int i, len = strlen(line);
00091 for (i=0, col = colList; col != NULL; col = col->next, ++i)
00092     {
00093     if (col->start > len)
00094         return FALSE;
00095     int end = col->start + col->size;
00096     if (end > len) end = len;
00097     line[end] = 0;
00098     row[i] = trimSpaces(line + col->start);
00099     }
00100 return TRUE;
00101 }
00102 
00103 struct spacedColumn *spacedColumnFromWidthArray(int array[], int size)
00104 /* Return a list of spaced columns corresponding to widths in array.
00105  * The final char in each column should be whitespace. */
00106 {
00107 struct spacedColumn *col, *colList = NULL;
00108 int i;
00109 int start = 0;
00110 for (i=0; i<size; ++i)
00111     {
00112     int width = array[i];
00113     AllocVar(col);
00114     col->start = start;
00115     col->size = width-1;
00116     slAddHead(&colList, col);
00117     start += width;
00118     }
00119 slReverse(&colList);
00120 return colList;
00121 }
00122 
00123 struct spacedColumn *spacedColumnFromSizeCommaList(char *commaList)
00124 /* Given an comma-separated list of widths in ascii, return
00125  * a list of spacedColumns. */
00126 {
00127 struct slName *ascii, *asciiList = commaSepToSlNames(commaList);
00128 int colCount = slCount(asciiList);
00129 int widths[colCount], i;
00130 for (ascii = asciiList, i=0; ascii != NULL; ascii = ascii->next, ++i)
00131     widths[i] = sqlUnsigned(ascii->name);
00132 slFreeList(&asciiList);
00133 return spacedColumnFromWidthArray(widths, colCount);
00134 }
00135 

Generated on Tue Dec 25 18:39:32 2007 for blat by  doxygen 1.5.2