include/writedb.h File Reference

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  edit
struct  child
struct  sequenceData

Functions

void writedb_initialize (char *filename, uint4 alphabetType)
void writedb_addSequence (unsigned char *sequence, uint4 sequenceLength, unsigned char *description, uint4 descriptionLength, unsigned char *wildcards, uint4 wildcardsLength, struct child *children, uint4 numChildren)
void writedb_close ()

Variables

char * writedb_filename
FILE * writedb_sequenceFile
FILE * writedb_descriptionsFile
uint8 writedb_volumeSize
char * writedb_sequenceFilename
char * writedb_descriptionsFilename
char * writedb_dataFilename
uint4 writedb_maximumSequenceLength
uint4 writedb_alphabetType
uint4 writedb_minimumSequenceLength
uint8 writedb_numberOfLetters
uint4 writedb_volume
uint4 writedb_sequenceCount


Function Documentation

void writedb_addSequence ( unsigned char *  sequence,
uint4  sequenceLength,
unsigned char *  description,
uint4  descriptionLength,
unsigned char *  wildcards,
uint4  wildcardsLength,
struct child *  children,
uint4  numChildren 
)

Definition at line 72 of file writedb.c.

References edit::code, constants_volumeMaxSize, child::description, sequenceData::descriptionLength, child::descriptionLength, child::edits, sequenceData::encodedLength, encoding_bytePackSequence(), encoding_nucleotide, encoding_sentinalCode, global_malloc(), child::length, memBlocks_newEntry(), child::numEdits, child::regionStart, sequenceData::sequenceLength, uint4, vbyte_safePutVbyte, writedb_alphabetType, writedb_descriptionsFile, writedb_descriptionsFilename, writedb_filename, writedb_maximumSequenceLength, writedb_minimumSequenceLength, writedb_numberOfClusters, writedb_numberOfLetters, writedb_sequenceCount, writedb_sequenceData, writedb_sequenceFile, writedb_sequenceFilename, writedb_volume, and writedb_volumeSize.

Referenced by cluster_writeClusters(), main(), and rsdb_writeClusters().

00075 {
00076         uint4 encodedLength, childNum, sizeEdits = 0, editNum;
00077         unsigned char *editData, *startEditData;
00078     struct child child;
00079     struct sequenceData* sequenceData;
00080 
00081     sequenceData = memBlocks_newEntry(writedb_sequenceData);
00082 
00083     // Write the description to file
00084     if (description != NULL)
00085         if (fwrite(description, sizeof(unsigned char), descriptionLength, writedb_descriptionsFile)
00086             < descriptionLength)
00087         {
00088             fprintf(stderr, "Error writing header to sequence file %s\n", writedb_sequenceFilename);
00089             exit(-1);
00090         }
00091 
00092     // Calculate length of encoded sequence
00093     if (writedb_alphabetType == encoding_nucleotide)
00094     {
00095         encodedLength = encoding_bytePackSequence(sequence, sequenceLength);
00096         }
00097     else
00098     {
00099         encodedLength = sequenceLength + 2;
00100     }
00101 
00102         // Calculate maximum space required to record sequence's edits
00103     childNum = 0;
00104         while (childNum < numChildren)
00105     {
00106         child = children[childNum];
00107                 sizeEdits += 16 + 5 * child.numEdits;
00108         childNum++;
00109         }
00110 
00111     // Initialize array to record edits
00112         editData = startEditData = global_malloc(sizeEdits);
00113 
00114     // Record children edits as vbytes
00115     childNum = 0;
00116         while (childNum < numChildren)
00117     {
00118         child = children[childNum];
00119 
00120         // Write children descriptions to disk
00121         if (fwrite(child.description, sizeof(unsigned char), child.descriptionLength,
00122             writedb_descriptionsFile) < child.descriptionLength)
00123         {
00124             fprintf(stderr, "Error writing description to sequence file %s\n", writedb_descriptionsFilename);
00125             exit(-1);
00126         }
00127         descriptionLength += child.descriptionLength;
00128 
00129         // Convert child details to vbytes
00130         vbyte_safePutVbyte(editData, child.descriptionLength);
00131         vbyte_safePutVbyte(editData, child.regionStart);
00132         vbyte_safePutVbyte(editData, child.length);
00133         vbyte_safePutVbyte(editData, child.numEdits);
00134 
00135         // Append edits
00136         editNum = 0;
00137         while (editNum < child.numEdits)
00138         {
00139                 // Record edit character
00140                         *editData = child.edits[editNum].code;
00141             editData++;
00142 
00143                 editNum++;
00144         }
00145 
00146         // Add sequence size to total tally of letters
00147         writedb_numberOfLetters += child.length;
00148         writedb_sequenceCount++;
00149 
00150         childNum++;
00151     }
00152 
00153     // Update volume size, encoded length
00154     encodedLength += (editData - startEditData);
00155     writedb_volumeSize += encodedLength + wildcardsLength;
00156 
00157     sequenceData->descriptionLength = descriptionLength;
00158         sequenceData->sequenceLength = sequenceLength;
00159         sequenceData->encodedLength = encodedLength + wildcardsLength;
00160 
00161     // If the entry will exceed volume max size
00162     if (writedb_volumeSize > constants_volumeMaxSize)
00163     {
00164         // Close current volume
00165         fclose(writedb_sequenceFile);
00166 
00167         // Open next volume for writing
00168         writedb_volume++;
00169         sprintf(writedb_sequenceFilename, "%s.sequences%d", writedb_filename, writedb_volume);
00170         if ((writedb_sequenceFile = fopen(writedb_sequenceFilename, "w")) == NULL)
00171         {
00172             fprintf(stderr, "Error opening file %s for writing\n", writedb_sequenceFilename);
00173             exit(-1);
00174         }
00175 
00176         // Reset volume size counter
00177         writedb_volumeSize = encodedLength + wildcardsLength;
00178     }
00179 
00180     // Nucleotide
00181     if (writedb_alphabetType == encoding_nucleotide)
00182     {
00183         // Write packed nucleotide sequences to disk
00184         if (fwrite(sequence, sizeof(unsigned char), encodedLength, writedb_sequenceFile) < encodedLength)
00185         {
00186             fprintf(stderr, "Error writing to sequence file %s\n", writedb_sequenceFilename);
00187             exit(-1);
00188         }
00189     }
00190     // Protein
00191     else
00192     {
00193         // Write sentinal byte before protein sequences
00194         fputc(encoding_sentinalCode, writedb_sequenceFile);
00195 
00196         // Write sequence codes to disk
00197         if (fwrite(sequence, sizeof(unsigned char), sequenceLength, writedb_sequenceFile) < sequenceLength)
00198         {
00199             fprintf(stderr, "Error writing to sequence file %s\n", writedb_sequenceFilename);
00200             exit(-1);
00201         }
00202 
00203         // Write sentinal byte after protein sequences
00204         fputc(encoding_sentinalCode, writedb_sequenceFile);
00205     }
00206 
00207     // Write wildcard data to disk
00208     if (fwrite(wildcards, sizeof(unsigned char), wildcardsLength, writedb_sequenceFile) < wildcardsLength)
00209     {
00210         fprintf(stderr, "Error writing to sequence file %s\n", writedb_sequenceFilename);
00211         exit(-1);
00212     }
00213 
00214     // Write edit information to disk
00215     if (fwrite(startEditData, sizeof(unsigned char), (editData - startEditData),
00216                writedb_sequenceFile) < (editData - startEditData))
00217     {
00218         fprintf(stderr, "Error writing to sequence file %s\n", writedb_sequenceFilename);
00219         exit(-1);
00220     }
00221     free(startEditData);
00222 
00223         if (numChildren == 0)
00224     {
00225         // Add sequence size to total tally of letters
00226         writedb_numberOfLetters += sequenceLength;
00227         writedb_sequenceCount++;
00228         }
00229 
00230     writedb_numberOfClusters++;
00231 
00232     // Check for new longest/shortest sequence
00233     if (sequenceLength > writedb_maximumSequenceLength)
00234         writedb_maximumSequenceLength = sequenceLength;
00235     if (writedb_minimumSequenceLength == 0 || sequenceLength < writedb_minimumSequenceLength)
00236         writedb_minimumSequenceLength = sequenceLength;
00237 }

Here is the call graph for this function:

Here is the caller graph for this function:

void writedb_close (  ) 

Definition at line 240 of file writedb.c.

References constants_databaseVersion, sequenceData::descriptionLength, sequenceData::encodedLength, encoding_protein, encoding_sentinalCode, memBlocks_free(), memBlocks_getCurrent(), memBlocks_resetCurrent(), sequenceData::sequenceLength, uint4, vbyte_safePutVbyte, writedb_alphabetType, writedb_dataFile, writedb_dataFilename, writedb_descriptionsFile, writedb_maximumSequenceLength, writedb_numberOfClusters, writedb_numberOfLetters, writedb_sequenceCount, writedb_sequenceData, writedb_sequenceFile, and writedb_volume.

Referenced by cluster_writeClusters(), main(), and rsdb_writeClusters().

00241 {
00242         unsigned char headerData[40], *headerDataPointer;
00243         uint4 headerLength;
00244     struct sequenceData* sequenceData;
00245 
00246     // Write sentinal/padding byte at end
00247         if (writedb_alphabetType == encoding_protein)
00248         fputc(encoding_sentinalCode, writedb_sequenceFile);
00249     else
00250         fputc(0, writedb_sequenceFile);
00251 
00252     // Close writing to sequence and description files
00253         fclose(writedb_sequenceFile);
00254         fclose(writedb_descriptionsFile);
00255 
00256         // Open data file for writing
00257         if ((writedb_dataFile = fopen(writedb_dataFilename, "w")) == NULL)
00258         {
00259                 fprintf(stderr, "Error opening file %s for writing\n", writedb_dataFilename);
00260                 exit(-1);
00261         }
00262 
00263         // Convert 7 header values to vbytes
00264     headerDataPointer = headerData;
00265     vbyte_safePutVbyte(headerDataPointer, constants_databaseVersion);
00266     vbyte_safePutVbyte(headerDataPointer, writedb_sequenceCount);
00267     vbyte_safePutVbyte(headerDataPointer, writedb_numberOfLetters);
00268     vbyte_safePutVbyte(headerDataPointer, writedb_maximumSequenceLength);
00269     vbyte_safePutVbyte(headerDataPointer, writedb_alphabetType);
00270     vbyte_safePutVbyte(headerDataPointer, writedb_numberOfClusters);
00271         vbyte_safePutVbyte(headerDataPointer, writedb_volume + 1);
00272 
00273     // Write the header data at the start of the file
00274     headerLength = headerDataPointer - headerData;
00275     if (fwrite(&headerData, sizeof(unsigned char), headerLength, writedb_dataFile) < headerLength)
00276         {
00277                 fprintf(stderr, "Error writing header to sequence file %s\n", writedb_dataFilename);
00278                 exit(-1);
00279         }
00280 
00281     // For each sequence
00282         memBlocks_resetCurrent(writedb_sequenceData);
00283     while ((sequenceData = memBlocks_getCurrent(writedb_sequenceData)) != NULL)
00284     {
00285         // Prepare to write sequence description length, subject length, and encoded length using vbytes
00286             headerDataPointer = headerData;
00287         vbyte_safePutVbyte(headerDataPointer, sequenceData->descriptionLength);
00288         vbyte_safePutVbyte(headerDataPointer, sequenceData->sequenceLength);
00289         vbyte_safePutVbyte(headerDataPointer, sequenceData->encodedLength);
00290 
00291         // Write sequence header information
00292         headerLength = headerDataPointer - headerData;
00293         if (fwrite(headerData, sizeof(unsigned char), headerLength, writedb_dataFile) < headerLength)
00294         {
00295             fprintf(stderr, "Error writing to sequence file %s\n", writedb_dataFilename);
00296             exit(-1);
00297         }
00298         }
00299 
00300         // Close writing to data file
00301         fclose(writedb_dataFile);
00302 
00303     memBlocks_free(writedb_sequenceData);
00304 }

Here is the call graph for this function:

Here is the caller graph for this function:

void writedb_initialize ( char *  filename,
uint4  alphabetType 
)

Definition at line 20 of file writedb.c.

References constants_initialSequenceData, encoding_protein, encoding_sentinalCode, global_malloc(), memBlocks_initialize(), writedb_alphabetType, writedb_dataFilename, writedb_descriptionsFile, writedb_descriptionsFilename, writedb_filename, writedb_maximumSequenceLength, writedb_minimumSequenceLength, writedb_numberOfClusters, writedb_numberOfLetters, writedb_sequenceCount, writedb_sequenceData, writedb_sequenceFile, writedb_sequenceFilename, writedb_volume, and writedb_volumeSize.

Referenced by cluster_writeClusters(), main(), and rsdb_writeClusters().

00021 {
00022         char* wildcardsFilename;
00023 
00024     writedb_filename = filename;
00025     writedb_alphabetType = alphabetType;
00026         writedb_maximumSequenceLength = 0;
00027         writedb_minimumSequenceLength = 0;
00028         writedb_numberOfLetters = 0;
00029         writedb_volume = 0;
00030     writedb_sequenceCount = 0;
00031     writedb_numberOfClusters = 0;
00032 
00033         // Construct sequence and description filenames
00034         writedb_sequenceFilename = (char*)global_malloc(strlen(filename) + 13);
00035         sprintf(writedb_sequenceFilename, "%s.sequences", filename);
00036         writedb_descriptionsFilename = (char*)global_malloc(strlen(filename) + 15);
00037         sprintf(writedb_descriptionsFilename, "%s.descriptions", filename);
00038         writedb_dataFilename = (char*)global_malloc(strlen(filename) + 8);
00039         sprintf(writedb_dataFilename, "%s.data", filename);
00040         wildcardsFilename = (char*)global_malloc(strlen(filename) + 12);
00041         sprintf(wildcardsFilename, "%s.wildcards", filename);
00042 
00043     // Discard any existing wildcards file by renaming it over the sequence file, which is truncated when opened for writing below
00044     rename(wildcardsFilename, writedb_sequenceFilename);
00045 
00046         // Open sequence file for writing
00047         if ((writedb_sequenceFile = fopen(writedb_sequenceFilename, "w")) == NULL)
00048         {
00049                 fprintf(stderr, "Error opening file %s for writing\n", writedb_sequenceFilename);
00050                 exit(-1);
00051         }
00052 
00053     // Write sentinal/padding byte at start
00054         if (alphabetType == encoding_protein)
00055         fputc(encoding_sentinalCode, writedb_sequenceFile);
00056     else
00057         fputc(0, writedb_sequenceFile);
00058 
00059         // Open descriptions file for writing
00060         if ((writedb_descriptionsFile = fopen(writedb_descriptionsFilename, "w")) == NULL)
00061         {
00062                 fprintf(stderr, "Error opening file %s for writing\n", writedb_descriptionsFilename);
00063                 exit(-1);
00064         }
00065 
00066     writedb_volumeSize = 1;
00067     writedb_sequenceData = memBlocks_initialize(sizeof(struct sequenceData),
00068                            constants_initialSequenceData);
00069 }

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

uint4 writedb_alphabetType

Definition at line 14 of file writedb.c.

Referenced by writedb_addSequence(), writedb_close(), and writedb_initialize().

char * writedb_dataFilename

Definition at line 13 of file writedb.c.

Referenced by cluster_writeClusters(), writedb_close(), and writedb_initialize().

FILE * writedb_descriptionsFile

Definition at line 11 of file writedb.c.

Referenced by writedb_addSequence(), writedb_close(), and writedb_initialize().

char * writedb_descriptionsFilename

Definition at line 13 of file writedb.c.

Referenced by cluster_writeClusters(), writedb_addSequence(), and writedb_initialize().

char* writedb_filename

Definition at line 10 of file writedb.c.

Referenced by writedb_addSequence(), and writedb_initialize().

uint4 writedb_maximumSequenceLength

Definition at line 14 of file writedb.c.

Referenced by main(), writedb_addSequence(), writedb_close(), and writedb_initialize().

uint4 writedb_minimumSequenceLength

Definition at line 14 of file writedb.c.

Referenced by main(), writedb_addSequence(), and writedb_initialize().

uint8 writedb_numberOfLetters

Definition at line 15 of file writedb.c.

Referenced by main(), rsdb_writeClusters(), writedb_addSequence(), writedb_close(), and writedb_initialize().

uint4 writedb_sequenceCount

Definition at line 16 of file writedb.c.

Referenced by main(), writedb_addSequence(), writedb_close(), and writedb_initialize().

FILE* writedb_sequenceFile

Definition at line 11 of file writedb.c.

Referenced by writedb_addSequence(), writedb_close(), and writedb_initialize().

char* writedb_sequenceFilename

Definition at line 13 of file writedb.c.

Referenced by cluster_writeClusters(), writedb_addSequence(), and writedb_initialize().

uint4 writedb_volume

Definition at line 16 of file writedb.c.

Referenced by main(), writedb_addSequence(), writedb_close(), and writedb_initialize().

uint8 writedb_volumeSize


Generated on Wed Dec 19 20:53:19 2007 for fsa-blast by  doxygen 1.5.2