include/readdb.h File Reference

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

void readdb_open (char *filename)
int readdb_readSequence (unsigned char **sequence, uint4 *sequenceLength, uint4 *descriptionStart, uint4 *descriptionLength, uint4 *encodedLength)
int readdb_nextVolume ()
struct child * readdb_getChildren (unsigned char *sequence, uint4 sequenceLength, uint4 encodedLength, uint4 descriptionLocation, uint4 *numChildren)
void readdb_close ()

Variables

uint4 readdb_numberOfSequences
uint4 readdb_longestSequenceLength
uint4 readdb_dbAlphabetType
uint8 readdb_numberOfLetters
unsigned char * readdb_filename
unsigned char * readdb_sequences
uint4 readdb_fileSize
uint4 readdb_sequenceCount
uint4 readdb_descriptionStart
uint4 readdb_volumeNumber
uint4 readdb_numberOfClusters
uint4 readdb_numberOfVolumes
uint4 readdb_numVolumeSequences
uint4 readdb_volume
struct sequenceData * readdb_sequenceData


Function Documentation

void readdb_close (  ) 

Definition at line 275 of file readdb.c.

References descriptions_close(), readdb_childBuffer, readdb_dataFilename, readdb_descriptionsFilename, readdb_readData, readdb_sequenceData, readdb_sequenceFilename, readdb_sizeChildBuffer, and readFile_close().

Referenced by blast_search(), and main().

// Listing of the body of readdb_close() as rendered from readdb.c.
// Releases the heap buffers and file handles held in the readdb_* file-scope
// state and resets the shared child buffer to empty. Takes no arguments and
// returns nothing.
00276 {
          // Free the per-sequence table and the three filename buffers.
          // NOTE(review): these four pointers are not set to NULL afterwards, so a
          // second call to readdb_close() would hand the same addresses to free()
          // again - confirm callers only ever close once.
00277         free(readdb_sequenceData);
00278         free(readdb_sequenceFilename);
00279     free(readdb_descriptionsFilename);
00280     free(readdb_dataFilename);
          // Close the currently open sequences volume and the data file.
00281     readFile_close(readdb_readSequences);
00282     readFile_close(readdb_readData);
          // Presumably the counterpart of the descriptions_open() call made in
          // readdb_open() - its implementation is not visible here.
00283     descriptions_close();
          // The child buffer is the only pointer reset after being freed, together
          // with its recorded capacity, so readdb_getChildren() can grow it again
          // from zero.
00284     free(readdb_childBuffer);
00285     readdb_childBuffer = NULL;
00286     readdb_sizeChildBuffer = 0;
00287 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct child* readdb_getChildren ( unsigned char *  sequence,
uint4  sequenceLength,
uint4  encodedLength,
uint4  descriptionLocation,
uint4 *  numChildren 
) [read]

Definition at line 200 of file readdb.c.

References edit::code, child::descriptionLength, child::descriptionLocation, child::edits, encoding_aaStartWildcards, encoding_sentinalCode, global_malloc(), global_realloc(), child::length, child::numEdits, edit::position, readdb_childBuffer, readdb_sizeChildBuffer, child::regionStart, child::sequence, uint4, and vbyte_getVbyte.

Referenced by alignments_expandCluster(), and main().

// Listing of the body of readdb_getChildren() as rendered from readdb.c.
// Decodes the child records stored after a parent sequence and returns them as
// a newly allocated array of struct child.
//
//   sequence            - start of the parent sequence; the child records are
//                         read from sequence + sequenceLength + 1 onwards
//   sequenceLength      - length of the parent sequence
//   encodedLength       - total encoded length; decoding stops once the read
//                         cursor reaches sequence + encodedLength - 1
//   descriptionLocation - description offset assigned to the first child; each
//                         later child gets the running sum of the previous
//                         description lengths added to it
//   numChildren         - output: number of children decoded
//
// Returns a block obtained from global_malloc() holding *numChildren entries.
// Each entry carries its own global_malloc()ed sequence and edits arrays; none
// of these are freed here.
00202 {
00203         unsigned char* edits, *editsEnd;
00204     struct child* child, *children;
00205     uint4 editNum, position;
00206 
00207     editsEnd = sequence + encodedLength - 1;
00208 
00209         // Advance to start of child information
00210         edits = sequence + sequenceLength + 1;
00211 
00212     *numChildren = 0;
00213         while (edits < editsEnd)
00214     {
00215         // Increase size of children buffer if required
00216         if (*numChildren >= readdb_sizeChildBuffer)
00217         {
                  // Capacity grows as (old + 1) * 2, so it also works from an
                  // initial capacity of zero.
00218                 readdb_sizeChildBuffer = (readdb_sizeChildBuffer + 1) * 2;
00219                 readdb_childBuffer = (struct child*)global_realloc(readdb_childBuffer,
00220                                   sizeof(struct child) * readdb_sizeChildBuffer);
00221         }
00222 
00223         child = readdb_childBuffer + *numChildren;
00224 
              // NOTE(review): edits is passed by value yet the loops depend on it
              // moving forward, so vbyte_getVbyte is presumably a macro that
              // advances its first argument - confirm against its definition.
00225         // Read child details
00226         vbyte_getVbyte(edits, &(child->descriptionLength));
00227         vbyte_getVbyte(edits, &(child->regionStart));
00228         vbyte_getVbyte(edits, &(child->length));
00229         vbyte_getVbyte(edits, &(child->numEdits));
00230 
              // Two extra bytes are allocated and the pointer is bumped by one so
              // that index -1 and index length can each hold a sentinel code.
00231         // Start with copy of parent sequence
00232         child->sequence = (unsigned char*)global_malloc(child->length + 2);
00233                 child->sequence++;
00234         memcpy(child->sequence, sequence + child->regionStart, child->length);
00235  
00236         // Add sentinal codes to either end
00237                 child->sequence[-1] = encoding_sentinalCode;
00238                 child->sequence[child->length] = encoding_sentinalCode;
00239 
00240         child->edits = (struct edit*)global_malloc(sizeof(struct edit) * child->numEdits);
00241 
00242         // Read edits and update sequence
00243         position = 0;
00244         editNum = 0;
00245         while (editNum < child->numEdits)
00246         {
                  // NOTE(review): this scan has no upper bound on position; it
                  // relies on the copied region containing a code that is not
                  // below encoding_aaStartWildcards for every edit. Whether the
                  // trailing sentinel would stop it cannot be told from here.
00247                 // Locate the next wildcard in the child
00248                         while (child->sequence[position] < encoding_aaStartWildcards)
00249                 position++;
00250 
                  // One byte is consumed from the edit stream per edit; the
                  // position is the one found by the scan above, not a stored value.
00251             // Read position and character
00252                 child->edits[editNum].position = position;
00253                         child->edits[editNum].code = *edits;
00254             edits++;
00255 
                  // NOTE(review): position is not advanced after the substitution,
                  // so the next scan restarts at this index; this presumably relies
                  // on the replacement code being below encoding_aaStartWildcards.
00256             // Change character in child
00257                         child->sequence[position] = child->edits[editNum].code;
00258 
00259                 editNum++;
00260         }
00261 
00262         child->descriptionLocation = descriptionLocation;
00263         descriptionLocation += child->descriptionLength;
00264         (*numChildren)++;
00265     }
00266 
          // The shared readdb_childBuffer stays allocated for reuse by later calls.
          // NOTE(review): when no children were decoded this requests zero bytes
          // from global_malloc(); the result depends on that wrapper.
00267     // Copy children from buffer into new memory block and return
00268         children = (struct child*)global_malloc(sizeof(struct child) * *numChildren);
00269     memcpy(children, readdb_childBuffer, sizeof(struct child) * *numChildren);
00270 
00271     return children;
00272 }

Here is the call graph for this function:

Here is the caller graph for this function:

int readdb_nextVolume (  ) 

Definition at line 123 of file readdb.c.

References readFile::address, sequenceData::descriptionLength, sequenceData::descriptionStart, sequenceData::encodedLength, encoding_alphabetType, encoding_protein, readFile::fileSize, readdb_data, readdb_descriptionStart, readdb_filename, readdb_fileSize, readdb_numberOfClusters, readdb_numberOfVolumes, readdb_numVolumeSequences, readdb_sequenceCount, readdb_sequenceData, readdb_sequenceFilename, readdb_sequences, readdb_volume, readFile_close(), readFile_open(), sequenceData::sequence, sequenceData::sequenceLength, uint4, and vbyte_getVbyte.

Referenced by blast_search(), and main().

// Listing of the body of readdb_nextVolume() as rendered from readdb.c.
// Switches the reader to the next sequences volume and refills
// readdb_sequenceData (from index 0) with the entries for that volume.
// Returns 0 when there is no further volume, 1 after a volume has been loaded.
00124 {
          // The initial values given to sequenceCount and offset here are
          // overwritten before the loop below uses them.
00125         uint4 encodedLength, sequenceLength, descriptionLength, sequenceCount = 0, offset = 0;
00126 
          // readdb_volume is incremented before the check, so it is advanced even
          // on the call that returns 0.
00127     // Return 0 if no more volumes to read
00128         readdb_volume++;
00129     if (readdb_volume >= readdb_numberOfVolumes)
00130         return 0;
00131 
00132     // Close current volume
00133     readFile_close(readdb_readSequences);
00134 
          // NOTE(review): readdb_sequenceFilename was allocated in readdb_open() as
          // strlen(filename) + 15 bytes. The name written here needs the base
          // name, the 10 characters of the .sequences suffix, the volume digits
          // and a terminator, so it only fits while readdb_volume has at most
          // four digits. The write is an unbounded sprintf.
00135     // Open next volume
00136     sprintf(readdb_sequenceFilename, "%s.sequences%d", readdb_filename, readdb_volume);
00137     readdb_readSequences = readFile_open(readdb_sequenceFilename);
00138     readdb_sequences = (unsigned char*)readdb_readSequences.address;
00139 
00140     // Get the sequences file size
00141         readdb_fileSize = readdb_readSequences.fileSize;
00142 
          // Entries are read until either readdb_numberOfClusters entries have
          // been filled (the size readdb_sequenceData was allocated with in
          // readdb_open()) or the running offset reaches the end of the file.
          // readdb_data is not reset here, so reading continues from wherever the
          // previous reads left it - presumably vbyte_getVbyte advances it.
00143     // For each sequence in next volume volume
00144     offset = 1;
00145         sequenceCount = 0;
00146     while (sequenceCount < readdb_numberOfClusters && offset < readdb_fileSize)
00147     {
00148         // Read sequence data
00149         vbyte_getVbyte(readdb_data, &descriptionLength);
00150         vbyte_getVbyte(readdb_data, &sequenceLength);
00151         vbyte_getVbyte(readdb_data, &encodedLength);
00152 
              // descriptionStart is the running total kept in readdb_descriptionStart,
              // which carries over from the previous volume.
00153         readdb_sequenceData[sequenceCount].descriptionLength = descriptionLength;
00154         readdb_sequenceData[sequenceCount].descriptionStart = readdb_descriptionStart;
00155         readdb_sequenceData[sequenceCount].sequenceLength = sequenceLength;
00156         readdb_sequenceData[sequenceCount].encodedLength = encodedLength;
00157 
00158         // Record pointer to sequence
00159         readdb_sequenceData[sequenceCount].sequence = readdb_sequences + offset;
00160 
              // NOTE(review): this tests encoding_alphabetType, whereas the
              // matching loop in readdb_open() tests readdb_dbAlphabetType -
              // confirm the two always hold the same value.
00161         // If protein data skip past sentinal byte
00162         if (encoding_alphabetType == encoding_protein)
00163             readdb_sequenceData[sequenceCount].sequence++;
00164 
00165         offset += encodedLength;
00166 
00167         readdb_descriptionStart += descriptionLength;
00168 
00169         sequenceCount++;
00170     }
00171 
          // Restart the per-volume read position and record how many entries
          // this volume provided.
00172     readdb_sequenceCount = 0;
00173     readdb_numVolumeSequences = sequenceCount;
00174 
00175     return 1;
00176 }

Here is the call graph for this function:

Here is the caller graph for this function:

void readdb_open ( char *  filename  ) 

Definition at line 22 of file readdb.c.

References readFile::address, constants_databaseVersion, sequenceData::descriptionLength, descriptions_open(), sequenceData::descriptionStart, sequenceData::encodedLength, encoding_protein, readFile::fileSize, global_malloc(), readdb_data, readdb_dataFilename, readdb_dbAlphabetType, readdb_descriptionsFilename, readdb_descriptionStart, readdb_filename, readdb_fileSize, readdb_longestSequenceLength, readdb_numberOfClusters, readdb_numberOfLetters, readdb_numberOfSequences, readdb_numberOfVolumes, readdb_numVolumeSequences, readdb_readData, readdb_sequenceCount, readdb_sequenceData, readdb_sequenceFilename, readdb_sequences, readdb_volume, readdb_volumeNumber, readFile_checkOpen(), readFile_open(), sequenceData::sequence, sequenceData::sequenceLength, uint4, vbyte_get64vbyte(), vbyte_getVbyte, and wildcards_readWildcards().

Referenced by main().

// Listing of the body of readdb_open() as rendered from readdb.c.
// Opens the .sequences, .descriptions, .data and .wildcards files derived from
// filename, reads the database statistics from the .data file, and builds the
// readdb_sequenceData table for the first volume.
//
//   filename - base name of the formatted collection; the suffixes are appended
//              to it
//
// Returns nothing. Calls exit(-1) if the .sequences file cannot be opened or
// the stored database version differs from constants_databaseVersion.
00023 {
00024         uint4 databaseVersion, encodedLength, sequenceLength, descriptionLength, sequenceCount, offset;
00025         char* wildcardsFile;
00026 
          // NOTE(review): the pointer itself is stored, not a copy, and
          // readdb_nextVolume() reads it later - the string must stay valid for
          // as long as the database is open.
00027         readdb_filename = filename;
00028     readdb_sequenceCount = 0;
00029     readdb_descriptionStart = 0;
00030     readdb_volumeNumber = 0;
00031     readdb_volume = 0;
00032 
          // The buffer is sized strlen + 15 although the name written here only
          // needs strlen + 11; readdb_nextVolume() reuses the same buffer for
          // names that also carry a volume number.
00033         // Open sequence file for reading, mapping contents to readdb_address
00034         readdb_sequenceFilename = (char*)global_malloc(strlen(filename) + 15);
00035         sprintf(readdb_sequenceFilename, "%s.sequences", filename);
00036 
00037     // Report error if .sequences file doesn't exist
00038     if (!readFile_checkOpen(readdb_sequenceFilename))
00039     {
00040         fprintf(stderr, "Error: unable to open file %s for reading\n", readdb_sequenceFilename);
00041         fprintf(stderr, "Before searching a collection you must to format it ");
00042         fprintf(stderr, "using FSA-BLAST's formatdb tool which creates .sequences .descriptions and ");
00043         fprintf(stderr, ".data files for the collection.\n");
00044         exit(-1);
00045     }
00046 
          // NOTE(review): readdb_sequences is listed as unsigned char * but the
          // cast here is to char *, unlike the cast in readdb_nextVolume().
00047     readdb_readSequences = readFile_open(readdb_sequenceFilename);
00048         readdb_sequences = (char*)readdb_readSequences.address;
00049 
          // The .descriptions suffix is 13 characters, so with the terminator the
          // name needs strlen + 14 bytes and fits the strlen + 15 allocation.
00050     // Open descriptions file for reading
00051     readdb_descriptionsFilename = (char*)global_malloc(strlen(filename) + 15);
00052         sprintf(readdb_descriptionsFilename, "%s.descriptions", filename);
00053     descriptions_open(readdb_descriptionsFilename);
00054 
          // Unlike the .sequences file, the data file is opened without a prior
          // readFile_checkOpen() test.
00055         // Open data file for reading
00056         readdb_dataFilename = (char*)global_malloc(strlen(filename) + 13);
00057         sprintf(readdb_dataFilename, "%s.data", filename);
00058         readdb_readData = readFile_open(readdb_dataFilename);
00059         readdb_data = (char*)readdb_readData.address;
00060 
          // The wildcards filename is a temporary: it is freed as soon as
          // wildcards_readWildcards() returns.
00061     // Read in a set of wildcards
00062     wildcardsFile = (char*)global_malloc(strlen(filename) + 13);
00063         sprintf(wildcardsFile, "%s.wildcards", filename);
00064     wildcards_readWildcards(wildcardsFile);
00065     free(wildcardsFile);
00066 
00067     // Get the sequences file size
00068         readdb_fileSize = readdb_readSequences.fileSize;
00069 
          // The header fields are read from readdb_data in a fixed order: version,
          // number of sequences, number of letters (64-bit), longest sequence
          // length, alphabet type, number of clusters, number of volumes.
00070         // Read database statistics
00071     vbyte_getVbyte(readdb_data, &databaseVersion);
00072     if (databaseVersion != constants_databaseVersion)
00073     {
00074         fprintf(stderr, "Error: Invalid formatted database version %d. ", databaseVersion);
00075         fprintf(stderr, "Current supported version is %d.\n", constants_databaseVersion);
00076         fprintf(stderr, "Use formatdb tool to reformat database to version %d.\n",
00077                         constants_databaseVersion);
00078         exit(-1);
00079     }
00080     vbyte_getVbyte(readdb_data, &readdb_numberOfSequences);
          // vbyte_get64vbyte() returns the advanced read pointer, which is stored
          // back explicitly; the vbyte_getVbyte uses do not assign a result and
          // presumably advance readdb_data themselves (macro) - confirm.
00081     readdb_data = vbyte_get64vbyte(readdb_data, &readdb_numberOfLetters);
00082     vbyte_getVbyte(readdb_data, &readdb_longestSequenceLength);
00083     vbyte_getVbyte(readdb_data, &readdb_dbAlphabetType);
00084     vbyte_getVbyte(readdb_data, &readdb_numberOfClusters);
00085     vbyte_getVbyte(readdb_data, &readdb_numberOfVolumes);
00086 
          // One table entry per cluster; the loop below and the one in
          // readdb_nextVolume() never fill more than readdb_numberOfClusters entries.
00087         readdb_sequenceData = (struct sequenceData*)global_malloc(sizeof(struct sequenceData)
00088                         * readdb_numberOfClusters);
00089 
          // Scanning starts at offset 1 of the sequences file and stops when the
          // table is full or the offset reaches the file size.
00090         // For each sequence in first volume
00091     offset = 1;
00092         sequenceCount = 0;
00093     while (sequenceCount < readdb_numberOfClusters && offset < readdb_fileSize)
00094     {
00095         // Read sequence data
00096         vbyte_getVbyte(readdb_data, &descriptionLength);
00097         vbyte_getVbyte(readdb_data, &sequenceLength);
00098         vbyte_getVbyte(readdb_data, &encodedLength);
00099 
00100         readdb_sequenceData[sequenceCount].descriptionLength = descriptionLength;
00101         readdb_sequenceData[sequenceCount].descriptionStart = readdb_descriptionStart;
00102         readdb_sequenceData[sequenceCount].sequenceLength = sequenceLength;
00103         readdb_sequenceData[sequenceCount].encodedLength = encodedLength;
00104 
00105         // Record pointer to sequence
00106         readdb_sequenceData[sequenceCount].sequence = readdb_sequences + offset;
00107 
00108         // If protein data skip past sentinal byte
00109         if (readdb_dbAlphabetType == encoding_protein)
00110             readdb_sequenceData[sequenceCount].sequence++;
00111 
              // Sequences are laid out back to back: the next one starts
              // encodedLength bytes further on, and descriptions accumulate in
              // the same order.
00112         offset += encodedLength;
00113 
00114         readdb_descriptionStart += descriptionLength;
00115 
00116         sequenceCount++;
00117     }
00118 
00119     readdb_numVolumeSequences = sequenceCount;
00120 }

Here is the call graph for this function:

Here is the caller graph for this function:

int readdb_readSequence ( unsigned char **  sequence,
uint4 *  sequenceLength,
uint4 *  descriptionStart,
uint4 *  descriptionLength,
uint4 *  encodedLength 
)

Definition at line 179 of file readdb.c.

References sequenceData::descriptionLength, sequenceData::descriptionStart, sequenceData::encodedLength, readdb_numVolumeSequences, readdb_sequenceCount, readdb_sequenceData, sequenceData::sequence, and sequenceData::sequenceLength.

Referenced by main().

Here is the caller graph for this function:


Variable Documentation

uint4 readdb_dbAlphabetType

Definition at line 10 of file readdb.c.

Referenced by blast_search(), cluster_writeClusters(), main(), readdb_open(), and rsdb_writeClusters().

uint4 readdb_descriptionStart

Definition at line 13 of file readdb.c.

Referenced by readdb_nextVolume(), and readdb_open().

unsigned char* readdb_filename

Definition at line 12 of file readdb.c.

Referenced by readdb_nextVolume(), and readdb_open().

uint4 readdb_fileSize

Definition at line 13 of file readdb.c.

Referenced by readdb_nextVolume(), and readdb_open().

uint4 readdb_longestSequenceLength

Definition at line 10 of file readdb.c.

Referenced by alignments_expandCluster(), blast_search(), cluster_writeClusters(), main(), and readdb_open().

uint4 readdb_numberOfClusters

Definition at line 16 of file readdb.c.

Referenced by main(), readdb_nextVolume(), and readdb_open().

uint8 readdb_numberOfLetters

Definition at line 11 of file readdb.c.

Referenced by blast_search(), main(), and readdb_open().

uint4 readdb_numberOfSequences

Definition at line 10 of file readdb.c.

Referenced by blast_search(), cluster_writeClusters(), main(), print_XMLfooter(), readdb_open(), and rsdb_writeClusters().

uint4 readdb_numberOfVolumes

Definition at line 16 of file readdb.c.

Referenced by blast_search(), main(), readdb_nextVolume(), and readdb_open().

uint4 readdb_numVolumeSequences

Definition at line 18 of file readdb.c.

Referenced by blast_search(), readdb_nextVolume(), readdb_open(), and readdb_readSequence().

uint4 readdb_sequenceCount

Definition at line 13 of file readdb.c.

Referenced by readdb_nextVolume(), readdb_open(), and readdb_readSequence().

struct sequenceData* readdb_sequenceData

Definition at line 19 of file readdb.c.

Referenced by blast_search(), readdb_close(), readdb_nextVolume(), readdb_open(), and readdb_readSequence().

unsigned char * readdb_sequences

Definition at line 12 of file readdb.c.

Referenced by blast_search(), readdb_nextVolume(), and readdb_open().

uint4 readdb_volume

Definition at line 18 of file readdb.c.

Referenced by blast_search(), readdb_nextVolume(), and readdb_open().

uint4 readdb_volumeNumber

Definition at line 16 of file readdb.c.

Referenced by readdb_open().


Generated on Wed Dec 19 20:51:17 2007 for fsa-blast by  doxygen 1.5.2