src/readdbApp.c File Reference

#include "blast.h"

Include dependency graph for readdbApp.c:

Go to the source code of this file.

Functions

int4 main (int4 argc, char *argv[])


Function Documentation

int4 main(int4 argc, char *argv[])

Definition at line 10 of file readdbApp.c.

References child::description, child::descriptionLength, child::descriptionLocation, descriptions_getDescription(), child::edits, encoding_alphabetType, encoding_alphabetTypes, encoding_byteUnpack(), encoding_initialize(), encoding_nucleotide, encoding_protein, global_realloc(), child::length, print_singleSequence(), readdb_dbAlphabetType, readdb_getChildren(), readdb_longestSequenceLength, readdb_nextVolume(), readdb_numberOfClusters, readdb_numberOfLetters, readdb_numberOfSequences, readdb_numberOfVolumes, readdb_open(), readdb_readSequence(), child::regionStart, child::sequence, and uint4.

00011 { // Prints database statistics, then every sequence (and the children of each cluster parent) volume by volume
00012         unsigned char *filename, *sequence;
00013         uint4 descriptionStart = 0, descriptionLength = 0, sequenceLength;
00014         uint4 encodedLength, numChildren, childNum, count;
00015         char *description;
00016     struct child* children, *child;
00017         uint4* clusterSizes = NULL, numClusterSizes = 0; // clusterSizes[k] counts parents seen with exactly k children
00018         uint4 display = 1; // Output switch; never changed after this initialization
00019 
00020         // A filename argument is required (the usage text calls it a FASTA file)
00021         if (argc < 2)
00022         {
00023                 fprintf(stderr, "Useage: readdb <FASTA file>\n"); // NOTE(review): the word Useage in this message is a typo for Usage
00024                 exit(-1);
00025         }
00026         filename = argv[1]; // NOTE(review): char* assigned to unsigned char* without a cast
00027 
00028         readdb_open(filename);
00029 
00030     printf("Number of clusters = %u\n", readdb_numberOfClusters);
00031     printf("Number of sequences = %u\n", readdb_numberOfSequences);
00032     printf("Number of volumes = %u\n", readdb_numberOfVolumes);
00033         printf("Total number of letters = %llu\n", readdb_numberOfLetters);
00034         printf("Length of longest sequence = %u\n", readdb_longestSequenceLength);
00035         printf("Alphabet type = %s\n", encoding_alphabetTypes[readdb_dbAlphabetType]);
00036 
00037         // Set up encoding for the alphabet type of the opened database
00038         encoding_initialize(readdb_dbAlphabetType);
00039 
00040     do // Body runs once, then repeats for as long as readdb_nextVolume() returns non-zero
00041     {
00042         // Read sequences until readdb_readSequence() returns zero
00043         while (readdb_readSequence(&sequence, &sequenceLength, &descriptionStart,
00044                                    &descriptionLength, &encodedLength))
00045         {
00046             // Unpack nucleotide sequences; the unpacked copy is freed at the end of this iteration
00047             if (encoding_alphabetType == encoding_nucleotide)
00048                 sequence = encoding_byteUnpack(sequence, sequenceLength);
00049 
00050             if (encoding_alphabetType == encoding_protein && sequenceLength + 2 != encodedLength)
00051             {
00052                 // Protein entry whose encoded length is not sequenceLength + 2: treated as a cluster parent, so fetch its children
00053                 children = readdb_getChildren(sequence, sequenceLength, encodedLength,
00054                                               descriptionStart, &numChildren);
00055 
00056                                 // Record number of clusters of each size, growing and zero-filling the table when needed
00057                                 if (numChildren + 1 > numClusterSizes)
00058                 {
00059                                         clusterSizes = global_realloc(clusterSizes, sizeof(uint4) * (numChildren + 1));
00060                     while (numClusterSizes < numChildren + 1)
00061                     {
00062                                                 clusterSizes[numClusterSizes] = 0;
00063                         numClusterSizes++;
00064                     }
00065                 }
00066                 clusterSizes[numChildren]++;
00067 
00068 //                if (sequenceLength < 60)
00069 //                display = 1;
00070 
00071                 if (display)
00072                 printf("\n*** Parent with %d children ***\n", numChildren); // NOTE(review): numChildren is uint4 but printed with %d
00073 
00074                 // Print the parent sequence of the cluster
00075                 if (display)
00076                 {
00077                         print_singleSequence(sequence, sequenceLength); printf("\n");
00078                 }
00079 
00080                 // For each child
00081                 childNum = 0;
00082                 while (childNum < numChildren)
00083                 {
00084                     child = children + childNum;
00085 
00086                     // Indent by regionStart spaces so the child lines up under the parent
00087                     count = 0;
00088                     if (display)
00089                     while (count < child->regionStart)
00090                     {
00091                         printf(" ");
00092                         count++;
00093                     }
00094 
00095                     // Print child sequence
00096                     if (display)
00097                     print_singleSequence(child->sequence, child->length);
00098 
00099                     description = descriptions_getDescription(child->descriptionLocation,
00100                                                               child->descriptionLength);
00101 
00102                     // Pad with spaces out to the parent length so descriptions are printed at the end
00103                     count = child->regionStart + child->length;
00104                     if (display)
00105                     while (count < sequenceLength)
00106                     {
00107                         printf(" ");
00108                         count++;
00109                     }
00110 
00111                     if (display)
00112                     {
00113 //                      printf(" (%s dloc=%d)\n", description, child->descriptionLocation);
00114                         printf(" (%s)\n", description);
00115                                         }
00116 
00117                     free(child->sequence - 1); // NOTE(review): frees one byte before child->sequence; presumably the true start of the allocation, confirm in readdb_getChildren()
00118                     free(child->edits);
00119 
00120                     childNum++;
00121                 }
00122 
00123                 free(children);
00124             }
00125             else
00126             {
00127                 // Not handled as a cluster: fetch the description and print it as a line starting with >
00128                 description = descriptions_getDescription(descriptionStart, descriptionLength);
00129                 if (display)
00130                 {
00131 //                      printf(">%s (dloc = %d)\n", description, descriptionStart);
00132                         printf(">%s\n", description);
00133                                 }
00134 
00135                 // Print sequence
00136                 if (display)
00137                 {
00138                         print_singleSequence(sequence, sequenceLength); printf("\n");
00139                 }
00140             }
00141 
00142             // Free unpacked sequence (only nucleotide sequences were unpacked above)
00143             if (encoding_alphabetType == encoding_nucleotide)
00144                 free(sequence);
00145         }
00146         }
00147     while (readdb_nextVolume());
00148 
00149         printf("%d sequences read.\n", readdb_numberOfSequences); // NOTE(review): %d here, while the same counter is printed with %u above
00150         fflush(stdout);
00151 
00152     while (numClusterSizes > 0) // Report the tallies from the largest recorded child count down to 0
00153     {
00154         numClusterSizes--;
00155         printf("%d clusters with %d children\n", clusterSizes[numClusterSizes], numClusterSizes);
00156     }
00157 
00158         return 0; // NOTE(review): clusterSizes is not freed before returning
00159 }

Here is the call graph for this function:


Generated on Wed Dec 19 20:59:12 2007 for fsa-blast by  doxygen 1.5.2