include/postings.h File Reference

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  wordLocation
struct  finalList

Functions

void postings_initialize ()
void postings_addEntry (unsigned char *word, uint4 wordLength, uint4 sequenceNumber, uint4 offset)
void postings_print ()
struct finalList * postings_getSortedLists ()
uint4 postings_decodeList (struct finalList finalList, struct wordLocation *wordLocations)

Variables

uint4 postings_numLists


Function Documentation

void postings_addEntry ( unsigned char *  word,
uint4  wordLength,
uint4  sequenceNumber,
uint4  offset 
)

Definition at line 56 of file postings.c.

References global_malloc(), postingsList::hash64bit, postingsList::next, postings_addPosting(), postings_hash32bit(), postings_hash64bit(), postings_hashtable, postings_hashtableSize, postings_numLists, postings_seed, postings_size, uint4, and uint8.

Referenced by cluster_spexClusterSequences(), and rsdb_spexClusterSequences().

00057 {
00058         struct postingsList* postingsList;
00059         uint4 hash32bit;
00060     uint8 hash64bit;
00061 
00062     // Calculate 32 and 64 bit hash values
00063         hash32bit = postings_hash32bit(word, wordLength, postings_hashtableSize, postings_seed);
00064         hash64bit = postings_hash64bit(word, wordLength, postings_seed);
00065 
00066     // Look for matching entry in hashtable
00067     postingsList = postings_hashtable[hash32bit];
00068     while (postingsList != NULL)
00069     {
00070         if (postingsList->hash64bit == hash64bit)
00071         {
00072                 // Add new entry to postings list
00073                         postings_addPosting(postingsList, sequenceNumber, offset);
00074                 return;
00075         }
00076         postingsList = postingsList->next;
00077     }
00078 
00079     // Create a new entry
00080     postingsList = (struct postingsList*)global_malloc(sizeof(struct postingsList));
00081         postingsList->hash64bit = hash64bit;
00082         postingsList->list = NULL;
00083     postingsList->length = 0;
00084     postingsList->malloced = 0;
00085     postingsList->numEntries = 0;
00086     postingsList->lastSequenceNumber = 0;
00087         postings_size += sizeof(struct postingsList);
00088     postings_numLists++;
00089 
00090     // Add to start of list
00091         postingsList->next = postings_hashtable[hash32bit];
00092     postings_hashtable[hash32bit] = postingsList;
00093 
00094     // Add new entry to postings list
00095         postings_addPosting(postingsList, sequenceNumber, offset);
00096 }

Here is the call graph for this function:

Here is the caller graph for this function:

uint4 postings_decodeList ( struct finalList  finalList,
struct wordLocation *  wordLocations 
)

Definition at line 171 of file postings.c.

References finalList::list, finalList::numEntries, wordLocation::offset, wordLocation::sequenceNumber, uint4, and vbyte_getVbyte.

Referenced by cluster_spexClusterSequences(), and rsdb_spexClusterSequences().

00172 {
00173         unsigned char* list;
00174         uint4 numEntries, sGap, offset, sequenceNumber = 0, entry = 0;
00175 
00176     list = finalList.list;
00177     numEntries = finalList.numEntries;
00178 
00179     while (entry < numEntries)
00180     {
00181         vbyte_getVbyte(list, &sGap);
00182         sequenceNumber += sGap;
00183         vbyte_getVbyte(list, &offset);
00184 
00185                 wordLocations[entry].sequenceNumber = sequenceNumber;
00186                 wordLocations[entry].offset = offset;
00187 
00188 //        printf("%d,%d (%d/%d entries)\n", sequenceNumber, offset, entry, numEntries); fflush(stdout);
00189 
00190         entry++;
00191     }
00192 
00193     free(finalList.list);
00194 
00195     return numEntries;
00196 }

Here is the caller graph for this function:

struct finalList* postings_getSortedLists (  )  [read]

Definition at line 121 of file postings.c.

References global_malloc(), postingsList::length, postingsList::list, finalList::list, postingsList::next, postingsList::numEntries, finalList::numEntries, postings_compareLists(), postings_hashtable, postings_hashtableSize, postings_numLists, and uint4.

Referenced by cluster_spexClusterSequences(), and rsdb_spexClusterSequences().

00122 {
00123         struct finalList* finalLists;
00124     struct postingsList* postingsList, *previousPostingsList;
00125     uint4 listCount = 0, totalSize = 0;
00126         uint4 hash32bit;
00127 
00128     finalLists = (struct finalList*)global_malloc(sizeof(struct finalList) * postings_numLists);
00129 
00130     // For each list
00131         hash32bit = 0;
00132     while (hash32bit < postings_hashtableSize)
00133     {
00134         postingsList = postings_hashtable[hash32bit];
00135         while (postingsList != NULL)
00136         {
00137                 // Add to new list of final lists
00138                         finalLists[listCount].numEntries = postingsList->numEntries;
00139                         finalLists[listCount].list = postingsList->list;
00140 
00141                         totalSize += postingsList->length;
00142 
00143                         listCount++;
00144             previousPostingsList = postingsList;
00145             postingsList = postingsList->next;
00146 
00147             free(previousPostingsList);
00148         }
00149 
00150         hash32bit++;
00151     }
00152 
00153     free(postings_hashtable);
00154 
00155     // Sort the lists in order of number of entries
00156         qsort(finalLists, postings_numLists, sizeof(struct finalList), postings_compareLists);
00157 
00158     // Remove lists with less than 2 entries
00159     while (postings_numLists > 0 && finalLists[postings_numLists - 1].numEntries < 2)
00160     {
00161                 postings_numLists--;
00162         free(finalLists[postings_numLists].list);
00163     }
00164 
00165 //      printf("Total postings size=%d\n", totalSize);
00166 
00167     return finalLists;
00168 }

Here is the call graph for this function:

Here is the caller graph for this function:

void postings_initialize (  ) 

Definition at line 36 of file postings.c.

References global_malloc(), postings_hashtable, postings_hashtableSize, postings_numLists, postings_seed, postings_size, and uint4.

Referenced by cluster_spexClusterSequences(), and rsdb_spexClusterSequences().

00037 {
00038         uint4 count;
00039 
00040         postings_hashtableSize = pow(2,20);
00041     postings_hashtable = (struct postingsList**)global_malloc(sizeof(struct postingsList*)
00042                        * postings_hashtableSize);
00043         postings_seed = rand();
00044     postings_size = sizeof(struct postingsList*) * postings_hashtableSize;
00045     postings_numLists = 0;
00046 
00047     count = 0;
00048     while (count < postings_hashtableSize)
00049     {
00050                 postings_hashtable[count] = NULL;
00051         count++;
00052     }
00053 }

Here is the call graph for this function:

Here is the caller graph for this function:

void postings_print (  ) 

Definition at line 247 of file postings.c.

References postings_numLists, and postings_size.

Referenced by rsdb_spexClusterSequences().

00248 {
00249         printf("%d lists. Postings size=%.2f Mb\n", postings_numLists, (float)postings_size / 1024.0 / 1024.0);
00250 }

Here is the caller graph for this function:


Variable Documentation

uint4 postings_numLists

Definition at line 22 of file postings.c.

Referenced by cluster_spexClusterSequences(), postings_addEntry(), postings_getSortedLists(), postings_initialize(), postings_print(), and rsdb_spexClusterSequences().


Generated on Wed Dec 19 20:50:46 2007 for fsa-blast by  doxygen 1.5.2