#include <stdio.h>

Include dependency graph for readindex.h:

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.
Data Structures | |
| struct | index_scanner |
| struct | lo_info |
| struct | listinfo |
Functions | |
| index_scanner * | open_index (char *index_prefix) |
| listinfo * | get_next_list (struct index_scanner *iscn, struct listinfo *info) |
| listinfo * | get_list_at (struct index_scanner *iscn, struct listinfo *info, long offset) |
| listinfo * | get_next_biglist (struct index_scanner *iscn, struct listinfo *info) |
| lo_info * | get_next_lo_info (struct index_scanner *iscn, struct lo_info *lo_info) |
| void | info_free (struct listinfo *info) |
| index_scanner * | reset_index (struct index_scanner *scn) |
| index_scanner * | close_index (struct index_scanner *scn) |
Yaniv Bernstein 2004
Definition in file readindex.h.
| struct index_scanner* close_index | ( | struct index_scanner * | scn | ) | [read] |
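Closes the index and frees the scanner structure. Note: in the listing below only idx_file is closed (the offsets file opened by open_index() is left open), and the pointer returned has already been passed to free(), so callers must not dereference it.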
Definition at line 166 of file readindex.c.
References index_scanner::idx_file.
Referenced by cluster_clusterSequences().
00167 { 00168 fclose(scn->idx_file); 00169 00170 free(scn); 00171 00172 return scn; 00173 }
Here is the caller graph for this function:

| struct listinfo* get_list_at | ( | struct index_scanner * | iscn, | |
| struct listinfo * | info, | |||
| long | offset | |||
| ) | [read] |
Definition at line 65 of file readindex.c.
References listinfo::doc_count, listinfo::doc_numbers, index_scanner::idx_file, info_free(), listinfo::phrase_frequency, listinfo::phrase_offsets, listinfo::size, vbyte_read(), vec_free(), vec_getvbyte(), vec_init(), and vec::vector.
Referenced by get_next_biglist(), and get_next_list().
00066 { 00067 struct vec *vector; 00068 unsigned long num, last_doc = -1; 00069 int c, d; 00070 FILE *idx = iscn->idx_file; 00071 unsigned long veclen, 00072 numdocs, 00073 occurs, 00074 last_offset; 00075 00076 if (offset >= 0) 00077 fseek(idx, offset, SEEK_SET); 00078 00079 /* There is nothing left to read; free structures */ 00080 if (!( vbyte_read(idx, &numdocs) 00081 && vbyte_read(idx, &occurs) 00082 && vbyte_read(idx, &veclen) )) 00083 { 00084 info_free(info); 00085 return NULL; 00086 } 00087 00088 /* Initialise the info structure if we weren't already given one */ 00089 if (info == NULL) 00090 { 00091 info = malloc(sizeof(struct listinfo)); 00092 info->doc_numbers = malloc(numdocs * sizeof(unsigned long)); 00093 info->phrase_frequency = malloc(numdocs * sizeof(unsigned long)); 00094 info->phrase_offsets = malloc(numdocs * sizeof(unsigned long *)); 00095 memset(info->phrase_offsets, 0, numdocs * sizeof(unsigned long *)); 00096 info->size = numdocs; 00097 } 00098 00099 /* We may need to enlarge various aspects of the structure if it is not 00100 * big enough to hold the current postings list */ 00101 if (info->size < numdocs) 00102 { 00103 info->doc_numbers = realloc(info->doc_numbers, numdocs * sizeof(unsigned long)); 00104 info->phrase_frequency = realloc(info->phrase_frequency, numdocs * sizeof(unsigned long)); 00105 info->phrase_offsets = realloc(info->phrase_offsets, numdocs * sizeof(unsigned long *)); 00106 memset(info->phrase_offsets + info->size, 0, (numdocs - info->size) * sizeof(unsigned long *)); 00107 info->size = numdocs; 00108 } 00109 00110 info->doc_count = numdocs; 00111 00112 vector = vec_init(veclen); 00113 vector->len = veclen; 00114 00115 /* Populate the vector's block of memory from the appropriate location 00116 * in the vector file */ 00117 fread(vector->vector, veclen, 1, idx); 00118 00119 /* Read in the stats for each document in the postings list */ 00120 for (c = 0; c < numdocs; c++) 00121 { 00122 /* Get the document number (stored 
as a d-gap) */ 00123 vec_getvbyte(vector, &num); 00124 info->doc_numbers[c] = last_doc + num + 1; 00125 last_doc += num + 1; 00126 00127 /* Read the number of times the phrase occurs in this document */ 00128 vec_getvbyte(vector, &info->phrase_frequency[c]); 00129 if (info->phrase_offsets[c] != NULL) 00130 free(info->phrase_offsets[c]); 00131 00132 /* Prepare the memory for the appropriate number of offsets */ 00133 info->phrase_offsets[c] = malloc(info->phrase_frequency[c] * sizeof(unsigned long)); 00134 memset(info->phrase_offsets[c], 0, info->phrase_frequency[c] * sizeof(unsigned long)); 00135 00136 /* Read in all the document offsets */ 00137 last_offset = -1; 00138 for (d = 0; d < info->phrase_frequency[c]; d++) 00139 { 00140 vec_getvbyte(vector, info->phrase_offsets[c] + d); 00141 last_offset += (info->phrase_offsets[c])[d] + 1; 00142 (info->phrase_offsets[c])[d] = last_offset; 00143 } 00144 } 00145 00146 vec_free(vector); 00147 00148 return info; 00149 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct listinfo* get_next_biglist | ( | struct index_scanner * | iscn, | |
| struct listinfo * | info | |||
| ) | [read] |
Definition at line 36 of file readindex.c.
References get_list_at(), get_next_lo_info(), and lo_info::offset.
Referenced by cluster_clusterSequences().
00037 { 00038 struct lo_info *lo_info; 00039 00040 lo_info = get_next_lo_info(iscn, NULL); 00041 00042 if (!lo_info) 00043 return NULL; 00044 00045 return get_list_at(iscn, info, lo_info->offset); 00046 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct listinfo* get_next_list | ( | struct index_scanner * | iscn, | |
| struct listinfo * | info | |||
| ) | [read] |
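Returns a listinfo data structure for the next phrase in the index, read from the current position of the index file (it calls get_list_at() with an offset of -1, so no seek is performed).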
Definition at line 31 of file readindex.c.
References get_list_at().
Referenced by cluster_clusterSequences().
00032 { 00033 return get_list_at(iscn, info, -1); 00034 }
Here is the call graph for this function:

Here is the caller graph for this function:

| struct lo_info* get_next_lo_info | ( | struct index_scanner * | iscn, | |
| struct lo_info * | lo_info | |||
| ) | [read] |
Definition at line 48 of file readindex.c.
References index_scanner::offsets, and vbyte_read().
Referenced by get_next_biglist().
00049 { 00050 FILE *offsetfile = iscn->offsets; 00051 00052 if (!lo_info) 00053 lo_info = malloc(sizeof(struct lo_info)); 00054 00055 vbyte_read(offsetfile, &(lo_info->size)); 00056 if (!(vbyte_read(offsetfile, &(lo_info->offset)))) 00057 { 00058 free(lo_info); 00059 return NULL; 00060 } 00061 00062 return lo_info; 00063 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void info_free | ( | struct listinfo * | info | ) |
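Deallocates the memory used by the data structure. Note: the listing below frees doc_numbers, phrase_frequency and the structure itself; the phrase_offsets array and the per-document offset arrays allocated in get_list_at() are not freed here.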
Definition at line 151 of file readindex.c.
References listinfo::doc_numbers, and listinfo::phrase_frequency.
Referenced by get_list_at().
00152 { 00153 free(info->doc_numbers); 00154 free(info->phrase_frequency); 00155 00156 free(info); 00157 }
Here is the caller graph for this function:

| struct index_scanner* open_index | ( | char * | index_prefix | ) | [read] |
Returns a data structure that contains open buffered file pointers to the index file and all the vector files in preparation for sequential reading of postings vectors.
Definition at line 13 of file readindex.c.
References index_scanner::idx_file, and index_scanner::offsets.
Referenced by cluster_clusterSequences().
00014 { 00015 struct index_scanner *scanner; 00016 char fnamebuf[FILENAME_MAX + 1]; 00017 00018 fnamebuf[FILENAME_MAX] = '\0'; 00019 00020 scanner = (struct index_scanner *) malloc(sizeof(struct index_scanner)); 00021 00022 snprintf(fnamebuf, FILENAME_MAX, "%s.idx", index_prefix); 00023 scanner->idx_file = fopen(fnamebuf, "r"); 00024 00025 snprintf(fnamebuf, FILENAME_MAX, "%s.lo", index_prefix); 00026 scanner->offsets = fopen(fnamebuf, "r"); 00027 00028 return scanner; 00029 }
Here is the caller graph for this function:

| struct index_scanner* reset_index | ( | struct index_scanner * | scn | ) | [read] |
Resets the index file pointer back to the beginning of the index. Note: the listing below rewinds only idx_file; the offsets file is not repositioned.
Definition at line 159 of file readindex.c.
References index_scanner::idx_file.
Referenced by cluster_clusterSequences().
00160 { 00161 fseek(scn->idx_file, 0, SEEK_SET); 00162 00163 return scn; 00164 }
Here is the caller graph for this function:

1.5.2