include/unpack.h File Reference

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

void unpack_initialize ()
struct unpackRegion * unpack_selectRegion (struct unpackRegion *unpackRegions, uint4 numUnpackRegions, uint4 subjectOffset)
void unpack_extendRegionStart (int4 position, struct unpackRegion *unpackRegion)
void unpack_extendRegionEnd (int4 position, struct unpackRegion *unpackRegion)
int4 unpack_loadSubject (struct PSSMatrix PSSMatrix, struct alignment *alignment)
void unpack_unpackSubject (struct PSSMatrix PSSMatrix, struct alignment *alignment)
int unpack_entireSubjectUnpacked (struct alignment *alignment)
void unpack_free ()


Function Documentation

int unpack_entireSubjectUnpacked ( struct alignment * alignment  ) 

Definition at line 511 of file unpack.c.

References unpackRegion::endOffset, unpackRegion::startOffset, alignment::subjectLength, and alignment::unpackRegions.

Referenced by alignments_getTracebacks().

00512 {
00513         if (alignment->unpackRegions[0].startOffset == 0 &&
00514         alignment->unpackRegions[0].endOffset == alignment->subjectLength)
00515                 return 1;
00516     else
00517         return 0;
00518 }

Here is the caller graph for this function:

void unpack_extendRegionEnd ( int4  position,
struct unpackRegion * unpackRegion 
)

Definition at line 106 of file unpack.c.

References constants_unpackRegionExtend, encoding_byteUnpackRegion(), unpackRegion::endOffset, global_realloc(), int4, unpackRegion::startOffset, unpackRegion::subject, unpackRegion::subjectLength, and unpackRegion::unpackedSubject.

Referenced by gappedExtension_dpAfterSeed().

00107 {
00108         unsigned char* newUnpackedSubject;
00109     int4 newRegionStart, newRegionEnd;
00110 
00111 //    printf("pos=%d region=%d,%d subjectLength=%d\n", position, unpackRegion->startOffset,
00112 //           unpackRegion->endOffset, unpackRegion->subjectLength); fflush(stdout);
00113 
00114         if (position > unpackRegion->endOffset)
00115     {
00116         // Extend the region end
00117         newRegionStart = unpackRegion->startOffset;
00118         newRegionEnd = unpackRegion->endOffset + constants_unpackRegionExtend;
00119         if (newRegionEnd > unpackRegion->subjectLength) newRegionEnd = unpackRegion->subjectLength;
00120 
00121         // Realloc memory for the new region
00122                 unpackRegion->unpackedSubject += unpackRegion->startOffset;
00123                 newUnpackedSubject = (unsigned char*)global_realloc(unpackRegion->unpackedSubject,
00124                               sizeof(char) * (newRegionEnd - newRegionStart));
00125                 newUnpackedSubject -= newRegionStart;
00126 
00127         // Round old end
00128         unpackRegion->endOffset = (unpackRegion->endOffset / 4) * 4;
00129 
00130                 // Unpack the new part of the region
00131                 encoding_byteUnpackRegion(newUnpackedSubject + unpackRegion->endOffset,
00132                 unpackRegion->subject + (unpackRegion->endOffset / 4),
00133             newRegionEnd - unpackRegion->endOffset);
00134 
00135                 unpackRegion->unpackedSubject = newUnpackedSubject;
00136 
00137         unpackRegion->endOffset = newRegionEnd;
00138         }
00139 }

Here is the call graph for this function:

Here is the caller graph for this function:

void unpack_extendRegionStart ( int4  position,
struct unpackRegion * unpackRegion 
)

Definition at line 67 of file unpack.c.

References constants_unpackRegionExtend, encoding_byteUnpackRegion(), unpackRegion::endOffset, global_malloc(), int4, unpackRegion::startOffset, unpackRegion::subject, and unpackRegion::unpackedSubject.

Referenced by gappedExtension_dpBeforeSeed().

00068 {
00069         unsigned char* newUnpackedSubject;
00070     int4 newRegionStart, newRegionEnd;
00071 
00072         if (position < unpackRegion->startOffset)
00073     {
00074         // Extend the region start
00075         newRegionStart = unpackRegion->startOffset - constants_unpackRegionExtend;
00076         if (newRegionStart < 0) newRegionStart = 0;
00077         newRegionEnd = unpackRegion->endOffset;
00078 
00079         // Make start of region a multiple of 4
00080         newRegionStart = (newRegionStart / 4) * 4;
00081 
00082         // Declare memory for the new region
00083         newUnpackedSubject = (unsigned char*)global_malloc(sizeof(char) * (newRegionEnd - newRegionStart));
00084                 newUnpackedSubject -= newRegionStart;
00085 
00086         // Copy unpacked subject from old region to new
00087         memcpy(newUnpackedSubject + unpackRegion->startOffset,
00088                unpackRegion->unpackedSubject + unpackRegion->startOffset,
00089                sizeof(char) * (unpackRegion->endOffset - unpackRegion->startOffset));
00090 
00091                 // Free old subject
00092                 unpackRegion->unpackedSubject += unpackRegion->startOffset;
00093         free(unpackRegion->unpackedSubject);
00094 
00095                 // Unpack the new part of the region
00096                 encoding_byteUnpackRegion(newUnpackedSubject + newRegionStart, unpackRegion->subject + (newRegionStart / 4),
00097                                   unpackRegion->startOffset - newRegionStart);
00098 
00099                 unpackRegion->unpackedSubject = newUnpackedSubject;
00100 
00101         unpackRegion->startOffset = newRegionStart;
00102         }
00103 }

Here is the call graph for this function:

Here is the caller graph for this function:

void unpack_free (  ) 

Definition at line 521 of file unpack.c.

References encoding_alphabetType, encoding_nucleotide, memBlocks_free(), memBlocks_getCurrent(), memBlocks_resetCurrent(), parameters_ssearch, unpackRegion::startOffset, unpackRegion::subject, unpack_subjectRegions, unpack_unpackRegions, and unpackRegion::unpackedSubject.

Referenced by alignments_free().

00522 {
00523         struct unpackRegion* region;
00524 
00525     // For each unpack region
00526         if (!parameters_ssearch && encoding_alphabetType == encoding_nucleotide)
00527         {
00528         memBlocks_resetCurrent(unpack_unpackRegions);
00529         while ((region = memBlocks_getCurrent(unpack_unpackRegions)) != NULL)
00530         {
00531             // Free the unpacked sequence
00532             free(region->unpackedSubject + region->startOffset);
00533         }
00534         }
00535 
00536     // For each copied subject region
00537         memBlocks_resetCurrent(unpack_subjectRegions);
00538         while ((region = memBlocks_getCurrent(unpack_subjectRegions)) != NULL)
00539     {
00540                 // Free the subject
00541         free(region->subject + region->startOffset / 4);
00542     }
00543 
00544     memBlocks_free(unpack_unpackRegions);
00545     memBlocks_free(unpack_subjectRegions);
00546 }

Here is the call graph for this function:

Here is the caller graph for this function:

void unpack_initialize (  ) 

Definition at line 14 of file unpack.c.

References constants_initialAllocUnpackRegions, memBlocks_initialize(), unpack_subjectRegions, and unpack_unpackRegions.

Referenced by alignments_initialize().

Here is the call graph for this function:

Here is the caller graph for this function:

int4 unpack_loadSubject ( struct PSSMatrix  PSSMatrix,
struct alignment * alignment 
)

Definition at line 291 of file unpack.c.

References blast_totalCopied, alignment::descriptionLocation, alignment::edits, alignment::encodedLength, encoding_alphabetType, encoding_protein, global_malloc(), alignment::inMemorySubject, int4, memBlocks_getLastEntry(), alignment::numUnpackRegions, parameters_verboseDloc, alignment::subject, unpackRegion::subject, alignment::subjectLength, uint4, unpack_getRegions(), unpack_subjectRegions, and alignment::unpackRegions.

Referenced by alignments_loadSubjectsIntoMemory().

00292 {
00293         uint4 totalCopied = 0;
00294         unsigned char *subject, *edits, *endEdits;
00295     struct unpackRegion *firstRegion = NULL, *lastRegion, *currentRegion;
00296     int4 numRegions, regionStart, regionEnd;
00297 
00298     // If protein search
00299     if (encoding_alphabetType == encoding_protein)
00300     {
00301         // Make copy of sequence
00302         subject = (unsigned char*)global_malloc(sizeof(unsigned char) * alignment->encodedLength);
00303         subject++;
00304         memcpy(subject - 1, alignment->subject - 1, alignment->encodedLength);
00305         alignment->subject = subject;
00306 
00307         blast_totalCopied += alignment->encodedLength;
00308     }
00309     // If a nucleotide search
00310     else
00311     {
00312                 // Get a list of regions to copy
00313         numRegions = unpack_getRegions(PSSMatrix, alignment, 1, unpack_subjectRegions);
00314         lastRegion = memBlocks_getLastEntry(unpack_subjectRegions);
00315         lastRegion++;
00316         firstRegion = lastRegion - numRegions;
00317 
00318         #ifdef VERBOSE
00319         if (parameters_verboseDloc == alignment->descriptionLocation)
00320         {
00321                 printf("%d regions for subject\n", lastRegion - firstRegion); fflush(stdout);
00322                 }
00323         #endif
00324 
00325         // Copy each region into memory
00326         currentRegion = firstRegion;
00327         while (currentRegion < lastRegion)
00328         {
00329             #ifdef VERBOSE
00330             if (parameters_verboseDloc == alignment->descriptionLocation)
00331             {
00332                 printf("Load region %d to %d into memory\n", currentRegion->startOffset,
00333                        currentRegion->endOffset); fflush(stdout);
00334                 fflush(stdout);
00335             }
00336             #endif
00337 
00338             regionStart = currentRegion->startOffset / 4;
00339             regionEnd = (currentRegion->endOffset + 3) / 4;
00340 
00341             currentRegion->unpackedSubject = NULL;
00342                         currentRegion->subject = (unsigned char*)global_malloc(sizeof(unsigned char)
00343                                                    * (regionEnd - regionStart));
00344 
00345             totalCopied += regionEnd - regionStart;
00346                         memcpy(currentRegion->subject, alignment->subject + regionStart, regionEnd - regionStart);
00347                         currentRegion->subject -= regionStart;
00348                         currentRegion->subjectLength = alignment->subjectLength;
00349 
00350                 blast_totalCopied += (regionEnd - regionStart);
00351 
00352             currentRegion++;
00353                 }
00354 
00355         // Store new alignment regions
00356         alignment->unpackRegions = firstRegion;
00357         alignment->numUnpackRegions = lastRegion - firstRegion;
00358 
00359         // If there are edits for this subject
00360         if (alignment->edits != NULL)
00361         {
00362             edits = alignment->edits;
00363             endEdits = alignment->subject + alignment->encodedLength;
00364 
00365             // Make an in-memory copy of them
00366             alignment->edits = (unsigned char*)malloc(sizeof(char) * (endEdits - edits));
00367             memcpy(alignment->edits, edits, endEdits - edits);
00368         }
00369 
00370         alignment->subject = NULL;
00371     }
00372 
00373     alignment->inMemorySubject = 1;
00374 
00375     return totalCopied;
00376 
00377 }

Here is the call graph for this function:

Here is the caller graph for this function:

struct unpackRegion* unpack_selectRegion ( struct unpackRegion * unpackRegions,
uint4  numUnpackRegions,
uint4  subjectOffset 
) [read]

Definition at line 46 of file unpack.c.

References unpackRegion::endOffset, and unpackRegion::startOffset.

Referenced by alignments_getTracebacks(), alignments_selectRegion(), print_gappedAlignmentsFull(), and unpack_unpackSubject().

00048 {
00049     // For each region
00050         while (numUnpackRegions > 0)
00051     {
00052         if (unpackRegions->startOffset <= subjectOffset && unpackRegions->endOffset > subjectOffset)
00053         {
00054                         // Return it if seed falls within its range
00055             return unpackRegions;
00056         }
00057 
00058         unpackRegions++;
00059                 numUnpackRegions--;
00060     }
00061 
00062     fprintf(stderr, "Error unpacking subject and performing gapped alignment\n");
00063     exit(-1);
00064 }

Here is the caller graph for this function:

void unpack_unpackSubject ( struct PSSMatrix  PSSMatrix,
struct alignment * alignment 
)

Definition at line 380 of file unpack.c.

References blast_totalUnpacked, alignment::descriptionLocation, alignment::edits, alignment::encodedLength, encoding_alphabetType, encoding_byteUnpackRegion(), encoding_protein, unpackRegion::endOffset, global_malloc(), int4, memBlocks_getLastEntry(), memBlocks_newEntry(), alignment::numUnpackRegions, parameters_ssearch, parameters_verboseDloc, unpackRegion::startOffset, alignment::subject, unpackRegion::subject, unpackRegion::subjectLength, alignment::subjectLength, uint4, unpack_compareUnpackRegions(), unpack_getRegions(), unpack_selectRegion(), unpack_unpackRegions, unpackRegion::unpackedSubject, alignment::unpackRegions, and vbyte_getVbyte.

Referenced by alignments_getTracebacks().

00381 {
00382     unsigned char *subject, *unpackedSubject, wildcard, *edits, *endEdits;
00383     uint4 wildcardPosition;
00384     struct unpackRegion *firstRegion = NULL, *lastRegion, *currentRegion, *unpackRegion;
00385     int4 regionStart, regionEnd, numRegions;
00386 
00387     // No need to unpack a protein subject, or already unpacked nucleotide subject
00388     if (parameters_ssearch || encoding_alphabetType == encoding_protein)
00389     {
00390         // Just create a single region covering the entire sequence
00391         firstRegion = memBlocks_newEntry(unpack_unpackRegions);
00392         firstRegion->startOffset = 0;
00393         firstRegion->endOffset = alignment->subjectLength;
00394         firstRegion->subject = alignment->subject;
00395         firstRegion->unpackedSubject = alignment->subject;
00396         firstRegion->subjectLength = alignment->subjectLength;
00397         alignment->unpackRegions = firstRegion;
00398         alignment->numUnpackRegions = 1;
00399         return;
00400     }
00401 
00402     // Get the subject regions for this alignment
00403     numRegions = unpack_getRegions(PSSMatrix, alignment, 0, unpack_unpackRegions);
00404     lastRegion = memBlocks_getLastEntry(unpack_unpackRegions);
00405     lastRegion++;
00406     firstRegion = lastRegion - numRegions;
00407 
00408     // Sort the regions in order of start position
00409         qsort(firstRegion, lastRegion - firstRegion,
00410           sizeof(struct unpackRegion), unpack_compareUnpackRegions);
00411 
00412     // Unpack each region
00413     currentRegion = firstRegion;
00414     while (currentRegion < lastRegion)
00415     {
00416         regionEnd = currentRegion->endOffset;
00417         regionStart = currentRegion->startOffset;
00418 
00419         #ifdef VERBOSE
00420         if (parameters_verboseDloc == alignment->descriptionLocation)
00421                 {
00422                 printf("Unpack subject region %d to %d (length=%d)\n", regionStart, regionEnd,
00423                    alignment->subjectLength);
00424             fflush(stdout);
00425         }
00426                 #endif
00427 
00428                 // Get the subject region to be unpacked
00429         if (alignment->unpackRegions == NULL)
00430         {
00431                 subject = alignment->subject;
00432         }
00433         else
00434         {
00435             unpackRegion = unpack_selectRegion(alignment->unpackRegions, alignment->numUnpackRegions,
00436                                                regionStart);
00437                         subject = unpackRegion->subject;
00438         }
00439 
00440                 // Declare memory for the region
00441         unpackedSubject = (unsigned char*)global_malloc(sizeof(char) * (regionEnd - regionStart));
00442 
00443                 // Unpack the region of interest
00444         encoding_byteUnpackRegion(unpackedSubject, subject + (regionStart / 4),
00445                                   regionEnd - regionStart);
00446         unpackedSubject -= regionStart;
00447                 currentRegion->unpackedSubject = unpackedSubject;
00448 
00449         currentRegion->subject = subject;
00450         currentRegion->subjectLength = alignment->subjectLength;
00451 
00452         blast_totalUnpacked += (regionEnd - regionStart);
00453 
00454         currentRegion++;
00455     }
00456 
00457         currentRegion = firstRegion;
00458 
00459         // Get wildcard edits for the sequence
00460         edits = alignment->edits;
00461         endEdits = alignment->edits + alignment->encodedLength - ((alignment->subjectLength + 3) / 4);
00462 
00463     // If there are edits
00464     if (edits < endEdits)
00465     {
00466         // Read first wildcard
00467         wildcard = *edits;
00468         edits++;
00469 
00470         // Read its position
00471         vbyte_getVbyte(edits, &wildcardPosition);
00472 
00473         // For each region in order of position in the subject
00474         while (currentRegion < lastRegion)
00475         {
00476             // Skip past edits that are before current region
00477             while (edits < endEdits && wildcardPosition < currentRegion->startOffset)
00478             {
00479                 // Read wildcard
00480                 wildcard = *edits;
00481                 edits++;
00482 
00483                 // Read its position
00484                 vbyte_getVbyte(edits, &wildcardPosition);
00485             }
00486 
00487             // Process edits that are in the current region
00488             while (edits < endEdits && wildcardPosition < currentRegion->endOffset)
00489             {
00490                 // Insert wildcard into sequence
00491                 currentRegion->unpackedSubject[wildcardPosition] = wildcard;
00492 
00493                 // Read next wildcard
00494                 wildcard = *edits;
00495                 edits++;
00496 
00497                 // Read its position
00498                 vbyte_getVbyte(edits, &wildcardPosition);
00499             }
00500 
00501             // Advance to the next region
00502             currentRegion++;
00503         }
00504         }
00505 
00506     alignment->unpackRegions = firstRegion;
00507     alignment->numUnpackRegions = lastRegion - firstRegion;
00508 }

Here is the call graph for this function:

Here is the caller graph for this function:


Generated on Wed Dec 19 20:52:41 2007 for fsa-blast by  doxygen 1.5.2