lib/oligoTm.c

Go to the documentation of this file.
00001 /* oligoTm - calculate melting temperature of relatively short DNA sequences.
00002  * This is based on the nearest-neighbor thermodynamics of bases from Breslauer,
00003  * Frank, Bloecker, and Markey, Proc. Natl. Acad. Sci. USA, vol 83, page 3748,
00004  * and uses work from see Rychlik, Spencer, Roads, Nucleic Acids Research, vol 18, 
00005  * no 21.  This code was imported from the oligotm module of Whitehead Institute's
00006  * primer3 program, and adapted into UCSC conventions by Jim Kent.  Any redistribution
00007  * of this code should contain the following copyright notice from Whitehead:
00008  *
00009  * Copyright (c) 1996,1997,1998,1999,2000,2001,2004
00010  *         Whitehead Institute for Biomedical Research. All rights reserved.
00011  * 
00012  * Redistribution and use in source and binary forms, with or without
00013  * modification, are permitted provided that the following conditions are met:
00014  * 
00015  * 1.      Redistributions must reproduce the above copyright notice, this
00016  * list of conditions and the following disclaimer in the  documentation
00017  * and/or other materials provided with the distribution.  Redistributions of
00018  * source code must also reproduce this information in the source code itself.
00019  * 
00020  * 2.      If the program is modified, redistributions must include a notice
00021  * (in the same places as above) indicating that the redistributed program is
00022  * not identical to the version distributed by Whitehead Institute.
00023  * 
00024  * 3.      All advertising materials mentioning features or use of this
00025  * software  must display the following acknowledgment:
00026  *         This product includes software developed by the
00027  *         Whitehead Institute for Biomedical Research.
00028  * 
00029  * 4.      The name of the Whitehead Institute may not be used to endorse or
00030  * promote products derived from this software without specific prior written
00031  * permission.
00032  * 
00033  * We also request that use of this software be cited in publications as 
00034  * 
00035  *   Rozen, S., Skaletsky, H.  \"Primer3 on the WWW for general users
00036  *   and for biologist programmers.\"  In S. Krawetz and S. Misener, eds.
00037  *   Bioinformatics Methods and Protocols in the series Methods in 
00038  *   Molecular Biology.  Humana Press, Totowa, NJ, 2000, pages 365-386.
00039  *   Code available at
00040  *   http://fokker.wi.mit.edu/primer3/.
00041  * 
00042  * THIS SOFTWARE IS PROVIDED BY THE WHITEHEAD INSTITUTE ``AS IS'' AND  ANY
00043  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE  IMPLIED
00044  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE  ARE
00045  * DISCLAIMED. IN NO EVENT SHALL THE WHITEHEAD INSTITUTE BE LIABLE  FOR ANY
00046  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL  DAMAGES
00047  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS  OR
00048  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER
00049  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00050  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00051  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00052  * SUCH DAMAGE. */
00053 
00054 
00055 
00056 #include "common.h"
00057 #include "oligoTm.h"
00058 
00059 /* 
00060  * Tables of nearest-neighbor thermodynamics for DNA bases.  See Breslauer,
00061  * Frank, Bloecker, and Markey, Proc. Natl. Acad. Sci. USA, vol 83, page 3748,
00062  * table 2.
00063  */
00064 #define S_A_A 240
00065 #define S_A_C 173
00066 #define S_A_G 208
00067 #define S_A_T 239
00068 #define S_A_N 215
00069   
00070 #define S_C_A 129
00071 #define S_C_C 266
00072 #define S_C_G 278
00073 #define S_C_T 208
00074 #define S_C_N 220  
00075   
00076 #define S_G_A 135
00077 #define S_G_C 267
00078 #define S_G_G 266
00079 #define S_G_T 173
00080 #define S_G_N 210
00081   
00082 #define S_T_A 169
00083 #define S_T_C 135
00084 #define S_T_G 129
00085 #define S_T_T 240
00086 #define S_T_N 168
00087   
00088 #define S_N_A 168
00089 #define S_N_C 210
00090 #define S_N_G 220
00091 #define S_N_T 215
00092 #define S_N_N 203
00093 
00094 
00095 #define H_A_A  91
00096 #define H_A_C  65
00097 #define H_A_G  78
00098 #define H_A_T  86
00099 #define H_A_N  80
00100 
00101 #define H_C_A  58
00102 #define H_C_C 110
00103 #define H_C_G 119
00104 #define H_C_T  78
00105 #define H_C_N  91
00106 
00107 #define H_G_A  56
00108 #define H_G_C 111
00109 #define H_G_G 110
00110 #define H_G_T  65
00111 #define H_G_N  85
00112 
00113 #define H_T_A  60
00114 #define H_T_C  56
00115 #define H_T_G  58
00116 #define H_T_T  91
00117 #define H_T_N  66
00118 
00119 #define H_N_A  66
00120 #define H_N_C  85
00121 #define H_N_G  91
00122 #define H_N_T  80
00123 #define H_N_N  80
00124 
00125 /* Delta G's of disruption * 1000. */
00126 #define G_A_A  1900
00127 #define G_A_C  1300
00128 #define G_A_G  1600
00129 #define G_A_T  1500
00130 #define G_A_N  1575
00131 
00132 #define G_C_A  1900 
00133 #define G_C_C  3100
00134 #define G_C_G  3600
00135 #define G_C_T  1600
00136 #define G_C_N  2550
00137 
00138 #define G_G_A  1600
00139 #define G_G_C  3100
00140 #define G_G_G  3100
00141 #define G_G_T  1300
00142 #define G_G_N  2275
00143 
00144 #define G_T_A   900
00145 #define G_T_C  1600
00146 #define G_T_G  1900
00147 #define G_T_T  1900
00148 #define G_T_N  1575
00149 
00150 #define G_N_A  1575
00151 #define G_N_C  2275
00152 #define G_N_G  2550
00153 #define G_N_T  1575
00154 #define G_N_N  1994
00155 
00156 #define A_CHAR 'A'
00157 #define G_CHAR 'G'
00158 #define T_CHAR 'T'
00159 #define C_CHAR 'C'
00160 #define N_CHAR 'N'
00161 
00162 #define CATID5(A,B,C,D,E) A##B##C##D##E
00163 #define CATID2(A,B) A##B
00164 #define DO_PAIR(LAST,THIS)          \
00165   if (CATID2(THIS,_CHAR) == c) {    \
00166      dh += CATID5(H,_,LAST,_,THIS); \
00167      ds += CATID5(S,_,LAST,_,THIS); \
00168      goto CATID2(THIS,_STATE);      \
00169   }
00170 
00171 #define STATE(LAST)     \
00172    CATID2(LAST,_STATE): \
00173    c = *s; s++;         \
00174    DO_PAIR(LAST,A)      \
00175    else DO_PAIR(LAST,T) \
00176    else DO_PAIR(LAST,G) \
00177    else DO_PAIR(LAST,C) \
00178    else DO_PAIR(LAST,N) \
00179    else if ('\0' == c)  \
00180              goto DONE; \
00181    else goto ERROR \
00182 
00183 double oligoTm(char *dna, double DNA_nM, double K_mM)
00184 /* Calculate melting point of short DNA sequence given DNA concentration in 
00185  * nanomoles, and salt concentration in millimoles.  This is calculated using eqn
00186  * (ii) in Rychlik, Spencer, Roads, Nucleic Acids Research, vol 18, no 21, page
00187  * 6410, with tables of nearest-neighbor thermodynamics for DNA bases as
00188  * provided in Breslauer, Frank, Bloecker, and Markey,
00189  * Proc. Natl. Acad. Sci. USA, vol 83, page 3748. */
00190 {
00191     register int dh = 0, ds = 108;
00192     register char c;
00193     char *dupe = cloneString(dna);
00194     char *s = dupe;
00195     double delta_H, delta_S;
00196 
00197     touppers(s);
00198     /* Use a finite-state machine (DFA) to calucluate dh and ds for s. */
00199     c = *s; s++;
00200     if (c == 'A') goto A_STATE;
00201     else if (c == 'G') goto G_STATE;
00202     else if (c == 'T') goto T_STATE;
00203     else if (c == 'C') goto C_STATE;
00204     else if (c == 'N') goto N_STATE;
00205     else goto ERROR;
00206     STATE(A);
00207     STATE(T);
00208     STATE(G);
00209     STATE(C);
00210     STATE(N);
00211 
00212     DONE:  /* dh and ds are now computed for the given sequence. */
00213     delta_H = dh * -100.0;  /* 
00214                              * Nearest-neighbor thermodynamic values for dh
00215                              * are given in 100 cal/mol of interaction.
00216                              */
00217     delta_S = ds * -0.1;     /*
00218                               * Nearest-neighbor thermodynamic values for ds
00219                               * are in in .1 cal/K per mol of interaction.
00220                               */
00221 
00222     /* 
00223      * See Rychlik, Spencer, Roads, Nucleic Acids Research, vol 18, no 21,
00224      * page 6410, eqn (ii).
00225      */
00226     freeMem(dupe);
00227     return delta_H / (delta_S + 1.987 * log(DNA_nM/4000000000.0))
00228         - 273.15 + 16.6 * log10(K_mM/1000.0);
00229 
00230     ERROR:  /* 
00231           * length of s was less than 2 or there was an illegal character in
00232           * s.
00233           */
00234     freeMem(dupe);
00235     errAbort("Not a valid oligo in oligoTm.");
00236     return 0;
00237 }
00238 #undef DO_PAIR
00239 
00240 #define DO_PAIR(LAST,THIS)          \
00241   if (CATID2(THIS,_CHAR) == c) {    \
00242      dg += CATID5(G,_,LAST,_,THIS); \
00243      goto CATID2(THIS,_STATE);      \
00244   }
00245 
00246 double oligoDg(char *dna)
00247 /* Calculate dg (change in Gibb's free energy) from melting oligo
00248  * the nearest neighbor model. Seq should be relatively short, given 
00249  * the characteristics of the nearest neighbor model (36 bases or less
00250  * is best). */
00251 {
00252     register int dg = 0;
00253     register char c;
00254     char *dupe = cloneString(dna);
00255     char *s = dupe;
00256 
00257     /* Use a finite-state machine (DFA) to calculate dg s. */
00258     c = *s; s++;
00259     if (c == 'A') goto A_STATE;
00260     else if (c == 'G') goto G_STATE;
00261     else if (c == 'T') goto T_STATE;
00262     else if (c == 'C') goto C_STATE;
00263     else if (c == 'N') goto N_STATE;
00264     else goto ERROR;
00265     STATE(A);
00266     STATE(T);
00267     STATE(G);
00268     STATE(C);
00269     STATE(N);
00270 
00271     DONE:  /* dg is now computed for the given sequence. */
00272     freeMem(dupe);
00273     return dg / 1000.0;
00274 
00275     ERROR:
00276     freeMem(dupe);
00277     errAbort("Not a valid oligo in oligoDg.");
00278     return 0;
00279 }
00280 
00281 
00282 double longSeqTm(char *s, int start, int len, double salt_conc)
00283 /* Calculate the melting temperature of substr(seq, start, length) using the
00284  * formula from Bolton and McCarthy, PNAS 84:1390 (1962) as presented in
00285  * Sambrook, Fritsch and Maniatis, Molecular Cloning, p 11.46 (1989, CSHL
00286  * Press).
00287  *
00288  * Tm = 81.5 + 16.6(log10([Na+])) + .41*(%GC) - 600/length
00289  *
00290  * Where [Na+] is the molar sodium concentration, (%GC) is the percent of Gs
00291  * and Cs in the sequence, and length is the length of the sequence.
00292  *
00293  * A similar formula is used by the prime primer selection program in GCG
00294  * (http://www.gcg.com), which instead uses 675.0 / length in the last term
00295  * (after F. Baldino, Jr, M.-F. Chesselet, and M.E.  Lewis, Methods in
00296  * Enzymology 168:766 (1989) eqn (1) on page 766 without the mismatch and
00297  * formamide terms).  The formulas here and in Baldino et al. assume Na+ rather
00298  * than K+.  According to J.G. Wetmur, Critical Reviews in BioChem. and
00299  * Mol. Bio. 26:227 (1991) 50 mM K+ should be equivalent in these formulae to .2
00300  * M Na+.
00301  *
00302  * This function takes salt_conc to be the millimolar (mM) concentration,
00303  * since mM is the usual units in PCR applications.  */
00304 {
00305   int GC_count = 0;
00306   char *p, *end;
00307 
00308   if(start + len > strlen(s) || start < 0 || len <= 0) 
00309         errAbort("bad input to longSeqTm");
00310   end = &s[start + len];
00311   /* Length <= 0 is nonsensical. */
00312   for (p = &s[start]; p < end; p++) {
00313     if ('G' == *p || 'g' == *p || 'C' == *p || 'c' == *p)
00314       GC_count++;
00315   }
00316 
00317   return
00318     81.5
00319     + (16.6 * log10(salt_conc / 1000.0))
00320     + (41.0 * (((double) GC_count) / len))
00321     - (600.0 / len);
00322 
00323 }
00324 
00325 double seqTm(char *seq, double dna_conc, double salt_conc)
00326 /* Figure out melting temperature of sequence of any length given
00327  * dna and salt concentration. */
00328 {
00329   int len = strlen(seq);
00330   return (len > 36)
00331     ? longSeqTm(seq, 0, len, salt_conc) : oligoTm(seq, dna_conc, salt_conc);
00332 }

Generated on Tue Dec 25 18:39:31 2007 for blat by  doxygen 1.5.2