// Implementation of some algorithms for pairwise alignment from // Durbin et al: Biological Sequence Analysis, CUP 1998, chapter 3. // Peter Sestoft, sestoft@itu.dk (Oct 1999), 2001-08-20 version 0.7 // Reference: http://www.itu.dk/people/sestoft/bsa.html // License: Anybody can use this code for any purpose, including // teaching, research, and commercial purposes, provided proper // reference is made to its origin. Neither the author nor the Royal // Veterinary and Agricultural University, Copenhagen, Denmark, can // take any responsibility for the consequences of using this code. // Compile with: // javac Match3.java // Run with: // java Match3 // Notational conventions: // i = 1,...,L indexes x, the observed string, x_0 not a symbol // k,ell = 0,...,hmm.nstate-1 indexes hmm.state(k) a_0 is the start state import java.text.*; import java.util.*; // Some algorithms for Hidden Markov Models (Chapter 3): Viterbi, // Forward, Backward, Baum-Welch. We compute with log probabilities. class HMM { // State names and state-to-state transition probabilities int nstate; // number of states (incl initial state) String[] state; // names of the states double[][] loga; // loga[k][ell] = log(P(k -> ell)) // Emission names and emission probabilities int nesym; // number of emission symbols String esym; // the emission symbols e1,...,eL (characters) double[][] loge; // loge[k][ei] = log(P(emit ei in state k)) // Input: // state = array of state names (except initial state) // amat = matrix of transition probabilities (except initial state) // esym = string of emission names // emat = matrix of emission probabilities public HMM(String[] state, double[][] amat, String esym, double[][] emat) { if (state.length != amat.length) throw new IllegalArgumentException("HMM: state and amat disagree"); if (amat.length != emat.length) throw new IllegalArgumentException("HMM: amat and emat disagree"); for (int i=0; i start) = 0 loga[0][0] = Double.NEGATIVE_INFINITY; // = log(0) // P(start -> other) = 1.0/state.length double fromstart = Math.log(1.0/state.length); for (int j=1; j start) = 0 loga[i][0] = Double.NEGATIVE_INFINITY; // = log(0) for (int j=1; j esym.charAt(b); assume all esyms <= 'Z' int[] esyminv = new int[91]; for (int i=0; i maxprod) { kmax = k; maxprod = prod; } } v[i][ell] = hmm.loge[ell][x.charAt(i-1)] + maxprod; B[i][ell] = new Traceback2(i-1, kmax); } int kmax = 0; double max = v[L][kmax]; for (int k=1; k max) { kmax = k; max = v[L][k]; } } B0 = new Traceback2(L, kmax); } public String getPath() { StringBuffer res = new StringBuffer(); Traceback2 tb = B0; int i = tb.i, j = tb.j; while ((tb = B[tb.i][tb.j]) != null) { res.append(hmm.state[j]); i = tb.i; j = tb.j; } return res.reverse().toString(); } public void print(Output out) { for (int j=0; j=1; i--) for (int k=0; k