import java.util.Random;
/**
* @author albrecht (felipe.albrecht@gmail.com)
*
* TODO: a better main method.
*/
public class SequenceMutator {

    /** Shared source of randomness used by every mutation in this class. */
    private static final Random random = new Random();

    /**
     * Default ratio for a single-base mutation. See
     * {@link #createProbabilitiesVector} for how a ratio is turned into a chance.
     * (The misspelled name is kept because the constant is public.)
     */
    public static final double DEFAULT_MUTATE_BASE_RATION = 0.003;

    /**
     * Default ratio for the deletion of a sub-sequence.
     */
    public static final double DEFAULT_DELETION_RATIO = 0.05;

    /**
     * Default ratio for the insertion of a random sub-sequence.
     */
    public static final double DEFAULT_INSERTION_RATIO = 0.04;

    /**
     * Default ratio for the duplication of a sub-sequence.
     */
    public static final double DEFAULT_DUPLICATION_RATIO = 0.01;

    /**
     * Default ratio for the dislocation (move) of a sub-sequence.
     */
    public static final double DEFAULT_DISLOCATION_RATIO = 0.01;

    /**
     * Default ratio for the inversion of the sequence.
     */
    public static final double DEFAULT_INVERSION_RATIO = 0.01;

    /**
     * Default ratio for the dislocation of a sub-sequence that is also inverted.
     */
    public static final double DEFAULT_DISLOCATION_INVERSION_RATIO = 0.01;

    /**
     * Divisor applied to the sequence length to bound the size of the
     * sub-sequence touched by a mutation.
     *
     * Example: for a 20 bases long sequence and a proportion size of 4, the
     * length of the affected sub-sequence is drawn from 0 to 4, that is,
     * strictly below 20/4 = 5 bases.
     */
    public static final int DEFAULT_PROPORTION_SIZE = 4;

    private static final int PROBABILITIES_VECTOR_SIZE = 10000;

    /** Number of vector slots taken by a ratio of 1.0. */
    private static final int SLOTS_PER_RATIO_UNIT = 1000;

    // Only compile-time constants may be used by createProbabilitiesVector,
    // because it runs here, during static initialization.
    private static final int[] DEFAULT_PROBABILITY_VECTOR = createProbabilitiesVector(DEFAULT_MUTATE_BASE_RATION, DEFAULT_DELETION_RATIO, DEFAULT_INSERTION_RATIO, DEFAULT_DUPLICATION_RATIO, DEFAULT_DISLOCATION_RATIO, DEFAULT_INVERSION_RATIO, DEFAULT_DISLOCATION_INVERSION_RATIO);

    // Codes stored in the probabilities vector.
    static final int NOTHING = 0;
    static final int MUTATE_BASE = 1;
    static final int DELETION = 2;
    static final int INSERTION = 3;
    static final int DUPLICATION = 4;
    static final int DISLOCATION = 5;
    static final int INVERSION = 6;
    static final int DISLOCATION_INVERSION = 7;

    /**
     * Create a probabilities vector with the given ratios.
     *
     * The vector has 10000 slots and each ratio fills
     * {@code round(ratio * 1000)} consecutive slots with its mutation code, so
     * a ratio of 0.001 takes one slot (a 0.01% chance per generation) and the
     * ratios may sum up to at most 10.0 (all slots used). The slots that are
     * not taken keep the {@code NOTHING} code.
     *
     * @param mutateBase ratio for a single-base mutation.
     * @param deletion ratio for a deletion.
     * @param insertion ratio for an insertion.
     * @param duplication ratio for a duplication.
     * @param dislocation ratio for a dislocation.
     * @param inversion ratio for an inversion.
     * @param dislocationWithInversion ratio for a dislocation with inversion.
     * @return a vector with the mutation codes.
     * @throws IllegalArgumentException if a ratio is negative or not a number,
     *         or if the ratios together need more slots than the vector has.
     */
    public static int[] createProbabilitiesVector(double mutateBase, double deletion, double insertion, double duplication, double dislocation, double inversion, double dislocationWithInversion) {
        int[] probabilitiesVector = new int[PROBABILITIES_VECTOR_SIZE];
        int pos = 0;
        pos = fillSlots(probabilitiesVector, pos, mutateBase, MUTATE_BASE);
        pos = fillSlots(probabilitiesVector, pos, deletion, DELETION);
        pos = fillSlots(probabilitiesVector, pos, insertion, INSERTION);
        pos = fillSlots(probabilitiesVector, pos, duplication, DUPLICATION);
        pos = fillSlots(probabilitiesVector, pos, dislocation, DISLOCATION);
        pos = fillSlots(probabilitiesVector, pos, inversion, INVERSION);
        fillSlots(probabilitiesVector, pos, dislocationWithInversion, DISLOCATION_INVERSION);
        return probabilitiesVector;
    }

    /**
     * Write the mutation code into the slots that belong to the given ratio,
     * starting at start, and return the index of the first free slot.
     */
    private static int fillSlots(int[] vector, int start, double ratio, int mutation) {
        if (Double.isNaN(ratio) || ratio < 0) {
            throw new IllegalArgumentException("Mutation ratio must be a non-negative number, got " + ratio);
        }
        // The same rounded count is used to fill and to advance, so two
        // neighbouring mutations can never overlap or leave a gap.
        long slots = Math.round(ratio * SLOTS_PER_RATIO_UNIT);
        if (slots > vector.length - start) {
            throw new IllegalArgumentException("The mutation ratios need more than the " + vector.length + " available slots");
        }
        int end = start + (int) slots;
        for (int i = start; i < end; i++) {
            vector[i] = mutation;
        }
        return end;
    }

    /**
     * Mutate the input sequence using the probabilities vector generations times.
     *
     * In each generation one slot of the vector is picked at random and the
     * mutation stored there, if any, is applied. The vector may have any
     * non-zero length.
     *
     * @param sequence the sequence to mutate.
     * @param generations how many times a slot is drawn.
     * @param proporsionSize divisor that bounds the size of the mutated
     *        sub-sequences, at least 1.
     * @param probabilitiesVector vector of mutation codes, as built by
     *        {@link #createProbabilitiesVector}.
     * @return a new sequence mutated.
     * @throws IllegalArgumentException if the vector is empty and generations
     *         is positive, or if a drawn mutation rejects proporsionSize.
     */
    public static String mutateSequence(String sequence, int generations, int proporsionSize, int[] probabilitiesVector) {
        for (int i = 0; i < generations; i++) {
            // Draw over the vector really given, not over the default size.
            int mutation = probabilitiesVector[random.nextInt(probabilitiesVector.length)];
            switch (mutation) {
            case MUTATE_BASE:
                sequence = mutateBase(sequence);
                break;
            case DELETION:
                sequence = mutateDelete(sequence, proporsionSize);
                break;
            case INSERTION:
                sequence = mutateInsertion(sequence, proporsionSize);
                break;
            case DUPLICATION:
                sequence = mutateDuplication(sequence, proporsionSize);
                break;
            case DISLOCATION:
                sequence = mutateDislocation(sequence, proporsionSize);
                break;
            case INVERSION:
                sequence = mutateInvertation(sequence);
                break;
            case DISLOCATION_INVERSION:
                sequence = mutateDislocationInvertation(sequence, proporsionSize);
                break;
            default:
                // NOTHING, or an unknown code: the sequence is left as it is.
                break;
            }
        }
        return sequence;
    }

    /**
     * Simple main for test and fast applications.
     *
     * @param args the sequence to mutate and the number of generations.
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            printHelp();
            return;
        }
        String sequence = args[0];
        int generations;
        try {
            generations = Integer.parseInt(args[1]);
        } catch (NumberFormatException e) {
            System.err.println("Invalid number of generations: " + args[1]);
            printHelp();
            return;
        }
        System.out.println(mutateSequence(sequence, generations, DEFAULT_PROPORTION_SIZE, DEFAULT_PROBABILITY_VECTOR));
    }

    private static void printHelp() {
        System.out.println("SequenceMutator help:");
        System.out.println(" ");
        System.out.println("Exemple: java SequenceMutator GCTAGCTAGCATGACTGCAGCTGACACGCGGCGATTATTGCATCG 100");
        System.out.println("to change the probabilities values, change at souce code :-)");
        System.out.println("It's only a main for test propose, please, implement yours application and use this class.");
    }

    /**
     * Replace the base at a random position by a random base. The new base may
     * be equal to the old one.
     *
     * @param sequence the sequence to mutate.
     * @return a new sequence with the mutation.
     */
    public static String mutateBase(String sequence) {
        if (sequence.length() == 0) {
            return sequence;
        }
        char[] charArray = sequence.toCharArray();
        charArray[randomPos(charArray.length)] = getRandomBase();
        return new String(charArray);
    }

    /**
     * Delete a random sub-sequence. Its length may be zero.
     *
     * @param sequence the sequence to mutate.
     * @param maxSizeProportion divisor of the sequence length that bounds the
     *        size of the removed sub-sequence, at least 1.
     *
     * @return a new sequence with the mutation.
     * @throws IllegalArgumentException if maxSizeProportion is lower than 1
     *         and the sequence is not empty.
     */
    public static String mutateDelete(String sequence, int maxSizeProportion) {
        if (sequence.length() == 0) {
            return sequence;
        }
        requireValidProportion(maxSizeProportion);
        int deletionLength = randomLength(sequence, maxSizeProportion);
        // Every start is allowed, including the one that removes the tail.
        int pos = randomStart(sequence.length(), deletionLength);
        return removeRange(sequence, pos, deletionLength);
    }

    /**
     * Insert a random sub-sequence at a random place, which may be the very
     * beginning or the very end of the sequence.
     *
     * @param sequence the sequence to mutate.
     * @param maxSizeProportion divisor of the sequence length that bounds the
     *        size of the inserted sub-sequence, at least 1.
     *
     * @return a new sequence with the mutation.
     * @throws IllegalArgumentException if maxSizeProportion is lower than 1.
     */
    public static String mutateInsertion(String sequence, int maxSizeProportion) {
        requireValidProportion(maxSizeProportion);
        int pos = randomInsertionPos(sequence.length());
        int insertionLength = randomLength(sequence, maxSizeProportion);
        return insertSequence(sequence, pos, randomSequence(insertionLength));
    }

    /**
     * Duplicate a random sub-sequence and put the copy into a random place in
     * the sequence.
     *
     * @param sequence the sequence to mutate.
     * @param maxSizeProportion divisor of the sequence length that bounds the
     *        size of the duplicated sub-sequence, at least 1.
     *
     * @return a new sequence with the mutation.
     * @throws IllegalArgumentException if maxSizeProportion is lower than 1
     *         and the sequence is not empty.
     */
    public static String mutateDuplication(String sequence, int maxSizeProportion) {
        if (sequence.length() == 0) {
            return sequence;
        }
        requireValidProportion(maxSizeProportion);
        int insertionPos = randomInsertionPos(sequence.length());
        int duplicationLength = randomLength(sequence, maxSizeProportion);
        int duplicatePos = randomStart(sequence.length(), duplicationLength);
        String duplicateSequence = sequence.substring(duplicatePos, duplicatePos + duplicationLength);
        return insertSequence(sequence, insertionPos, duplicateSequence);
    }

    /**
     * Dislocate a random sub-sequence: remove it and put it back into a random
     * place of what is left of the sequence.
     *
     * @param sequence the sequence to mutate.
     * @param maxSizeProportion divisor of the sequence length that bounds the
     *        size of the dislocated sub-sequence, at least 1.
     *
     * @return a new sequence with the mutation.
     * @throws IllegalArgumentException if maxSizeProportion is lower than 1
     *         and the sequence has more than one base.
     */
    public static String mutateDislocation(String sequence, int maxSizeProportion) {
        return dislocate(sequence, maxSizeProportion, false);
    }

    /**
     * Invert (reverse) the whole input sequence.
     *
     * @param sequence the sequence to invert.
     *
     * @return a new sequence with the bases in the opposite order.
     */
    public static String mutateInvertation(String sequence) {
        if (sequence.length() <= 1) {
            return sequence;
        }
        char[] charArray = new char[sequence.length()];
        for (int i = 0; i < sequence.length(); i++) {
            charArray[sequence.length() - i - 1] = sequence.charAt(i);
        }
        return new String(charArray);
    }

    /**
     * Dislocate and invert a random sub-sequence: remove it, reverse it and
     * put it back into a random place of what is left of the sequence.
     *
     * @param sequence the sequence to mutate.
     * @param maxSizeProportion divisor of the sequence length that bounds the
     *        size of the dislocated and inverted sub-sequence, at least 1.
     *
     * @return a new sequence with the mutation.
     * @throws IllegalArgumentException if maxSizeProportion is lower than 1
     *         and the sequence has more than one base.
     */
    public static String mutateDislocationInvertation(String sequence, int maxSizeProportion) {
        return dislocate(sequence, maxSizeProportion, true);
    }

    /**
     * Shared body of the two dislocation mutations: cut a random sub-sequence,
     * reverse it when invert is set, and insert it into the remainder.
     */
    private static String dislocate(String sequence, int maxSizeProportion, boolean invert) {
        if (sequence.length() <= 1) {
            return sequence;
        }
        requireValidProportion(maxSizeProportion);
        int dislocationLength = randomLength(sequence, maxSizeProportion);
        int removalPos = randomStart(sequence.length(), dislocationLength);
        String dislocationSequence = sequence.substring(removalPos, removalPos + dislocationLength);
        if (invert) {
            dislocationSequence = mutateInvertation(dislocationSequence);
        }
        String remainder = removeRange(sequence, removalPos, dislocationLength);
        int destinationPos = randomInsertionPos(remainder.length());
        return insertSequence(remainder, destinationPos, dislocationSequence);
    }

    /** Reject the proportions that the public mutation methods do not accept. */
    private static void requireValidProportion(int maxSizeProportion) {
        if (maxSizeProportion < 1) {
            throw new IllegalArgumentException("maxSizeProportion must be at least 1, got " + maxSizeProportion);
        }
    }

    /** Return the sequence with insertionSequence placed before index pos. */
    private static String insertSequence(String sequence, int pos, String insertionSequence) {
        return sequence.substring(0, pos) + insertionSequence + sequence.substring(pos);
    }

    /** Return the sequence without the length characters that begin at start. */
    private static String removeRange(String sequence, int start, int length) {
        return sequence.substring(0, start) + sequence.substring(start + length);
    }

    /**
     * Draw the length of the sub-sequence for a mutation. The callers have
     * already checked that maxSizeProportion is at least 1.
     */
    private static int randomLength(String sequence, int maxSizeProportion) {
        int maxLength = sequence.length() / maxSizeProportion;
        // nextInt(1) always returns 0, so short sequences get their chance of
        // a one-base mutation from the proportion itself.
        if (maxLength <= 1) {
            return calculateBonus(maxSizeProportion);
        }
        return random.nextInt(maxLength);
    }

    /** Return 1 with a probability of 1/maxSizeProportion, and 0 otherwise. */
    private static int calculateBonus(int maxSizeProportion) {
        return random.nextDouble() < 1.0 / maxSizeProportion ? 1 : 0;
    }

    /** Random index of an existing position; 0 when length is 1 or lower. */
    private static int randomPos(int length) {
        if (length <= 1) {
            return 0;
        }
        return random.nextInt(length);
    }

    /** Random start for a segment of segmentLength that fits in the sequence. */
    private static int randomStart(int sequenceLength, int segmentLength) {
        return random.nextInt(sequenceLength - segmentLength + 1);
    }

    /** Random insertion point from 0 (before all) to length (after all). */
    private static int randomInsertionPos(int length) {
        return random.nextInt(length + 1);
    }

    /**
     * Create a random sequence.
     *
     * @param length of the new sequence.
     *
     * @return the generated sequence.
     */
    public static String randomSequence(int length) {
        char[] charArray = new char[length];
        for (int i = 0; i < length; i++) {
            charArray[i] = getRandomBase();
        }
        return new String(charArray);
    }

    /** The bases that the random sequences and base mutations are made of. */
    static char[] bases = new char[] { 'A', 'C', 'G', 'T' };

    /**
     * Get a random DNA base.
     *
     * @return a random DNA base.
     */
    public static char getRandomBase() {
        return bases[random.nextInt(bases.length)];
    }
}