statalign.postprocess.utils
Class RNAFoldingTools

java.lang.Object
  extended by statalign.postprocess.utils.RNAFoldingTools

public class RNAFoldingTools
extends java.lang.Object

Author:
Michael

Nested Class Summary
 class RNAFoldingTools.MultiThreadedPosteriorDecoding
          A class which, given a base-pairing probability matrix and an array representing the probabilities of single nucleotides being unpaired, performs a multi-threaded posterior-decoding and returns the MPD consensus structure.
 
Field Summary
static double emptyValue
           
 
Constructor Summary
RNAFoldingTools()
           
 
Method Summary
static double calculatePairsOnlyReliabilityScore(int[] pairedSites, double[][] basePairProb)
           
static double calculatePairsOnlyReliabilityScore(int[] pairedSites, double[][] basePairProb, java.util.ArrayList<java.lang.Double> weights)
           
static double calculatePPfoldReliabilityScore(int[] pairedSites, double[][] basePairProb)
           
static double[][] getBasePairCountMatrix(java.lang.String[] dotBracketStructures)
          Given a String array of dot-bracket structures, fills a matrix which counts the number of times a pair of nucleotides is base-paired in each structure.
static java.lang.String getDotBracketStringFromCtFile(java.io.File ctFile)
           
static java.lang.String getDotBracketStringFromPairedSites(int[] pairedSites)
          Returns a dot bracket string representation given an array of paired sites.
static double[][] getDoubleMatrix(float[][] matrix)
           
static float[][] getFloatMatrix(double[][] matrix)
           
static int[] getPairedSitesFromCtFile(java.io.File ctFile)
           
static int[] getPairedSitesFromDBNStringFile(java.io.File dbnFile)
           
static int[] getPairedSitesFromDotBracketString(java.lang.String dotBracketStructure)
          Given a dot bracket string representation, returns an array of paired sites.
static int[] getPairedSitesFromDotBracketString(java.lang.String dotBracketStructure, char openBracket, char closeBracket)
          Given a dot bracket string representation, returns an array of paired sites.
static int[] getPosteriorDecodingConsensusStructure(double[][] basePairProb)
           
static int[] getPosteriorDecodingConsensusStructure(double[][] basePairProb, double[] singleBaseProb)
          A single-threaded method for generating the posterior-decoding structure.
static int[] getPosteriorDecodingConsensusStructure(float[][] basePairProb)
           
 int[] getPosteriorDecodingConsensusStructureMultiThreaded(double[][] basePairProb)
          Returns the posterior-decoding consensus structure.
 int[] getPosteriorDecodingConsensusStructureMultiThreaded(double[][] basePairProb, double[] singleBaseProb)
          Returns the posterior-decoding consensus structure.
 int[] getPosteriorDecodingConsensusStructureMultiThreaded(float[][] basePairProb)
           
static java.lang.String getReferenceSequence(java.util.ArrayList<java.lang.String> sequences, int refLength)
           
static java.lang.String getSequenceByName(java.lang.String seqName, java.util.List<java.lang.String> sequences, java.util.List<java.lang.String> sequenceNames)
           
static double[] getSingleBaseCount(java.lang.String[] dotBracketStructures)
          Given a String array of dot-bracket structures, fills a vector which counts the number of times a specific nucleotide position is unpaired.
static double[] getSingleBaseProb(double[][] basePairProb)
           
static boolean isRNAalignment(java.util.ArrayList<java.lang.String> sequences)
           
static void loadFastaSequences(java.io.File file, java.util.ArrayList<java.lang.String> sequences, java.util.ArrayList<java.lang.String> sequenceNames)
           
static void loadFastaSequences(java.io.File file, java.util.ArrayList<java.lang.String> sequences, java.util.ArrayList<java.lang.String> sequenceNames, int max)
           
static double[][] loadMatrix(java.io.File bpFile)
           
static void main(java.lang.String[] args)
           
static java.lang.String pad(java.lang.String s, int length)
          A helper method which pads or truncates a string to a specific length.
 RNAFoldingTools.MultiThreadedPosteriorDecoding performPosteriorDecodingMultiThreaded(double[][] basePairProb)
          Performs posterior-decoding using multi-threading.
 RNAFoldingTools.MultiThreadedPosteriorDecoding performPosteriorDecodingMultiThreaded(double[][] basePairProb, double[] singleBaseProb)
          Performs posterior-decoding using multi-threading.
static void printMatrix(double[][] matrix)
          A helper method which prints double matrices.
static void saveCtFile(java.io.File outFile, int[] pairedSites, java.lang.String header, java.lang.String sequence)
           
static void saveDotBracketFile(java.io.File outFile, int[] pairedSites, java.lang.String header, java.lang.String sequence)
           
 void test()
           
static void traceBack(int[][] S, int i, int j, int[] pairedWith)
           
static void writeMatrix(double[][] matrix, java.io.File file)
          A helper method which writes double matrices to files.
static void writeMatrix(int[][] matrix, java.io.File file)
          A helper method which writes integer matrices to files.
static void writeToFile(java.io.File f, java.lang.String s, boolean append)
           
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

emptyValue

public static final double emptyValue
See Also:
Constant Field Values
Constructor Detail

RNAFoldingTools

public RNAFoldingTools()
Method Detail

main

public static void main(java.lang.String[] args)

getSingleBaseProb

public static double[] getSingleBaseProb(double[][] basePairProb)

getPosteriorDecodingConsensusStructure

public static int[] getPosteriorDecodingConsensusStructure(float[][] basePairProb)

getPosteriorDecodingConsensusStructure

public static int[] getPosteriorDecodingConsensusStructure(double[][] basePairProb)

getPosteriorDecodingConsensusStructure

public static int[] getPosteriorDecodingConsensusStructure(double[][] basePairProb,
                                                           double[] singleBaseProb)
A single-threaded method for generating the posterior-decoding structure.

Parameters:
basePairProb - an NxN matrix of base-pairing probabilities.
singleBaseProb - an array of length N representing probabilities for unpaired bases.
Returns:
an array of paired positions, where (i, array[i]) represents a pairing between nucleotides i+1 and array[i]; if array[i] = 0, then nucleotide i+1 is unpaired.

performPosteriorDecodingMultiThreaded

public RNAFoldingTools.MultiThreadedPosteriorDecoding performPosteriorDecodingMultiThreaded(double[][] basePairProb,
                                                                                            double[] singleBaseProb)
Performs posterior-decoding using multi-threading.

Parameters:
basePairProb - an NxN matrix of base-pairing probabilities.
singleBaseProb - an array of length N representing probabilities for unpaired bases.
Returns:
an instance of MultiThreadedPosteriorDecoding, which has public access to various useful variables (e.g. paired nucleotides of the MEA structure).

performPosteriorDecodingMultiThreaded

public RNAFoldingTools.MultiThreadedPosteriorDecoding performPosteriorDecodingMultiThreaded(double[][] basePairProb)
Performs posterior-decoding using multi-threading.

Parameters:
basePairProb - an NxN matrix of base-pairing probabilities. Assumes that sum(row) <= 1, in order to calculate the single base probabilities.
Returns:
an instance of MultiThreadedPosteriorDecoding, which has public access to various useful variables (e.g. paired nucleotides of the MEA structure).

getPosteriorDecodingConsensusStructureMultiThreaded

public int[] getPosteriorDecodingConsensusStructureMultiThreaded(double[][] basePairProb,
                                                                 double[] singleBaseProb)
Returns the posterior-decoding consensus structure.

Parameters:
basePairProb - an NxN matrix of base-pairing probabilities.
singleBaseProb - an array of length N representing probabilities for unpaired bases.
Returns:
the posterior-decoding consensus structure.

getPosteriorDecodingConsensusStructureMultiThreaded

public int[] getPosteriorDecodingConsensusStructureMultiThreaded(float[][] basePairProb)

getPosteriorDecodingConsensusStructureMultiThreaded

public int[] getPosteriorDecodingConsensusStructureMultiThreaded(double[][] basePairProb)
Returns the posterior-decoding consensus structure.

Parameters:
basePairProb - an NxN matrix of base-pairing probabilities. Assumes that sum(row) <= 1, in order to calculate the single base probabilities.
Returns:
the posterior-decoding consensus structure.

traceBack

public static void traceBack(int[][] S,
                             int i,
                             int j,
                             int[] pairedWith)

getSingleBaseCount

public static double[] getSingleBaseCount(java.lang.String[] dotBracketStructures)
Given a String array of dot-bracket structures, fills a vector which counts the number of times a specific nucleotide position is unpaired. For testing purposes only.

Parameters:
dotBracketStructures - a String array of dot-bracket structures.
Returns:
an array of unpaired counts.

getBasePairCountMatrix

public static double[][] getBasePairCountMatrix(java.lang.String[] dotBracketStructures)
Given a String array of dot-bracket structures, fills a matrix which counts the number of times a pair of nucleotides is base-paired in each structure. For testing purposes only.

Parameters:
dotBracketStructures - a String array of dot-bracket structures.
Returns:
a double matrix containing base-pairing counts.

printMatrix

public static void printMatrix(double[][] matrix)
A helper method which prints double matrices. For testing purposes only.

Parameters:
matrix - the double matrix to print.

writeMatrix

public static void writeMatrix(double[][] matrix,
                               java.io.File file)
A helper method which writes double matrices to files. For testing purposes only.

Parameters:
matrix - the double matrix to write.
file - the file to which the matrix is written.

writeMatrix

public static void writeMatrix(int[][] matrix,
                               java.io.File file)
A helper method which writes integer matrices to files. For testing purposes only.

Parameters:
matrix - the integer matrix to write.
file - the file to which the matrix is written.

pad

public static java.lang.String pad(java.lang.String s,
                                   int length)
A helper method which pads or truncates a string to a specific length. For testing purposes only.

Parameters:
s - the string to pad or truncate.
length - the length to which the string is padded or truncated.
Returns:
the padded/truncated string.

test

public void test()

getDotBracketStringFromPairedSites

public static java.lang.String getDotBracketStringFromPairedSites(int[] pairedSites)
Returns a dot bracket string representation given an array of paired sites.

Parameters:
pairedSites - an array of paired sites.
Returns:
a dot bracket string representation given an array of paired sites.

getPairedSitesFromDotBracketString

public static int[] getPairedSitesFromDotBracketString(java.lang.String dotBracketStructure)
Given a dot bracket string representation, returns an array of paired sites.

Parameters:
dotBracketStructure - a dot bracket string representation of a structure.
Returns:
an array of paired sites.
See Also:
getDotBracketStringFromPairedSites(int[])

getPairedSitesFromDotBracketString

public static int[] getPairedSitesFromDotBracketString(java.lang.String dotBracketStructure,
                                                       char openBracket,
                                                       char closeBracket)
Given a dot bracket string representation, returns an array of paired sites.

Parameters:
dotBracketStructure - a dot bracket string representation of a structure.
openBracket - the opening bracket.
closeBracket - the closing bracket.
Returns:
an array of paired sites.
See Also:
getDotBracketStringFromPairedSites(int[])

getReferenceSequence

public static java.lang.String getReferenceSequence(java.util.ArrayList<java.lang.String> sequences,
                                                    int refLength)

getDotBracketStringFromCtFile

public static java.lang.String getDotBracketStringFromCtFile(java.io.File ctFile)

getSequenceByName

public static java.lang.String getSequenceByName(java.lang.String seqName,
                                                 java.util.List<java.lang.String> sequences,
                                                 java.util.List<java.lang.String> sequenceNames)

getPairedSitesFromCtFile

public static int[] getPairedSitesFromCtFile(java.io.File ctFile)

getPairedSitesFromDBNStringFile

public static int[] getPairedSitesFromDBNStringFile(java.io.File dbnFile)

loadMatrix

public static double[][] loadMatrix(java.io.File bpFile)

getDoubleMatrix

public static double[][] getDoubleMatrix(float[][] matrix)

getFloatMatrix

public static float[][] getFloatMatrix(double[][] matrix)

loadFastaSequences

public static void loadFastaSequences(java.io.File file,
                                      java.util.ArrayList<java.lang.String> sequences,
                                      java.util.ArrayList<java.lang.String> sequenceNames)

loadFastaSequences

public static void loadFastaSequences(java.io.File file,
                                      java.util.ArrayList<java.lang.String> sequences,
                                      java.util.ArrayList<java.lang.String> sequenceNames,
                                      int max)

calculatePPfoldReliabilityScore

public static double calculatePPfoldReliabilityScore(int[] pairedSites,
                                                     double[][] basePairProb)

calculatePairsOnlyReliabilityScore

public static double calculatePairsOnlyReliabilityScore(int[] pairedSites,
                                                        double[][] basePairProb)

calculatePairsOnlyReliabilityScore

public static double calculatePairsOnlyReliabilityScore(int[] pairedSites,
                                                        double[][] basePairProb,
                                                        java.util.ArrayList<java.lang.Double> weights)

writeToFile

public static void writeToFile(java.io.File f,
                               java.lang.String s,
                               boolean append)

saveCtFile

public static void saveCtFile(java.io.File outFile,
                              int[] pairedSites,
                              java.lang.String header,
                              java.lang.String sequence)

saveDotBracketFile

public static void saveDotBracketFile(java.io.File outFile,
                                      int[] pairedSites,
                                      java.lang.String header,
                                      java.lang.String sequence)

isRNAalignment

public static boolean isRNAalignment(java.util.ArrayList<java.lang.String> sequences)