package jasper; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; public class SparseSimilarityMatrix { /*--------------------------------------------------------------*/ /*---------------- Initialization ----------------*/ /*--------------------------------------------------------------*/ /** * Takes in a file of sketch similarity percentages from SketchCompare. * Returns a sparse matrix object containing each percentage * * @param inputFile The file containing pairwise comparisons of each sketch * @throws FileNotFoundException * @throws IOException */ public SparseSimilarityMatrix(String inputFile, SparseTree tree_) throws FileNotFoundException, IOException { //Assigns the input tree object to the tree variable. tree = tree_; //Take file name as input for building tree of related nodes in = inputFile; //Read in file, add header line and add to header variable try (BufferedReader br = new BufferedReader(new FileReader(in))) { String line; //while line isn't empty, process while ((line = br.readLine()) != null) { //if line is the header line, split and assign to variable. //may be used when header becomes more complex if(line.startsWith("#")) {header=line.split("\t"); } else { //If not a header line, split on tab. String[] data = line.split("\t"); //Query organism is column 0. String queryName = data[0]; //String refName = data[1]; } } } //Get the total number of organisms in the tree. orgCount = tree.getOrgCount(); //Initialize the matrix with the appropriate size of all nodes. sparseMatrix = new ArrayList[orgCount + 1]; //Iterate over the matrix and add an ArrayList to each ArrayList. for(int i=0; i(); } //Begin reading the file a second time. try (BufferedReader br = new BufferedReader(new FileReader(in))) { String line; //while line isn't empty, process while ((line = br.readLine()) != null) { //If line is the header line, split and assign to variable. //may be used when header becomes more complex if(line.startsWith("#")) {assert true; } else { //If not a header line, split on tab. String[] data = line.split("\t"); //Column 0 is query name. String queryName = data[0]; //Column 1 is reference name. String refName = data[1]; //Column 2 is the similarity percentage. double similarity = Double.parseDouble(data[2]); //Check that both names are in the HashMap (too slow?) if(tree.containsName(queryName)==true && tree.containsName(refName)) { //Get the positions assigned to both organisms. int queryPos = nameToNodeId(queryName); int refPos = nameToNodeId(refName); Comparison currentComparison = new Comparison(queryPos, refPos, similarity); //Add the similarity percentage to the appropriate matrix position. sparseMatrix[queryPos].add(currentComparison); } } } } } /** * Method for taking the node name and returning the node ID value * @param orgName the organism node name (String). * @return int The node ID of the organism name taken as input. */ public int nameToNodeId(String orgName) { //Get the node associated with the input name. TreeNode org = tree.getNode(orgName); //Asserts the org nod is in the tree. assert(org != null) : orgName; //Return the int node ID. return org.nodeId; } /** * Prints out the entire matrix. * Impractical in cases of large input datasets. * */ public String toString() { StringBuilder sb=new StringBuilder(); for (int i = 0; i < sparseMatrix.length; i++) { for (int j = 0; j < sparseMatrix[i].size(); j++) { sb.append(sparseMatrix[i].get(j) + " "); } sb.append('\n'); } return sb.toString(); } //TODO: This method is slow and doesnt work, need something better. // /** // * Returns the similarity of two specified organisms. // * Both organisms must have been compared using SketchCompare. // * // * @param org1 The Name of an organism. // * @param org2 The name of a second organism. // * @return similarity The Double percentage similarity between the two sketches. // */ // public Comparison getComparison(String org1, String org2) { // int orgName1 = nameToNodeId(org1); // int orgName2 = nameToNodeId(org2); // // return sparseMatrix[orgName1].get(orgName2); // } public int getSize() { return orgCount; } public ArrayList getOrgRow(String orgName) { int rowNum = tree.nodeMap.get(orgName).nodeId; return sparseMatrix[rowNum]; } /*--------------------------------------------------------------*/ /*---------------- Fields ----------------*/ /*--------------------------------------------------------------*/ /** * A SparseTree object that contains taxonomic information relevant to this matrix. */ final SparseTree tree; /** * An arraylist containing comparisons between nodes in the tree. */ private final ArrayList[] sparseMatrix; /** * The number of sketches being analyzed. */ private int orgCount; /** * ArrayList that will hold the lines of the input file. */ ArrayList lines = new ArrayList(); /** * Header line of the comparison input file. */ private String[] header; /** * Input file name. */ private String in=null; /** * Number of lines processed from the sketch comparison file. */ private long linesProcessed=0; }