/**************************************************************************** // Author: Kane Tse (KT) // Overview: Creates a Tree to contain all GO nodes and organize them // this tree is only an accessor class, in that it does not // store any information about the GO tree itself, except // for the location of the root. // // Information about the tree structure (child/parent/part of) // relationships can be found in the Node class // Modifications: // June/19/01 - Kane - Created /***************************************************************************/ package goview; import java.io.*; import java.util.Hashtable; import java.util.StringTokenizer; import java.util.Vector; import java.util.Enumeration; public class GOTree { // private instance variables private Node root = null; // some GO terms have more than one ID, so this hashtable should only // store a single copy of each node (by it's primary GO ID) private Hashtable allNodes = new Hashtable(10000); // translation table to translate secondary GO IDs and GO terms to // primary GO IDs. We don't store nodes here in order to reduce // space complexity. private Hashtable names_GOids = new Hashtable(10000); /************************************************************************* * * Method: GOTree() - class constructor * Purpose: Creates a GO Tree by reading from a flat file * Parameters: FileReader - a file reader object created from the GO flat-file * Returns: None. Constructors cannot return any values * ************************************************************************/ GOTree(FileReader goFileReader) throws Exception { BufferedReader goFileBufferedReader = new BufferedReader(goFileReader); String currentLine; // the current line of the flat file Vector currentBranch = new Vector(); // this Vector contains a list of all nodes from the root of the // tree all the until the current node. It is used for determining // the parent of the current node. (Current node = the node // that is listed on the current line of the text file) try { // keep reading the file until we get to the end of it while (((currentLine = goFileBufferedReader.readLine()) != null) && !(currentLine.equals("$"))) { // skip comment lines if (currentLine.indexOf('!') == 0) continue; int level; int dlrLevel, pctLevel, grtLevel; // the level of the node is determined by the number // of spaces in the file before the first character // be it dollar sign, percent symbol or greater than // symbol dlrLevel = currentLine.indexOf('$'); pctLevel = currentLine.indexOf('%') > 0 ? currentLine.indexOf('%') : 999999; grtLevel = currentLine.indexOf('<') > 0 ? currentLine.indexOf('<') : 999999; if (dlrLevel == 0) level = dlrLevel; else { level = pctLevel < grtLevel ? pctLevel : grtLevel; } Node newNode = null; if (level == 0) { // create the root of the GOTree newNode = createNode(currentLine, null); root = newNode; } else { // this is not the root of the GO treem so deal // with parents and the like int previousLevel = currentBranch.size() - 1; if (level < previousLevel) { // this new node is not a child of the node // listed on the previous line // remove all nodes that do not belong on // this branch of the tree. 'This Branch' is // defined as the single path leading from // root to this node as read from the flat-file /* Use this section once JDK1.2 is installed currentBranch.removeRange(level, previousLevel+1); */ // remove this for-loop when JDK1.2 is used for (int index = previousLevel; index >= level; index--) { currentBranch.removeElementAt(index); } } else if (level == previousLevel) { // this new node is not a child of the node // listed on the previous line; but they // share the same parent // remove the node on the previous level // from the 'branch' currentBranch.removeElementAt(level); } // create the new node, and send a reference to its // parent so that relationships can be set-up // correctly. newNode = (Node) createNode(currentLine, (Node) currentBranch.elementAt(level-1)); } // append this node to the end of the current branch. // this will be used if the node on the next line is a // child of this node. currentBranch.addElement(newNode); if (newNode != null) { String goID = newNode.getGOid(); // make references to the node in this GOTree object allNodes.put(goID, newNode); // provide a 'GO Term' to 'GO ID' translation table names_GOids.put(newNode.getDesc(), goID); // provide a secondary GO ID to primary GO ID // reference, also in the same table that translates // GO Terms to GO IDs; for each and every secondary // GO ID Enumeration sgIDEnum = newNode.getSecondaryGONumbers().elements(); while (sgIDEnum.hasMoreElements()) { names_GOids.put(sgIDEnum.nextElement(), goID); } } } } catch (IOException ioExc) { System.err.println("An exception has occurred in GOTree::GOTree()."); ioExc.printStackTrace(); throw new Exception(); } // end catch(IOExc) } // end constructor /************************************************************************* * * Method: createNode() * Purpose: Creates a node in the GO Tree, which represents a single GO * term. * Parameters: String line - the line containing this node from the flat-file * Node parent - a reference to the parent of this node, for * setting parent-child relationships appropriately * Returns: Node. Returns the newly-created Node object. * ************************************************************************/ private Node createNode(String line, Node parent) { // split the flat-file line into tokens by GO Terms (node, parents..) StringTokenizer st = new StringTokenizer(line, "$%<\n", false); String nodeDesc = "(Unknown)"; // GO term String nodeGOid = "(Unknown)"; // primary GO ID Vector parentGOids = new Vector(); // GO IDs for each distinct parent String nodeLine = ""; // stores the GO term, whether is it the // current node, parent or parts/of boolean firstNode = true; // first GO term on the line? Node newNode = null; // create a dummy node in the case of the root // this helps us to avoid null pointer references later on // PS. This is a cheap hack. if (parent == null) { parent = new Node(); } while (st.hasMoreTokens()) { nodeLine = ((String) st.nextToken()).trim(); // trim whitespaces nodeLine = nodeLine.trim(); // avoid null-length attributes if (nodeLine.length() == 0) continue; // break up each GO term by attributes (GO Term, GO ID, DB ref) StringTokenizer node = new StringTokenizer(nodeLine, ",;"); while (node.hasMoreTokens()) { String attribute = ((String) node.nextToken()).trim(); if (firstNode == true) { // if this is the first node, then the GO description // should be stored if (attribute.indexOf(':') == -1) { nodeDesc = attribute; } else if ((attribute.indexOf("GO:") == 0)) { // extract the GO ID from the line if (nodeGOid.equals("(Unknown)")) { nodeGOid = attribute.substring(3); } else { nodeGOid += ", " + attribute; } try { Integer.valueOf(attribute.substring(3)); if (line.trim().charAt(0) == '%') { parent.addChild(nodeGOid.substring(0,7)); } else if (line.trim().charAt(0) == '<') { parent.addPart(nodeGOid.substring(0,7)); } } catch (NumberFormatException nfExc) { // nfExc.printStackTrace(); System.err.println("WARNING: On the following line, a GO: value is not followed by a valid number (" + nodeGOid + "):\n" + nodeLine); } } } else { // in the case of parents (or part of relationships) // we only need to use the GO: ID to identify // which node is the parent if (attribute.indexOf("GO:") == 0) { nodeGOid = attribute.substring(3); parentGOids.addElement(nodeGOid.substring(0,7)); } } // this line assumes that node's GO ID will come before // its synomym if (attribute.indexOf("synonym:") == 0) { names_GOids.put(attribute.substring(8), nodeGOid.substring(0,7)); } } // end while-loop // create a new node only if this is the first node on the // list; otherwise this GO Term is just indicating a parent // or part-of relationship. if (firstNode == true) { String trimmedLine = line.trim(); newNode = getNode(nodeGOid.substring(0,7)); if (newNode == null) { newNode = new Node(nodeDesc, nodeGOid, parent.getGOid(), trimmedLine.charAt(0), line); } } else { } firstNode = false; } // associate "other" parents GOids with this node Enumeration enum = (Enumeration) parentGOids.elements(); while(enum.hasMoreElements()) { String parentGOid = (String) enum.nextElement(); newNode.addParent(parentGOid); } return newNode; } /************************************************************************* * * Method: getNode() * Purpose: Retreives a node from GOTree, given a (complete) GO Term * or primary or secondary GO ID. * Parameters: String searchTerm - the node to retrieve, either a GO Term * or a GO ID. If a GO Term is used; it must be an exact * match * Returns: Node. Returns the requested Node object; or null if not found * ************************************************************************/ public Node getNode(String searchTerm) { // perform an inital search by primary GO ID Node node = (Node) allNodes.get(searchTerm); // if we can't find the node, look up the secondary GO ID or // GO Description to determine the primary GO ID if (node == null) { String goID = (String) names_GOids.get(searchTerm); if (goID != null) { node = (Node) allNodes.get(goID); } else { node = null; } } return node; } /************************************************************************* * * Method: getNodesMatching() * Purpose: Retrieves all nodes matching a partial string. * Parameters: String searchTerm - the node to retrieve, either a GO Term * or a GO ID. If a GO Term is used; it must be an exact * match * Returns: Vector. A vector list of all node matchin the input searchTerm * Note: This method must iterate through each and every node in the GO * tree, so it is very slow compared to getNode() * ************************************************************************/ public Vector getNodesMatching(String searchTerm) { Vector matchingNodes = new Vector(); // look for all matching GO IDs by iterating through every node's // description Enumeration goIDEnum = allNodes.keys(); while (goIDEnum.hasMoreElements()) { String goID = (String) goIDEnum.nextElement(); if (goID.indexOf(searchTerm) != -1) { matchingNodes.addElement((Node) allNodes.get(goID)); } } if (matchingNodes.size() == 0) { // look for all matching descriptions Enumeration goDescEnum = names_GOids.keys(); while (goDescEnum.hasMoreElements()) { String goDesc = (String) goDescEnum.nextElement(); if (goDesc.indexOf(searchTerm) != -1) { matchingNodes.addElement((Node) allNodes.get((String) names_GOids.get(goDesc))); } } } return matchingNodes; } /************************************************************************* * * Method: getRoot() * Purpose: Retrieves the root node of the GO Tree. * Parameters: None. * Returns: Node. The root of the GO Tree; null if the root is not defined * Note: This method is useful if one needs to traverse the tree in * anyway. * ************************************************************************/ public Node getRoot() { return root; } /************************************************************************* * * Method: getParents() * Purpose: Retrieves the parents of a single node. * Parameters: String searchTerm. The GO ID or GO description of the node * that you are seeking the parents of. * Returns: Vector. All the parent nodes of the current node (only * traverses one level upwards). * * If the node cannot be found, null is returned. * * If there are no parents, an empty vector is returned. ************************************************************************/ public Vector getParents(String searchTerm) { Node startNode = getNode(searchTerm); if (startNode == null) return null; return startNode.getParents(); } /************************************************************************* * * Method: getChildren() * Purpose: Retrieves the children of a single node. * Parameters: String searchTerm. The GO ID or GO description of the node * that you are seeking the childen of. * Returns: Vector. All the children nodes of the current node (only * traverses one level downwards) * * If the node cannot be found, null is returned. * * If there are no children, an empty vector is returned. * ************************************************************************/ public Vector getChildren(String searchTerm) { Node startNode = getNode(searchTerm); if (startNode == null) return null; return startNode.getChildren(); } /************************************************************************* * * Method: getAllNodeKeys() * Purpose: Retrieves the node keys (primary GO IDs) of all terms. * Parameters: None. * Returns: Enumeration. Contains all keys for the tree. * * If nothing is found, an empty Enumeration object is returned. ************************************************************************/ public Enumeration getAllNodeKeys() { return allNodes.keys(); } /************************************************************************* * * Method: getParentsString() * Purpose: Retrieves the node keys (primary GO IDs) of all parents * of a given GO Term * Parameters: Node node. The GO Term whose parents are sought. * Returns: String. GO Terms of all parents, as listed in the flat-file. * If the node cannot be found, or this node has no parents, * then an empty String is returned. * Note: This method only locates parents which are one-level higher * than the specified node. * ************************************************************************/ public String getParentsString(Node node) { String output = new String(); if (node == null) return output; Enumeration pEnum = (Enumeration) node.getParents().elements(); while (pEnum.hasMoreElements()) { String parentGOid = (String) pEnum.nextElement(); Node parent = getNode(parentGOid); try { output += FileReaderDemo.cleanLine(parent.getFlatFileLine()) + "\n"; } catch (NullPointerException npExc) { System.err.println("The node for the following GO ID could not be found"); System.err.println(" GO ID>" + parentGOid + "<"); } } return output; } /************************************************************************* * * Method: getPartOfString() * Purpose: Retrieves a string representing all GO Terms that this * GO Term is a "part of" * Parameters: Node - the Node whose greater parts you wish to retrieve * Returns: String - all part of nodes, in the GO Flat-file format * Example: If "nuclear membrane" is part of "nucleus" and "cytoplasm" * then passing the "nuclear membrane" node to this function * would return the flat-file string for "nucleus" and * "cytoplasm". The two nodes would be delimited by a \n. * * Passing "nucleus" or "cytoplasm" would NOT return * "nuclear membrane", but probably some other terms in which * "nucleus" or "cytoplasm" were a part of. * * If the node cannot be found, or this node has no parents, * then an empty String is returned. ************************************************************************/ public String getPartOfString(Node node) { String output = new String(); if (node == null) return output; Enumeration prtOfEnum = (Enumeration) node.getPartOfs().elements(); while (prtOfEnum.hasMoreElements()) { Node largerPart = getNode((String) prtOfEnum.nextElement()); try { output += ("%" + FileReaderDemo.cleanLine(largerPart.getFlatFileLine().trim()).substring(1) + "\n"); } catch (NullPointerException npExc) { System.err.println("The node for the following GO ID could not be found"); System.err.println(" GO ID>" + largerPart.getGOid() + "<"); } } return output; } /************************************************************************* * * Method: getNodeDescString() * Purpose: Returns the GO Term, GO ID and external DB reference for a * single node * Parameters: None. * Returns: Enumeration. Contains all keys for the tree. * ************************************************************************/ public String getNodeDescString(Node node) { String output = new String(); if (node == null) return output; output += (" " + FileReaderDemo.cleanLine(node.getFlatFileLine()) + "\n"); return output; } /************************************************************************* * * Method: getChildrenString() * Purpose: Retrieves the node keys (primary GO IDs) of all children * of a given GO Term * Parameters: Node node. The GO Term whose children are sought. * Returns: String. GO Terms of all children, as listed in the flat-file. * If the node cannot be found, or this node has no children, * then an empty String is returned. * Note: This method only locates children which are one-level below * the specified node. * ************************************************************************/ public String getChildrenString(Node node) { String output = new String(); if (node == null) return output; Enumeration cEnum = (Enumeration) node.getChildren().elements(); while (cEnum.hasMoreElements()) { String childGOid = (String) cEnum.nextElement(); Node child = getNode(childGOid); try { output += (" " + "%" + FileReaderDemo.cleanLine(child.getFlatFileLine().trim()).substring(1) + "\n"); } catch (NullPointerException npExc) { System.err.println("The node for the following GO ID could not be found"); System.err.println(" GO ID>" + childGOid + "<"); } } return output; } /************************************************************************* * * Method: getPartsString() * Purpose: Retrieves a string representing parts of a GO Term * Parameters: Node node - the Node whose parts you wish to retrieve * Returns: String - all nodes that are part of node, in the GO * Flat-file format * Example: If "nuclear membrane" is part of "nucleus" and "cytoplasm" * then passing the "nuclear membrane" node to this function * would NOT return the flat-file string for "nucleus" and * "cytoplasm". * * However, passing the "nucleus" node would return * "nuclear membrane", along with some other terms in which * are a part of "nucleus". * * If the node cannot be found, or this node has no parents, * then an empty String is returned. ************************************************************************/ public String getPartsString(Node node) { String output = new String(); if (node == null) return output; Enumeration prtEnum = (Enumeration) node.getParts().elements(); while (prtEnum.hasMoreElements()) { Node part = getNode((String) prtEnum.nextElement()); try { output += (" " + "<" + FileReaderDemo.cleanLine(part.getFlatFileLine().trim()).substring(1) + "\n"); } catch (NullPointerException npExc) { System.err.println("The node for the following GO ID could not be found"); System.err.println(" GO ID>" + part.getGOid() + "<"); } } return output; } } // END of GOTree.java /***************************************************************************/