/****************************************************************************

  // Author: Kane Tse (KT)
  // Overview:  Creates a Tree to contain all GO nodes and organize them
  //            this tree is only an accessor class, in that it does not
  //            store any information about the GO tree itself, except
  //            for the location of the root.
  //
  //            Information about the tree structure (child/parent/part of)
  //            relationships can be found in the Node class
  // Modifications:
  // June/19/01 - Kane - Created

/***************************************************************************/

package goview;

import java.io.*;
import java.util.Hashtable;
import java.util.StringTokenizer;
import java.util.Vector;
import java.util.Enumeration;

public class GOTree {

     // private instance variables
     private Node root = null;

     // some GO terms have more than one ID, so this hashtable should only
     //   store a single copy of each node (by it's primary GO ID)
     private Hashtable allNodes = new Hashtable(10000);

     // translation table to translate secondary GO IDs and GO terms to
     //   primary GO IDs.  We don't store nodes here in order to reduce
     //   space complexity.
     private Hashtable names_GOids = new Hashtable(10000);

/*************************************************************************
 *
 * Method:     GOTree() - class constructor
 * Purpose:    Creates a GO Tree by reading from a flat file
 * Parameters: FileReader - a file reader object created from the GO flat-file
 * Returns:    None.  Constructors cannot return any values
 *
 ************************************************************************/
     GOTree(FileReader goFileReader) throws Exception {
	  BufferedReader goFileBufferedReader 
	       = new BufferedReader(goFileReader);

	  String currentLine; // the current line of the flat file
	  Vector currentBranch = new Vector(); 
	  // this Vector contains a list of all nodes from the root of the
	  //   tree all the until the current node.  It is used for determining
	  //   the parent of the current node.  (Current node = the node
	  //   that is listed on the current line of the text file)
     
	  try {
	       // keep reading the file until we get to the end of it
	       while (((currentLine = goFileBufferedReader.readLine()) != null)
		      && !(currentLine.equals("$"))) {
		    
		    // skip comment lines
		    if (currentLine.indexOf('!') == 0) continue;
		    
		    int level;
		    int dlrLevel, pctLevel, grtLevel;

		    // the level of the node is determined by the number
		    //   of spaces in the file before the first character
		    //   be it dollar sign, percent symbol or greater than
		    //   symbol
		    dlrLevel = currentLine.indexOf('$');
		    pctLevel = currentLine.indexOf('%') > 0 
			 ? currentLine.indexOf('%') : 999999;
		    grtLevel = currentLine.indexOf('<') > 0
			 ? currentLine.indexOf('<') : 999999;

		    if (dlrLevel == 0) level = dlrLevel;
		    else {
			 level = pctLevel < grtLevel ? pctLevel : grtLevel;
		    }

		    Node newNode = null;

		    if (level == 0) {
			 // create the root of the GOTree
			 newNode = createNode(currentLine, null);
			 root = newNode;
		    }
		    else {
			 // this is not the root of the GO treem so deal
			 //   with parents and the like
			 int previousLevel = currentBranch.size() - 1;
			 
			 if (level < previousLevel) {
			      // this new node is not a child of the node
			      //   listed on the previous line
			      

			      // remove all nodes that do not belong on 
			      //   this branch of the tree.  'This Branch' is
			      //   defined as the single path leading from
			      //   root to this node as read from the flat-file

			      /* Use this section once JDK1.2 is installed
			      currentBranch.removeRange(level, 
							previousLevel+1);
							*/

			      // remove this for-loop when JDK1.2 is used
			      for (int index = previousLevel;
				   index >= level;
				   index--) {
				   currentBranch.removeElementAt(index);
			      }
			 }
			 else if (level == previousLevel) {
			      // this new node is not a child of the node
			      //   listed on the previous line; but they
			      //   share the same parent
			      
			      // remove the node on the previous level
			      //   from the 'branch'
			      currentBranch.removeElementAt(level);
			 }
			 
			 // create the new node, and send a reference to its
			 //   parent so that relationships can be set-up
			 //   correctly.

			 newNode 
			      = (Node) createNode(currentLine, 
						  (Node) currentBranch.elementAt(level-1));

		    }

		    // append this node to the end of the current branch.
		    //   this will be used if the node on the next line is a
		    //   child of this node.
		    currentBranch.addElement(newNode);
		    if (newNode != null) {
			 String goID = newNode.getGOid();

			 // make references to the node in this GOTree object
			 allNodes.put(goID, 
				      newNode);

			 // provide a 'GO Term' to 'GO ID' translation table
			 names_GOids.put(newNode.getDesc(),
					 goID);

			 // provide a secondary GO ID to primary GO ID
			 //   reference, also in the same table that translates
			 //   GO Terms to GO IDs; for each and every secondary
			 //   GO ID
			 Enumeration sgIDEnum 
			      = newNode.getSecondaryGONumbers().elements();
			 while (sgIDEnum.hasMoreElements()) {
			      names_GOids.put(sgIDEnum.nextElement(),
					      goID);
			 }
		    }
	       }
	  }
	  catch (IOException ioExc) {
	       System.err.println("An exception has occurred in GOTree::GOTree().");
	       ioExc.printStackTrace();
	       throw new Exception();
	  } // end catch(IOExc)
     } // end constructor

/*************************************************************************
 *
 * Method:     createNode()
 * Purpose:    Creates a node in the GO Tree, which represents a single GO
 *               term.
 * Parameters: String line - the line containing this node from the flat-file
 *             Node parent - a reference to the parent of this node, for
 *               setting parent-child relationships appropriately
 * Returns:    Node. Returns the newly-created Node object.
 *
 ************************************************************************/
     private Node createNode(String line, Node parent) {

	  // split the flat-file line into tokens by GO Terms (node, parents..)
	  StringTokenizer st = new StringTokenizer(line, "$%<\n", false);

	  String nodeDesc = "(Unknown)"; // GO term
	  String nodeGOid = "(Unknown)"; // primary GO ID
	  Vector parentGOids = new Vector(); // GO IDs for each distinct parent

	  String nodeLine = ""; // stores the GO term, whether is it the
                            	  // current node, parent or parts/of
	  boolean firstNode = true; // first GO term on the line?
	  Node newNode = null;

	  // create a dummy node in the case of the root
	  //   this helps us to avoid null pointer references later on
	  //   PS.  This is a cheap hack.
	  if (parent == null) {
	       parent = new Node();
	  }
	  
	  while (st.hasMoreTokens()) {
	       
	       nodeLine = ((String) st.nextToken()).trim();

	       // trim whitespaces
	       nodeLine = nodeLine.trim();

	       // avoid null-length attributes
	       if (nodeLine.length() == 0) continue;

	       // break up each GO term by attributes (GO Term, GO ID, DB ref)
	       StringTokenizer node = new StringTokenizer(nodeLine, ",;");

	       while (node.hasMoreTokens()) {	       
		    String attribute = ((String) node.nextToken()).trim();

		    if (firstNode == true) {

			 // if this is the first node, then the GO description
			 //   should be stored
			 if (attribute.indexOf(':') == -1) {
			      nodeDesc = attribute;
			 }
			 else if ((attribute.indexOf("GO:") == 0)) {
			      // extract the GO ID from the line
			      if (nodeGOid.equals("(Unknown)")) {
				   nodeGOid = attribute.substring(3);
			      }
			      else {
				   nodeGOid += ", " + attribute;
			      }

			      try {
				   Integer.valueOf(attribute.substring(3));

				   if (line.trim().charAt(0) == '%') {
					parent.addChild(nodeGOid.substring(0,7));
				   }
				   else if (line.trim().charAt(0) == '<') {
					parent.addPart(nodeGOid.substring(0,7));
				   }

			      }
			      catch (NumberFormatException nfExc) {
				   // nfExc.printStackTrace();
				   System.err.println("WARNING: On the following line, a GO: value is not followed by a valid number (" + nodeGOid + "):\n" +
						      nodeLine);
			      }
			 }			 
		    }
		    else {
			 // in the case of parents (or part of relationships)
			 //   we only need to use the GO: ID to identify
			 //   which node is the parent
			 if (attribute.indexOf("GO:") == 0) {
			      nodeGOid = attribute.substring(3);
			      parentGOids.addElement(nodeGOid.substring(0,7));
			 }
		    }
		    // this line assumes that node's GO ID will come before
		    //   its synomym
		    if (attribute.indexOf("synonym:") == 0) {
			 names_GOids.put(attribute.substring(8),
					 nodeGOid.substring(0,7));
		    }
	       } // end while-loop

	       // create a new node only if this is the first node on the
	       //   list; otherwise this GO Term is just indicating a parent
	       //   or part-of relationship.
	       if (firstNode == true) {
		    String trimmedLine = line.trim();

		    newNode = getNode(nodeGOid.substring(0,7));
		    if (newNode == null) {
			 newNode = new Node(nodeDesc,
					    nodeGOid,
					    parent.getGOid(),
					    trimmedLine.charAt(0),
					    line);			 
		    }
	       }
	       else {
	       }
		    
	       firstNode = false;	      
	  }

	  // associate "other" parents GOids with this node
	  Enumeration enum = (Enumeration) parentGOids.elements();
	  while(enum.hasMoreElements()) {
	       String parentGOid = (String) enum.nextElement();
	       newNode.addParent(parentGOid);
	  }

	  return newNode;
     }

/*************************************************************************
 *
 * Method:     getNode()
 * Purpose:    Retreives a node from GOTree, given a (complete) GO Term
 *               or primary or secondary GO ID.
 * Parameters: String searchTerm - the node to retrieve, either a GO Term
 *               or a GO ID.  If a GO Term is used; it must be an exact
 *               match
 * Returns:    Node. Returns the requested Node object; or null if not found
 *
 ************************************************************************/
     public Node getNode(String searchTerm) {
	  // perform an inital search by primary GO ID
	  Node node = (Node) allNodes.get(searchTerm);

	  // if we can't find the node, look up the secondary GO ID or
	  //   GO Description to determine the primary GO ID
	  if (node == null) {	       
	       String goID = (String) names_GOids.get(searchTerm);

	       if (goID != null) {
		    node = (Node) allNodes.get(goID);
	       }
	       else {
		    node = null;
	       }
	  }
	  return node;
     }

/*************************************************************************
 *
 * Method:     getNodesMatching()
 * Purpose:    Retrieves all nodes matching a partial string.
 * Parameters: String searchTerm - the node to retrieve, either a GO Term
 *               or a GO ID.  If a GO Term is used; it must be an exact
 *               match
 * Returns:    Vector.  A vector list of all node matchin the input searchTerm
 * Note:       This method must iterate through each and every node in the GO
 *               tree, so it is very slow compared to getNode()
 *
 ************************************************************************/
     public Vector getNodesMatching(String searchTerm) {
	Vector matchingNodes = new Vector();

	// look for all matching GO IDs by iterating through every node's
	//   description
	Enumeration goIDEnum = allNodes.keys();
	while (goIDEnum.hasMoreElements()) {
	     String goID = (String) goIDEnum.nextElement();
	     if (goID.indexOf(searchTerm) != -1) {
		  matchingNodes.addElement((Node) allNodes.get(goID));
	     }
	}

	if (matchingNodes.size() == 0) {
	     // look for all matching descriptions
	     Enumeration goDescEnum = names_GOids.keys();
	     while (goDescEnum.hasMoreElements()) {
		  String goDesc = (String) goDescEnum.nextElement();
		  if (goDesc.indexOf(searchTerm) != -1) {
		       matchingNodes.addElement((Node) allNodes.get((String) names_GOids.get(goDesc)));
		  }
	     }
	}
	return matchingNodes;
     }

/*************************************************************************
 *
 * Method:     getRoot()
 * Purpose:    Retrieves the root node of the GO Tree.
 * Parameters: None.
 * Returns:    Node.  The root of the GO Tree; null if the root is not defined
 * Note:       This method is useful if one needs to traverse the tree in
 *               anyway.
 *
 ************************************************************************/
     public Node getRoot() {
	  return root;
     }
     
/*************************************************************************
 *
 * Method:     getParents()
 * Purpose:    Retrieves the parents of a single node.
 * Parameters: String searchTerm.  The GO ID or GO description of the node
 *               that you are seeking the parents of.
 * Returns:    Vector.  All the parent nodes of the current node (only
 *               traverses one level upwards).
 *
 *               If the node cannot be found, null is returned.
 *
 *               If there are no parents, an empty vector is returned.
 ************************************************************************/
     public Vector getParents(String searchTerm) {
	  Node startNode = getNode(searchTerm);
	  if (startNode == null) return null;
	  return startNode.getParents();
     }
     
/*************************************************************************
 *
 * Method:     getChildren()
 * Purpose:    Retrieves the children of a single node.
 * Parameters: String searchTerm.  The GO ID or GO description of the node
 *               that you are seeking the childen of.
 * Returns:    Vector.  All the children nodes of the current node (only
 *               traverses one level downwards)
 *
 *               If the node cannot be found, null is returned.
 *
 *               If there are no children, an empty vector is returned.
 *
 ************************************************************************/
     public Vector getChildren(String searchTerm) {
	  Node startNode = getNode(searchTerm);
	  if (startNode == null) return null;
	  return startNode.getChildren();
     }

/*************************************************************************
 *
 * Method:     getAllNodeKeys()
 * Purpose:    Retrieves the node keys (primary GO IDs) of all terms.
 * Parameters: None.
 * Returns:    Enumeration.  Contains all keys for the tree.
 *
 *               If nothing is found, an empty Enumeration object is returned.
 ************************************************************************/
     public Enumeration getAllNodeKeys() {	  
	  return allNodes.keys();
     }

/*************************************************************************
 *
 * Method:     getParentsString()
 * Purpose:    Retrieves the node keys (primary GO IDs) of all parents
 *               of a given GO Term
 * Parameters: Node node.  The GO Term whose parents are sought.
 * Returns:    String.  GO Terms of all parents, as listed in the flat-file.
 *               If the node cannot be found, or this node has no parents,
 *               then an empty String is returned.
 * Note:       This method only locates parents which are one-level higher
 *               than the specified node.
 *
 ************************************************************************/
     public String getParentsString(Node node) {
	  String output = new String();

	  if (node == null) return output;

	  Enumeration pEnum = (Enumeration) node.getParents().elements();
	  while (pEnum.hasMoreElements()) {
	       String parentGOid = (String) pEnum.nextElement();
	       Node parent = getNode(parentGOid);
	       
	       try {
		    output += FileReaderDemo.cleanLine(parent.getFlatFileLine()) + "\n";
	       }
	       catch (NullPointerException npExc) {
		    System.err.println("The node for the following GO ID could not be found");
		    System.err.println("  GO ID>" + parentGOid + "<");
	       }
	  }
	  return output;
     }

/*************************************************************************
 *
 * Method:     getPartOfString()
 * Purpose:    Retrieves a string representing all GO Terms that this
 *               GO Term is a "part of"
 * Parameters: Node - the Node whose greater parts you wish to retrieve
 * Returns:    String - all part of nodes, in the GO Flat-file format
 * Example:    If "nuclear membrane" is part of "nucleus" and "cytoplasm"
 *               then passing the "nuclear membrane" node to this function
 *               would return the flat-file string for "nucleus" and 
 *               "cytoplasm".  The two nodes would be delimited by a \n.
 *
 *               Passing "nucleus" or "cytoplasm" would NOT return 
 *               "nuclear membrane", but probably some other terms in which
 *               "nucleus" or "cytoplasm" were a part of.
 *
 *               If the node cannot be found, or this node has no parents,
 *               then an empty String is returned.
 ************************************************************************/
     public String getPartOfString(Node node) {
	  
	  String output = new String();

	  if (node == null) return output;

	  Enumeration prtOfEnum = (Enumeration) node.getPartOfs().elements();
	  while (prtOfEnum.hasMoreElements()) {
	       Node largerPart = getNode((String) prtOfEnum.nextElement());
		
	       try {
		    output += ("%" 
			       + FileReaderDemo.cleanLine(largerPart.getFlatFileLine().trim()).substring(1)
			       + "\n");
	       }
	       catch (NullPointerException npExc) {
		    System.err.println("The node for the following GO ID could not be found");
		    System.err.println("  GO ID>" + largerPart.getGOid() + "<");
	       }
	  }
	  return output;

     }

/*************************************************************************
 *
 * Method:     getNodeDescString()
 * Purpose:    Returns the GO Term, GO ID and external DB reference for a
 *               single node
 * Parameters: None.
 * Returns:    Enumeration.  Contains all keys for the tree.
 *
 ************************************************************************/
     public String getNodeDescString(Node node) {
	  String output = new String();

	  if (node == null) return output;

	  output += ("  " 
		    + FileReaderDemo.cleanLine(node.getFlatFileLine()) 
		    + "\n");

	  return output;
     }

/*************************************************************************
 *
 * Method:     getChildrenString()
 * Purpose:    Retrieves the node keys (primary GO IDs) of all children
 *               of a given GO Term
 * Parameters: Node node.  The GO Term whose children are sought.
 * Returns:    String.  GO Terms of all children, as listed in the flat-file.
 *               If the node cannot be found, or this node has no children,
 *               then an empty String is returned.
 * Note:       This method only locates children which are one-level below
 *               the specified node.
 *
 ************************************************************************/
     public String getChildrenString(Node node) {
	  String output = new String();

	  if (node == null) return output;

	  Enumeration cEnum = (Enumeration) node.getChildren().elements();
	  while (cEnum.hasMoreElements()) {
	       String childGOid = (String) cEnum.nextElement();
	       Node child = getNode(childGOid);

	       try {
		    output += ("    " 
			       + "%"
			       + FileReaderDemo.cleanLine(child.getFlatFileLine().trim()).substring(1)
			       + "\n");
	       }
	       catch (NullPointerException npExc) {
		    System.err.println("The node for the following GO ID could not be found");
		    System.err.println("  GO ID>" + childGOid + "<");
	       }
	  }
	  return output;	  
     }

/*************************************************************************
 *
 * Method:     getPartsString()
 * Purpose:    Retrieves a string representing parts of a GO Term
 * Parameters: Node node - the Node whose parts you wish to retrieve
 * Returns:    String - all nodes that are part of node, in the GO 
 *               Flat-file format
 * Example:    If "nuclear membrane" is part of "nucleus" and "cytoplasm"
 *               then passing the "nuclear membrane" node to this function
 *               would NOT return the flat-file string for "nucleus" and 
 *               "cytoplasm".
 *
 *               However, passing the "nucleus" node would return 
 *               "nuclear membrane", along with some other terms in which
 *               are a part of "nucleus".
 *
 *               If the node cannot be found, or this node has no parents,
 *               then an empty String is returned.
 ************************************************************************/
     public String getPartsString(Node node) {
	  String output = new String();

	  if (node == null) return output;

	  Enumeration prtEnum = (Enumeration) node.getParts().elements();
	  while (prtEnum.hasMoreElements()) {
	       Node part = getNode((String) prtEnum.nextElement());

	       try {
		    output += ("    "
			       + "<"
			       + FileReaderDemo.cleanLine(part.getFlatFileLine().trim()).substring(1)
			       + "\n");
	       }
	       catch (NullPointerException npExc) {
		    System.err.println("The node for the following GO ID could not be found");
		    System.err.println("  GO ID>" + part.getGOid() + "<");
	       } 
	    }
	  return output;
     }

}
// END of GOTree.java 
/***************************************************************************/