/***************************************************************************
 *   Copyright (C) 2005 by Andreas Pokorny                                 *
 *   andreas.pokorny@biozentrum.uni-wuerzburg.de                           *
 *                                                                         *
 *   This file is part of profdist and cbcanalyzer                         *
 *                                                                         *
 *   Both profdist and cbcanalyzer are free software; you can redistribute *
 *   it and/or modify it under the terms of the GNU General Public License *
 *   as published by the Free Software Foundation; either version 2 of the *
 *   License, or (at your option) any later version.                       *
 *                                                                         *
 *   Profdist and cbcanalyzer are distributed in the hope that it will be  *
 *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty   *
 *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/

#ifndef PROFDIST_RELAXED_TREE_HPP_INCLUDED
#define PROFDIST_RELAXED_TREE_HPP_INCLUDED

#include <cstddef>
#include <iostream>
#include <list>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include <boost/shared_ptr.hpp>
#include "countedsets.h"
#include "tMatrix.h"

// Forward declaration: profile_map below stores shared pointers to Node
// before the class itself is defined.
class Node;
/**
 * \brief Container typedefs shared by Node and Tree.
 */
namespace tree_types {
  /// Maps a std::size_t key to a shared Node pointer.
  /// NOTE(review): the key is presumably the profile index - confirm against the implementation.
  typedef std::map<std::size_t, boost::shared_ptr<Node> > profile_map;
  /// Set of std::size_t values; used as the type of the known_profiles arguments of find_profile.
  typedef std::set<std::size_t> profile_set;
  /// Set of index pairs; get_identical_sequences fills it with one pair per matrix entry below its threshold.
  typedef std::set<std::pair<std::size_t, std::size_t> > identical_seq_set;
}

/**
 * \brief Node stores a node in a phylogenetic tree.
 * A node is meant to have either two children or no children at all; the
 * child container itself does not enforce this.
 * Nodes are non-copyable (copy constructor and assignment are private).
 */
class Node {
  public:
    typedef CountedSets::set_type set_type;
    typedef boost::shared_ptr<Node> ptr;
    typedef std::vector<ptr> child_container;
    typedef child_container::const_iterator const_iterator;
    typedef child_container::iterator iterator;

    bool is_profile; ///< Indicates that this node has been turned into a profile
    /// Reason why a node was turned into a profile; both inline constructors start with None.
    enum profile_reason{ None, Leaf, Bootstrap, Identity, WasAProfile };
    profile_reason reason; ///< reason recorded for this node
    std::size_t bootstrap_value; ///< bootstrap value of this node (presumably the number of bootstrap replicates supporting it - confirm)
    std::size_t reference_position; ///< reference onto the profile or sequence in the original data structure; equals sequence_index in the beginning
    std::size_t node_index; ///< a unique index identifying this node (presumably within its tree - confirm)
    std::size_t sequence_index; ///< the index in the original alignment
    set_type split_set; ///< stores the split set used to create this node from consensus

    child_container children; ///< children of this node

  private:
    // Declared but intentionally inaccessible: nodes must not be copied.
    Node( Node const& );
    Node& operator=( Node const& );
    void propagate_pos();
  public:
    typedef tree_types::profile_set profile_set;
    typedef tree_types::profile_map profile_map;
    typedef tree_types::identical_seq_set identical_seq_set;

    /**
     * \brief Creates an inner node
     * The node starts as a non-profile with reason None; reference_position
     * and sequence_index are set to 0.
     * \param[in] set a set containing this subtree
     * \param[in] bs bootstrap value
     * \param[in] node index of this node in the tree
     */
    Node( set_type const& set, std::size_t bs, std::size_t node )
      : is_profile(false), reason(None), bootstrap_value(bs), reference_position(0), node_index(node), sequence_index(0), split_set(set) {}

    /**
     * \brief Creates a leaf node
     * Both reference_position and sequence_index are set to \p seq;
     * bootstrap_value and node_index are set to 0 and the split set stays empty.
     * \param[in] seq reference / sequence index
     */
    explicit Node( std::size_t seq )
      : is_profile(false), reason(None), bootstrap_value(0), reference_position(seq), node_index(0), sequence_index(seq) {}

    /**
     * \brief Default constructor, defined outside of this header.
     */
    Node();

    /**
     * \brief adds a child to this node
     * The child is appended at the end of the child container.
     * \param[in] child node to append
     */
    void add_child( ptr child ) { children.push_back(child); }

    /**
     * \brief Returns the reason for turning this node into a profile.
     * This method maps the profile_reason onto a set of strings, e.g., Bootstrap becomes "Bootstrap"
     * \returns the string representation of the profile_reason
     */
    std::string get_reason_str() const;

    /**
     * \brief Initializes a node as a leaf
     * \param[in] node_index a unique index to identify a node
     * \param[in] seq_index index of the sequence this leaf refers to
     */
    void init_leaf_seq( std::size_t node_index, std::size_t seq_index );

    /**
     * \brief Initializes a node as an inner node
     * \param[in] node_index a unique index to identify a node
     * \param[in] bootstrap the bootstrap value
     * \param[in] split_set set of sequences to create that node
     */
    void init_inner_seq( std::size_t node_index, std::size_t bootstrap, set_type const& split_set );

    /**
     * \brief converts a node into a profile
     * \param[in] prof_count profile index
     * \param[in] r reason to create this profile
     */
    void turn_into_profile( std::size_t prof_count, profile_reason r );

    /**
     * \brief converts a node into a profile
     * Like turn_into_profile, with the slight difference that this profile was built on top of a
     * sequence container, and not from previously created profiles.
     * \param[in] prof_count profile index
     * \param[in] r reason to create this profile
     */
    void turn_into_first_profile( std::size_t prof_count, profile_reason r );

    /**
     * \brief create_subtree creates a subtree from a position in the consensus tree.
     * \param[in,out] node_index the next index to be used in nodes
     * \param[in] it the current position in the consensus tree
     * \param[in] end iterator paired with \p it (presumably the end of the split range - confirm)
     * \param[out] visited stores which leaf has been visited or not
     */
    void create_subtree( std::size_t & node_index, CountedSets::const_reverse_iterator it, CountedSets::const_reverse_iterator end, std::vector<char> & visited );

    /**
     * \brief find_profile recursively searches for profiles in the phylogenetic tree
     * \param[out] prof_count number of profiles found
     * \param[out] profiles all profiles found in this step
     * \param[in] known_profiles previously defined profiles - needs clarification
     * \param[in] threshold bootstrap threshold
     * \param[in] pnj_method defines whether the threshold should be used to find additional profiles
     * \returns a profile_reason value; its exact meaning is defined by the implementation
     */
    profile_reason find_profile( std::size_t& prof_count, profile_map& profiles, profile_set const& known_profiles, std::size_t threshold, bool pnj_method  );

    /**
     * \brief find_profile recursively searches for profiles in the phylogenetic tree
     * \param[out] prof_count number of profiles found
     * \param[out] profiles all profiles found in this step
     * \param[in] known_profiles previously defined profiles - needs clarification
     * \param[in] ids a set of identical sequences
     * \param[in] threshold bootstrap threshold
     * \param[in] pnj_method defines whether the threshold should be used to find additional profiles
     * \returns a profile_reason value; its exact meaning is defined by the implementation
     */
    profile_reason find_profile( std::size_t& prof_count, profile_map& profiles, profile_set const& known_profiles, identical_seq_set const& ids, std::size_t threshold, bool pnj_method  );

    /**
     * \brief union_tree adds the content of the nodes found during previous profile calculations to this tree.
     * \param[in] profiles profile nodes found in previous steps
     * \returns a flag whose meaning is defined by the implementation (not visible in this header)
     */
    bool union_tree( profile_map const& profiles );

    /**
     * \brief prints the node using post order in newick format
     * \param[in] out the ostream object to write to
     * \param[in] names all sequence or profile names
     * \param[in] num_bootstraps number of bootstraps made to create the tree, to allow displaying percent values
     * \param[in] print_with_profiles optional flag, defaults to false (presumably toggles profile output - confirm)
     * \returns an ostream reference (presumably \p out)
     */
    std::ostream& post_order( std::ostream & out, std::vector<std::string> const& names, std::size_t num_bootstraps, bool print_with_profiles = false ) const;

    /**
     * \brief print_graphviz_debug prints the node in AT&T's dot format, for debugging purpose
     * \param[in] out the ostream object to write to
     * \param[in] names all sequence or profile names
     * \returns an ostream reference (presumably \p out)
     */
    std::ostream& print_graphviz_debug( std::ostream & out, std::vector<std::string> const& names ) const;

    /// \returns iterator to the first child
    const_iterator begin() const { return children.begin(); }
    /// \returns iterator past the last child
    const_iterator end() const { return children.end(); }
    /// \returns true if this node has no children
    inline bool is_leaf() const { return children.empty();}
    /// Accessor named after reference_position; defined outside this header.
    std::size_t get_reference_position() const;
    /// Accessor named after sequence_index; defined outside this header.
    std::size_t get_sequence_index() const;
    /// Accessor named after node_index; defined outside this header.
    std::size_t get_node_index() const;
    /// Accessor named after split_set; defined outside this header.
    set_type const&  get_split_set() const;

    /**
     * \brief static function used while adding profiles of previous steps to the tree
     * unifies the node with a node found in profiles
     * \param[in,out] node node to replace by node in profiles
     * \param[in] profiles source of all profiles
     * \throws runtime_error
     */
    static void union_node( boost::shared_ptr<Node> &node, profile_map const& profiles );
};

/**
 * \brief Tree stores an unrooted phylogenetic tree.
 * The top level of the tree always has three children. The root carries no
 * content of its own and is picked arbitrarily.
 */
class Tree {
  public:
    typedef tree_types::profile_set profile_set;
    typedef tree_types::profile_map profile_map;
    typedef tree_types::identical_seq_set identical_seq_set;

    /**
     * \brief Default constructor, defined outside of this header.
     */
    Tree();

    /**
     * \brief Builds the consensus tree from a set of compatible tree splits.
     * \param[in] sets the splits together with their bootstrap values
     * \param[in] num_leaves number of sequences or profiles in this step of the algorithm
     * \param[in] num_bootstraps number of bootstraps (presumably the number of replicates made - confirm)
     */
    Tree( CountedSets const& sets, std::size_t num_leaves, std::size_t num_bootstraps);

    /**
     * \brief Builds the tree from a set of tree splits.
     * NOTE(review): judging by the name this variant skips the consensus step;
     * the implementation is not visible here - confirm.
     * \param[in] sets the splits together with their bootstrap values
     * \param[in] num_leaves number of sequences or profiles in this step of the algorithm
     * \param[in] num_bootstraps number of bootstraps (presumably the number of replicates made - confirm)
     */
    void create_tree_bottom_up_without_consense( CountedSets const& sets, std::size_t num_leaves, std::size_t num_bootstraps );

    /**
     * \brief Builds the consensus tree from a set of compatible tree splits.
     * \param[in] sets the splits together with their bootstrap values
     * \param[in] num_leaves number of sequences or profiles in this step of the algorithm
     */
    void create_tree_bottom_up( CountedSets const& sets, std::size_t num_leaves );

    /**
     * \brief Recursively searches the phylogenetic tree for profiles.
     * The profiles found are stored in \p profiles.
     * \param[out] profiles all profiles found in this step
     * \param[in] known_profiles previously defined profiles - needs clarification
     * \param[in] threshold bootstrap threshold
     * \param[in] pnj_method defines whether the threshold should be used to find additional profiles
     */
    void find_profile( profile_map & profiles, profile_set const& known_profiles, std::size_t threshold, bool pnj_method  );

    /**
     * \brief Recursively searches the phylogenetic tree for profiles.
     * The profiles found are stored in \p profiles.
     * \param[out] profiles all profiles found in this step
     * \param[in] known_profiles previously defined profiles - needs clarification
     * \param[in] identicals sequences which are identical according to the identity threshold
     * \param[in] threshold bootstrap threshold
     * \param[in] pnj_method defines whether the threshold should be used to find additional profiles
     */
    void find_profile( profile_map & profiles, profile_set const& known_profiles, identical_seq_set const& identicals, std::size_t threshold, bool pnj_method  );

    /**
     * \brief Replaces leaf nodes with matching profile nodes of previous steps.
     * \param[in] profs profile nodes of previous steps
     */
    void union_tree( profile_map const& profs );

    /**
     * \brief Prints the tree using post order in newick format.
     * \param[in] out the ostream object to write to
     * \param[in] names all sequence or profile names
     * \param[in] num_bootstraps number of bootstraps made to create the tree, to allow displaying percent values
     * \returns an ostream reference (presumably \p out)
     */
    std::ostream& print( std::ostream & out,  std::vector<std::string> const& names, std::size_t num_bootstraps ) const;

    /**
     * \brief Prints the tree in AT&T's dot format, for debugging purpose.
     * \param[in] out the ostream object to write to
     * \param[in] names all sequence or profile names
     * \returns an ostream reference (presumably \p out)
     */
    std::ostream& print_graphviz_debug( std::ostream & out,  std::vector<std::string> const& names ) const;

    /// Start of an iteration range over nodes (presumably the top-level children - confirm).
    Node::const_iterator begin() const;
    /// End of the iteration range started by begin().
    Node::const_iterator end() const;

  private:
    Node::child_container children; ///< top-level nodes of the tree
};

/**
 * \brief Collects the index pairs of all matrix entries below an identity threshold.
 * get_identical_sequences was supposed to find identical sequences, but it is
 * not used since this metric creates strange results.
 *
 * The cut-off is 10.0 / T(id_threshold). Every entry strictly smaller than the
 * cut-off adds one pair to \p ids. The pair is derived from the distance of
 * that entry to the first entry of the matrix: (distance % nRows, distance / nRows).
 * NOTE(review): this relies on the entries being stored contiguously - confirm against tMatrix.
 * NOTE(review): an id_threshold of 0 makes the cut-off computation divide by zero.
 *
 * \param[in] matrix matrix whose entries are compared against the cut-off
 * \param[in,out] ids receives the index pairs; existing content is kept
 * \param[in] id_threshold divisor used to compute the cut-off
 */
template<class T>
void get_identical_sequences( tMatrix<T> const& matrix, tree_types::identical_seq_set & ids, int id_threshold )
{
  typedef typename tMatrix<T>::const_iterator entry_iterator;
  typedef std::pair<std::size_t,std::size_t> index_pair;

  T cutoff = 10.0 / T( id_threshold );

  entry_iterator const last = matrix.end();
  for( entry_iterator entry = matrix.begin(); entry != last; ++entry )
  {
    // Only entries strictly below the cut-off contribute a pair.
    if( !( *entry < cutoff ) )
      continue;

    // Distance of this entry from the first one, split by the row count.
    std::size_t const offset_mod_rows = (&*entry - &*matrix.begin()) % matrix.nRows();
    std::size_t const offset_div_rows = (&*entry - &*matrix.begin()) / matrix.nRows();
    ids.insert( index_pair( offset_mod_rows, offset_div_rows ) );
  }
}

#endif

