/***************************************************************************
 *   Copyright (C) 2005 by Andreas Pokorny                                 *
 *   andreas.pokorny@biozentrum.uni-wuerzburg.de                           *
 *                                                                         *
 *   This file is part of profdist and cbcanalyzer                         *
 *                                                                         *
 *   Both profdist and cbcanalyzer are free software; you can redistribute * 
 *   it and/or modify it under the terms of the GNU General Public License * 
 *   as published by the Free Software Foundation; either version 2 of the * 
 *   License, or (at your option) any later version.                       *
 *                                                                         *
 *   Profdist and cbcanalyzer are distributed in the hope that it will be  *
 *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty   *
 *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/

#include <fstream>
#include <iterator>
#include <boost/lexical_cast.hpp>
#include "countedsets.h"

// #define PROFDIST_DEBUG 
/**
 * Counts the elements of \p a that do not occur in \p b.
 *
 * Both sets are walked in parallel in their iteration order; every element
 * found in both is counted once, and that number is subtracted from the
 * size of \p a.
 *
 * \param a set whose exclusive elements are counted
 * \param b set the elements of \p a are matched against
 * \return number of elements of \p a that are missing from \p b
 */
std::size_t CountedSets::count_difference( set_type const& a, set_type const& b )  const
{
  std::size_t shared = 0;

  set_type::const_iterator pos_a = a.begin();
  set_type::const_iterator pos_b = b.begin();
  set_type::const_iterator const stop_a = a.end();
  set_type::const_iterator const stop_b = b.end();

  while( pos_b != stop_b && pos_a != stop_a )
  {
    if( *pos_b == *pos_a )
    {
      // Element present in both sets: remember it and advance both cursors.
      ++shared;
      ++pos_b;
      ++pos_a;
    }
    else if( *pos_b < *pos_a )
    {
      ++pos_b;
    }
    else
    {
      ++pos_a;
    }
  }

  return a.size() - shared;
}

/**
 * Tests whether the split \p a is compatible with every split stored in
 * \p base.
 *
 * For each stored split the smaller of the two sets is compared against the
 * larger one via count_difference(). The pair is accepted when the smaller
 * set has no element outside the larger one (difference is zero) or when the
 * difference equals the size of the smaller set. Any value in between makes
 * the splits incompatible.
 *
 * \param base collection of already accepted splits
 * \param a    candidate split
 * \return true if no entry of \p base conflicts with \p a
 */
bool CountedSets::is_compatible( set_counter_type const& base, set_type const& a ) const
{
  const_iterator entry = base.begin();
  const_iterator const stop = base.end();

  while( entry != stop )
  {
    set_type const& other = entry->first;

    // Always subtract the larger set from the smaller one.
    bool const other_is_larger = other.size() > a.size();
    set_type const& smaller = other_is_larger ? a : other;
    set_type const& larger = other_is_larger ? other : a;

    size_t const smaller_size = smaller.size();
    size_t const leftover = count_difference( smaller, larger );

    if( leftover != 0 && leftover < smaller_size )
    {
#ifdef PROFDIST_DEBUG
      std::ofstream deb("debug_data", std::ios::app );
      deb << "Set is not compatible:" << std::endl; 
      print_split_set( deb, a ); 
      deb << "\n to ";
      print_split_set( deb, other ); 
      deb << std::endl << std::endl;
#endif
      return false;
    }

    ++entry;
  }

  return true;
}
/*
void CountedSets::old_consense( std::size_t num_seqs )
{
#ifdef PROFDIST_DEBUG
  std::ofstream deb("debug_data", std::ios::app);
  deb << "Status vor consense:\n" << *this << std::endl;
#endif
  set_type complete_tree;
  for( unsigned int i = 0; i < num_seqs; ++i )
    complete_tree.insert( i );

  typedef std::multimap<std::size_t, const_iterator> sr_type;
  sr_type sorted_rest;

  for( set_counter_type::iterator it = set_counter.begin(), e = set_counter.end(); 
      it != e; 
      ++it )
  {
    if( it->first.size() >= num_seqs - 1 )  // we skip all nearly complete trees 
    {
      set_counter.erase( it );
      break;
    }
    else 
    {
      // Create complementary subtree of set pointed onto by it.
      set_type complementary_tree( complete_tree );
      
      std::size_t db_c=0;
      for( set_type::const_iterator set_it = it->first.begin(), set_end = it->first.end(); 
          set_it != set_end; 
          ++set_it ) 
        complementary_tree.erase( *set_it );

      // search the complementary tree here:
      set_counter_type::iterator comp_it = set_counter.find( complementary_tree );
      if( comp_it != set_counter.end() )  // if found remove the one with the small bootstrap value
      {

#ifdef PROFDIST_DEBUG
        deb << "Eraseing split because of better bootstrapped complementary tree:";
#endif
        if( comp_it->second > it->second )
        {
          set_counter_type::iterator temp(it);
          bool is_begin = temp == set_counter.begin();
          --temp;
#ifdef PROFDIST_DEBUG
          print_split_set( deb, it->first );
#endif
          set_counter.erase( it->first );
          if( is_begin ) it = set_counter.begin();
          else it = temp;
          continue;  // => we will add comp_it later
        }
        else {
#ifdef PROFDIST_DEBUG 
          print_split_set( deb, comp_it->first );
#endif
          set_counter.erase( comp_it );
        }
      }

      sorted_rest.insert(  std::make_pair( it->second, it ) ); 
    }
  }

  // now we fill a new subtree counter from sorted_rest
  set_counter_type new_counter;

  // fill all compatible trees into the new counter
  for( sr_type::const_reverse_iterator it = sorted_rest.rbegin(), e = sorted_rest.rend(); 
      it != e;
      ++it )
  {
    if( is_compatible( new_counter, it->second->first ) )
      new_counter.insert( std::make_pair( it->second->first, it->first ) );
#if 0
    else { 
      deb << "Set is not compatible:" << std::endl; 
      print_split_set( deb, it->second->first  );
      deb << " | bs:" << it->first << std::endl;
    }
#endif
  }

  // swap the data fastly..
  set_counter.swap( new_counter );

#ifdef PROFDIST_DEBUG
  deb << "Status nach consense:\n" << *this << std::endl;
#endif
}
*/

/**
 * Reduces the stored splits to a mutually compatible subset.
 *
 * Steps:
 *  1. Build the set of all sequence indices 0 .. num_seqs-1.
 *  2. For every stored split with more than one element and fewer than
 *     num_seqs-1 elements, determine its complementary split. If the
 *     complement is not stored yet it is inserted into set_counter with the
 *     same count as the split. The pair (split, complement) is recorded
 *     under the split's count.
 *  3. Walk the recorded pairs from highest to lowest count and add to a
 *     fresh counter the split, or otherwise its complement, if it is
 *     compatible with everything accepted so far (see is_compatible()).
 *  4. Replace set_counter by the fresh counter.
 *
 * \param num_seqs total number of sequences; splits are assumed to contain
 *                 indices in the range [0, num_seqs).
 */
void CountedSets::consense( std::size_t num_seqs )
{
#ifdef PROFDIST_DEBUG
  std::ofstream deb("debug_data", std::ios::app);
  deb << "Status vor consense:\n" << *this << std::endl;
#endif
  // The split that contains every sequence index.
  set_type complete_tree;
  for( unsigned int i = 0; i < num_seqs; ++i )
    complete_tree.insert( i );

  // count -> ( split, complementary split ), ordered by ascending count.
  typedef std::multimap<std::size_t, std::pair< const_iterator, const_iterator> > sr_type;
  sr_type sorted_rest;

  // NOTE: complements are inserted into set_counter while it is being
  // traversed. The loop relies on insertion leaving `it` and `e` valid; an
  // inserted complement that is ordered after `it` is visited later as well.
  for( set_counter_type::iterator it = set_counter.begin(), e = set_counter.end(); 
      it != e; 
      ++it )
  {
    std::size_t const split_size = it->first.size();

    // Skip trivial splits (at most one element) and nearly complete ones.
    // Written as `split_size + 1 >= num_seqs` instead of
    // `split_size >= num_seqs - 1` so that num_seqs == 0 cannot wrap around
    // and silently disable the filter.
    if( split_size <= 1 || split_size + 1 >= num_seqs )
      continue;

    // Create the complementary split of the set pointed to by it.
    set_type complementary_tree( complete_tree );
    for( set_type::const_iterator set_it = it->first.begin(), set_end = it->first.end(); 
        set_it != set_end; 
        ++set_it ) 
      complementary_tree.erase( *set_it );

    // Look up the complement; if it is not stored yet, add it with the
    // same count as the split itself.
    set_counter_type::iterator comp_it = set_counter.find( complementary_tree );
    if( comp_it == set_counter.end() )
      comp_it = set_counter.insert( std::make_pair( complementary_tree, it->second ) ).first;

    sorted_rest.insert(  std::make_pair( it->second, std::make_pair( it, comp_it ) ) ); 
  }

  // now we fill a new subtree counter from sorted_rest
  set_counter_type new_counter;

  // Highest counts first: accept the split, or failing that its complement,
  // whenever it is compatible with everything accepted so far.
  for( sr_type::const_reverse_iterator it = sorted_rest.rbegin(), e = sorted_rest.rend(); 
      it != e;
      ++it )
  {
    if( is_compatible( new_counter, it->second.first->first ) )
      new_counter.insert( std::make_pair( it->second.first->first, it->first ) );
    else if( is_compatible( new_counter, it->second.second->first ) )
      new_counter.insert( std::make_pair( it->second.second->first, it->first ) );
#ifdef PROFDIST_DEBUG
    else { 
      deb << "Set is not compatible:" << std::endl; 
      print_split_set( deb, it->second.first->first  );
      deb << " | bs:" << it->first << std::endl;
      deb << "and neither is its complementary set:" << std::endl; 
      print_split_set( deb, it->second.second->first  );
      deb << " | bs:" << it->first << std::endl << std::endl;
    }
#endif
  }

  // Replace the stored splits by the accepted ones without copying.
  set_counter.swap( new_counter );

#ifdef PROFDIST_DEBUG
  deb << "Status nach consense:\n" << *this << std::endl;
#endif
}

/**
 * Adds \p count occurrences of the split \p subtree.
 *
 * If the split is not stored yet it is inserted with \p count as its
 * initial value; otherwise \p count is added to the stored value.
 *
 * \param subtree split to register
 * \param count   number of occurrences to add
 */
void CountedSets::add_set( set_type const& subtree, std::size_t count )
{
#ifdef PROFDIST_DEBUG
  std::ofstream deb("debug_data", std::ios::app);
  // Tag the trace line with the address of this object. The previous code
  // dereferenced reinterpret_cast<long*>(this), which read the object's
  // bytes as a long instead of identifying the object.
  deb << "add_set:" << static_cast<const void*>(this) << " ";
  print_split_set(deb, subtree ); deb << std::endl;
#endif
  set_counter_type::iterator const pos = set_counter.find( subtree );
  if( pos != set_counter.end() )
  {
    pos->second += count;
    return;
  }
  set_counter.insert( std::pair<set_type,std::size_t>( subtree, count ) );
}

/**
 * Looks up the count stored for \p subtree.
 *
 * \param subtree split to look up
 * \return the stored count, or 0 if the split is not stored
 */
std::size_t CountedSets::get_count( set_type const& subtree ) const
{
  set_counter_type::const_iterator const pos = set_counter.find( subtree );
  if( pos != set_counter.end() )
    return pos->second;
  return 0;
}

/// Returns a read-only iterator to the first stored (split, count) entry.
CountedSets::const_iterator CountedSets::begin() const
{
  const_iterator first = set_counter.begin();
  return first;
}

/// Returns the read-only past-the-end iterator of the stored entries.
CountedSets::const_iterator CountedSets::end() const
{
  const_iterator past_last = set_counter.end();
  return past_last;
}

/// Returns a read-only reverse iterator to the last stored entry.
CountedSets::const_reverse_iterator CountedSets::rbegin() const
{
  const_reverse_iterator last = set_counter.rbegin();
  return last;
}

/// Returns the read-only reverse past-the-end iterator of the stored entries.
CountedSets::const_reverse_iterator CountedSets::rend() const
{
  const_reverse_iterator before_first = set_counter.rend();
  return before_first;
}

/**
 * Sanity check for a split; problems are reported on std::cout.
 *
 * A message is printed when the split has more elements than \p num_seq,
 * and one message is printed for every element that is greater than or
 * equal to \p num_seq. Nothing is returned or thrown.
 *
 * \param a       split to inspect
 * \param num_seq number of sequences the split is checked against
 */
void CountedSets::check_set( set_type const& a, size_t num_seq ) const
{
  if( a.size() > num_seq )
  {
    std::cout << "BAD DATASET : num_seq is "<< num_seq
              << " and split has " << a.size() << " elements." << std::endl;
  }

  set_type::const_iterator pos = a.begin();
  set_type::const_iterator const stop = a.end();
  while( pos != stop )
  {
    if( *pos >= num_seq )
    {
      std::cout << "bad element:" << *pos << std::endl;
    }
    ++pos;
  }
}


void CountedSets::clear() {
  set_counter.clear();
}

std::ostream& CountedSets::print( std::ostream& out ) const
{
  for( const_iterator it = begin(), e = end();it != e; ++it)
  {
    print_split_set( out, it->first );
    out << "|" << "count: " << it->second  << "\n";
  }
  return out;
}


/// Stream insertion for CountedSets; forwards to CountedSets::print().
std::ostream& operator<< ( std::ostream & out, CountedSets const& obj )
{
  std::ostream& result = obj.print( out );
  return result;
}

void print_split_set( std::ostream & out, CountedSets::set_type const& split )
{
  if( split.begin() == split.end() ) 
    out << "EmptySplit size: 0";
  else {
    CountedSets::set_type::const_iterator start =split.begin()
      , end = split.begin()
      , end_set = split.end()
      , old = split.begin();
    for(;end != end_set; ++end) {
      if( *end - *old > 1 ) {
        if( *old == *start )
          out << *start<< '|';
        else 
          out << *start << '-' << *old << '|';
        start = end;
      }
      old = end;
    }
    if( *old == *start )
      out << *start;
    else 
      out << *start << '-' << *old;
    out << " size: " << split.size();
  }
}

