/***************************************************************************
 *   Copyright (C) 2005 by Andreas Pokorny                                 *
 *   andreas.pokorny@biozentrum.uni-wuerzburg.de                           *
 *                                                                         *
 *   This file is part of profdist and cbcanalyzer                         *
 *                                                                         *
 *   Both profdist and cbcanalyzer are free software; you can redistribute * 
 *   it and/or modify it under the terms of the GNU General Public License * 
 *   as published by the Free Software Foundation; either version 2 of the * 
 *   License, or (at your option) any later version.                       *
 *                                                                         *
 *   Profdist and cbcanalyzer are distributed in the hope that it will be  *
 *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty   *
 *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/

#include <algorithm>
#include <utility>
#include <stdexcept>
#include "aligncode.h"

using namespace std;
using namespace profdist;

// Out-of-class definition of the static mapper member of AlignCode. The code in
// this file uses its dna2code table to translate characters into internal codes
// and its code2dna table for the reverse direction.
const DNAMapper AlignCode::mapper;

// Creates an alignment without explicit initialisation: every member is
// default-constructed.
AlignCode::AlignCode( )
{
  // nothing to set up
}

// Creates an alignment of num_sequences sequences with num_sites sites each,
// where every site of every sequence is a gap ('-').
//
// The first sequence is stored as the reference sequence; each further
// sequence gets an (initially empty) difference list and a count matrix whose
// gap/gap cell is num_sites.
//
// num_sequences == 0 is accepted and yields no difference lists and no count
// matrices; previously "num_sequences - 1" wrapped around to the largest
// size_t value and requested an impossibly large vector.
AlignCode::AlignCode( size_t num_sequences, size_t num_sites )
  : alignment_codes( num_sequences ? num_sequences - 1 : 0, AlignCode::d_list() )
  , sequence_names( num_sequences, std::string("not available"))
  , reference_sequence( num_sites, mapper.dna2code['-'] )
{
  // every non-reference sequence pairs a gap with a gap at every site
  count_matrix base(0U);
  base(mapper.dna2code['-'], mapper.dna2code['-']) = num_sites; 
  count_matrices.resize( num_sequences ? num_sequences - 1 : 0, base );
}

// Returns the number of sites, i.e. the length of the reference sequence.
size_t AlignCode::get_num_sites() const
{
  const size_t site_count = reference_sequence.size();
  return site_count;
}


// Returns the number of sequences: one more than the number of difference
// lists, or 0 when there are no difference lists at all.
size_t AlignCode::get_num_sequences() const
{
  if( alignment_codes.empty() )
    return 0;
  return alignment_codes.size() + 1;
}

// Empties all four containers: difference lists, sequence names, count
// matrices and the reference sequence.
void AlignCode::clear()
{
  alignment_codes.clear();
  sequence_names.clear();
  count_matrices.clear();
  reference_sequence.clear();
}


// Drops all stored differences and counts, then resizes the containers to
// hold num_sequences sequences with num_sites sites.
//
// All existing difference lists are emptied and all existing count matrices
// are reset to zero before resizing. The reference sequence and the sequence
// names are only resized; entries that survive the resize keep their content.
//
// num_sequences == 0 is accepted (no difference lists / count matrices);
// previously "num_sequences - 1" wrapped around to the largest size_t value.
void AlignCode::clear_resize( size_t num_sequences, size_t num_sites )
{
  const size_t num_compared = num_sequences ? num_sequences - 1 : 0;
  count_matrix base(0U);
  if( get_num_sequences() )
  {
    for( size_t i = 0, e = get_num_sequences()-1; i != e; ++i )
    {
      alignment_codes[i].clear();
      count_matrices[i] = base;
    }
  }
  alignment_codes.resize( num_compared );
  reference_sequence.resize( num_sites );
  sequence_names.resize( num_sequences);
  count_matrices.resize( num_compared, base );
}

// Resizes the alignment to num_sequences sequences and num_sites sites while
// keeping the data that still fits.
//
//  - Nothing happens when both sizes are unchanged.
//  - num_sequences == 0 clears the whole object.
//  - num_sites == 0 removes all sites: difference lists are emptied and count
//    matrices zeroed.
//  - Newly added sequences consist of gaps only; newly added sites are gaps in
//    every sequence.
//
// Fixes compared to the previous implementation:
//  - The site difference is no longer computed as an unsigned subtraction that
//    wraps around when the alignment shrinks; the wrapped value used to end up
//    in the gap/gap cell of the count matrices of newly added sequences.
//  - Newly added sequences only get a difference entry where the reference is
//    not itself a gap, so difference lists keep holding real differences only.
void AlignCode::resize( size_t num_sequences, size_t num_sites )
{
  if( num_sequences == get_num_sequences() && num_sites == get_num_sites() ) 
    return;
  if( num_sequences == 0 )
  {
    clear();
    return;
  }
  if( num_sites == 0 ) 
  {
    reference_sequence.clear();
    sequence_names.resize( num_sequences, std::string("not available"));
    alignment_codes.resize( num_sequences - 1);
    count_matrix base(0U);
    count_matrices.resize( num_sequences - 1, base);
    for( size_t i = 0; i < alignment_codes.size(); ++i )
    {
      alignment_codes[i].clear();
      count_matrices[i] = base;
    }
    return;
  }
  
  const size_t old_sites = get_num_sites();
  const size_t kept_sites = min( old_sites, num_sites );
  const bool bigger = num_sites > old_sites;
  // number of appended sites; 0 when the alignment shrinks or keeps its length
  const size_t difference = bigger ? num_sites - old_sites : 0;
  // number of already existing difference lists that survive the resize
  size_t ac_size = min( get_num_sequences(), num_sequences );
  const size_t gap = mapper.dna2code['-'];
  if(ac_size)
    --ac_size;

  // vector::resize keeps the existing elements; additional elements are
  // copies of the value passed as second argument
  count_matrix base(0U);
  d_list empty_list;
  if( get_num_sequences() < num_sequences ) 
  {
    // prototype for the new all-gap sequences: appended sites are gap/gap,
    // kept sites pair the reference character with a gap
    base(gap, gap) = difference; 
    for( size_t i = 0; i < kept_sites; ++i )
    {
      if( static_cast<size_t>( reference_sequence[i] ) != gap )
        empty_list.push_back( a_pair( i, gap ) );
      ++base( reference_sequence[i], gap );
    }
  }

  count_matrices.resize( num_sequences  - 1, base );
  sequence_names.resize( num_sequences, std::string("not available") ); 
  alignment_codes.resize( num_sequences - 1, empty_list );

  // depending on the new sequence length, the remaining count matrices and
  // difference lists have to be adjusted
  if( !bigger )
  {
    // First remove the counts of the sites that are cut off
    for( size_t i = 0; i < ac_size; ++i )
    {
      size_t pos  = 0;
      for( const_sequence_iterator it = begin(i+1) + num_sites, e = end(i+1);
          it != e; ++it, ++pos )
        --count_matrices[i]( reference_sequence[pos + num_sites], *it );
    }

    // Now drop the difference entries that lie beyond the new length
    for( size_t i = 0; i < alignment_codes.size(); ++i )
    {
      d_list &ref = alignment_codes[i];
      while( !ref.empty() && ref.back().first >= num_sites )
        ref.pop_back();
    }
  }
  else 
  {
    // Only the gap/gap values of the surviving count matrices grow
    for( size_t i = 0; i < ac_size; ++i )
      count_matrices[i](gap,gap) += difference; 
  }

  // finally adjust the reference sequence, padding with gaps
  reference_sequence.resize( num_sites, mapper.dna2code['-'] );
}

//------------------------ const_sequence_iterator -------------------------------
// Creates an iterator that is not attached to any alignment: null alignment
// pointer, sequence 0, position 0. The difference-list iterator is not
// mentioned in the initialiser list.
AlignCode::const_sequence_iterator::const_sequence_iterator( )
  : ref(0)
  , seq_index(0)
  , position(0)
{
}


// Converting constructor: builds a read-only iterator that refers to the same
// alignment, sequence, position and difference-list entry as the given
// mutable iterator.
AlignCode::const_sequence_iterator::const_sequence_iterator( AlignCode::sequence_iterator const &s_it )
  : ref( s_it.ref )
  , seq_index( s_it.seq_index )
  , position( s_it.position )
  , it( s_it.it )
{
}

// Makes this read-only iterator refer to the same alignment, sequence,
// position and difference-list entry as the given mutable iterator.
AlignCode::const_sequence_iterator& AlignCode::const_sequence_iterator::operator=( AlignCode::sequence_iterator const &s_it )
{
  it = s_it.it;
  position = s_it.position;
  seq_index = s_it.seq_index;
  ref = s_it.ref;
  return *this;
}

// Creates an iterator pointing at the first site of sequence `seq` of `r`.
//
// For seq > 0 the difference-list iterator starts at the beginning of that
// sequence's difference list. For the reference sequence (seq == 0) it is set
// to the end of the first difference list if there is one; when the alignment
// has no difference lists it stays value-initialised instead of indexing an
// empty vector (which was undefined behaviour).
AlignCode::const_sequence_iterator::const_sequence_iterator( AlignCode const& r, size_t seq )
  : ref(&r), seq_index(seq), position(0), it()
{
  if( seq_index != 0 )
    it = ref->alignment_codes[seq_index-1].begin();
  else if( !ref->alignment_codes.empty() )
    it = ref->alignment_codes[0].end();
}

// Creates an iterator from explicitly given parts: alignment `r`, sequence
// index `seq`, site position `pos` and the matching difference-list iterator.
AlignCode::const_sequence_iterator::const_sequence_iterator( AlignCode const& r, size_t seq, size_t pos, AlignCode::d_list::const_iterator const& it )
  : ref( &r )
  , seq_index( seq )
  , position( pos )
  , it( it )
{
}

// Pre-increment: moves to the next site. For non-reference sequences the
// difference-list iterator is advanced past all entries that lie before the
// new position.
AlignCode::const_sequence_iterator & AlignCode::const_sequence_iterator::operator++()
{
  ++position;
  if( seq_index == 0 )
    return *this;

  d_list::const_iterator last = ref->alignment_codes[seq_index-1].end();
  for( ; it != last && position > it->first; ++it )
  {
    // skipping entries in front of the current position
  }
  return *this;
}

// Post-increment: moves to the next site and returns the iterator state from
// before the move.
AlignCode::const_sequence_iterator AlignCode::const_sequence_iterator::operator++( int )
{
  const_sequence_iterator previous(*this);
  operator++();
  return previous;
}

// Pre-decrement: moves to the previous site.
//
// For non-reference sequences the difference-list iterator is moved back so
// that it again refers to the first entry whose site index is not smaller
// than the new position (the same placement operator++ produces).
//
// The previous implementation decremented the list iterator unconditionally
// when it was at end(), which is undefined for an empty difference list, and
// could leave the iterator on an entry in front of the current position.
AlignCode::const_sequence_iterator & AlignCode::const_sequence_iterator::operator--()
{
  --position;
  if( seq_index != 0 )
  {
    d_list::const_iterator const first = ref->alignment_codes[seq_index-1].begin();
    while( it != first )
    {
      d_list::const_iterator previous = it;
      --previous;
      if( previous->first < position )
        break; // everything further left lies in front of the position
      it = previous;
    }
  }
  return *this;
}

// Post-decrement: moves to the previous site and returns the iterator state
// from before the move.
AlignCode::const_sequence_iterator AlignCode::const_sequence_iterator::operator--( int )
{
  const_sequence_iterator previous(*this);
  operator--();
  return previous;
}

// Returns a copy of this iterator moved by `diff` sites via operator+=.
AlignCode::const_sequence_iterator AlignCode::const_sequence_iterator::operator+( long diff ) const
{
  const_sequence_iterator shifted(*this);
  shifted += diff;
  return shifted;
}

// Returns a copy of this iterator moved by `diff` sites via operator-=.
AlignCode::const_sequence_iterator AlignCode::const_sequence_iterator::operator-( long diff ) const
{
  const_sequence_iterator shifted(*this);
  shifted -= diff;
  return shifted;
}

// Moves the iterator by `diff` sites. Negative values are forwarded to
// operator-= with the sign flipped; zero leaves the iterator untouched.
AlignCode::const_sequence_iterator & AlignCode::const_sequence_iterator::operator+=( long diff )
{
  if( diff == 0 )
    return *this;
  if( diff < 0 )
    return operator-=( -diff );

  // jump to one site before the target, then let operator++ do the final
  // step so that the difference-list iterator gets adjusted as well
  position += diff - 1;
  return operator++();
}
// Moves the iterator back by `diff` sites. Negative values are forwarded to
// operator+= with the sign flipped; zero leaves the iterator untouched.
AlignCode::const_sequence_iterator & AlignCode::const_sequence_iterator::operator-=( long diff )
{
  if( diff == 0 )
    return *this;
  if( diff < 0 )
    return operator+=( -diff );

  // jump to one site behind the target, then let operator-- do the final
  // step so that the difference-list iterator gets adjusted as well
  position -= diff - 1;
  return operator--();
}

// Returns the character code at the current site: the value stored in the
// difference list if the list iterator refers to an entry for exactly this
// position, otherwise the reference sequence's value.
char const& AlignCode::const_sequence_iterator::operator*() const
{
  if( seq_index != 0 )
  {
    bool const has_entry = it != ref->alignment_codes[seq_index-1].end()
      && it->first == position;
    if( has_entry )
      return it->second;
  }
  return ref->reference_sequence[position];
}


// Two read-only iterators are equal when they refer to the same alignment,
// the same sequence and the same site. The difference-list iterator is not
// compared.
bool operator==(AlignCode::const_sequence_iterator const& l, AlignCode::const_sequence_iterator const& r)
{
  if( l.position != r.position )
    return false;
  if( l.ref != r.ref )
    return false;
  return l.seq_index == r.seq_index;
}

// Negation of operator== for read-only iterators.
bool operator!=(AlignCode::const_sequence_iterator const& l, AlignCode::const_sequence_iterator const& r)
{
  bool const same = ( l == r );
  return !same;
}

//--------------------------------- sequence_iterator -----------------------------
// Creates an iterator that is not attached to any alignment: null alignment
// pointer, sequence 0, position 0. The proxy member is bound to this iterator.
AlignCode::sequence_iterator::sequence_iterator( )
  : ref(0)
  , seq_index(0)
  , position(0)
  , local_proxy(*this)
{
}

// Creates a mutable iterator pointing at the first site of sequence `seq` of
// `r`.
//
// For seq > 0 the difference-list iterator starts at the beginning of that
// sequence's difference list. For the reference sequence (seq == 0) it is set
// to the end of the first difference list if there is one; when the alignment
// has no difference lists it stays value-initialised instead of indexing an
// empty vector (which was undefined behaviour).
AlignCode::sequence_iterator::sequence_iterator( AlignCode & r, size_t seq )
  : ref(&r), seq_index(seq), position(0), it(), local_proxy(*this)
{
  if( seq_index != 0 )
    it = ref->alignment_codes[seq_index-1].begin();
  else if( !ref->alignment_codes.empty() )
    it = ref->alignment_codes[0].end();
}

// Creates a mutable iterator from explicitly given parts: alignment `r`,
// sequence index `seq`, site position `pos` and the matching difference-list
// iterator. The proxy member is bound to this iterator.
AlignCode::sequence_iterator::sequence_iterator( AlignCode & r, size_t seq, size_t pos, AlignCode::d_list::iterator const& it )
  : ref( &r )
  , seq_index( seq )
  , position( pos )
  , it( it )
  , local_proxy( *this )
{
}

// Pre-increment: moves to the next site. For non-reference sequences the
// difference-list iterator is advanced past all entries that lie before the
// new position.
AlignCode::sequence_iterator & AlignCode::sequence_iterator::operator++()
{
  ++position;
  if( seq_index == 0 )
    return *this;

  d_list::const_iterator last = ref->alignment_codes[seq_index-1].end();
  for( ; it != last && position > it->first; ++it )
  {
    // skipping entries in front of the current position
  }
  return *this;
}

// Post-increment: moves to the next site and returns the iterator state from
// before the move.
AlignCode::sequence_iterator AlignCode::sequence_iterator::operator++( int )
{
  sequence_iterator previous(*this);
  operator++();
  return previous;
}

// Pre-decrement: moves to the previous site.
//
// For non-reference sequences the difference-list iterator is moved back so
// that it again refers to the first entry whose site index is not smaller
// than the new position (the same placement operator++ produces). Keeping
// that placement matters for writes through the proxy, which insert new
// entries in front of this iterator.
//
// The previous implementation decremented the list iterator unconditionally
// when it was at end(), which is undefined for an empty difference list, and
// could leave the iterator on an entry in front of the current position.
AlignCode::sequence_iterator & AlignCode::sequence_iterator::operator--()
{
  --position;
  if( seq_index != 0 )
  {
    d_list::iterator const first = ref->alignment_codes[seq_index-1].begin();
    while( it != first )
    {
      d_list::iterator previous = it;
      --previous;
      if( previous->first < position )
        break; // everything further left lies in front of the position
      it = previous;
    }
  }
  return *this;
}

// Post-decrement: moves to the previous site and returns the iterator state
// from before the move.
AlignCode::sequence_iterator AlignCode::sequence_iterator::operator--( int )
{
  sequence_iterator previous(*this);
  operator--();
  return previous;
}

// Returns a copy of this iterator moved by `diff` sites via operator+=.
AlignCode::sequence_iterator AlignCode::sequence_iterator::operator+( long diff ) const
{
  sequence_iterator shifted(*this);
  shifted += diff;
  return shifted;
}

// Returns a copy of this iterator moved by `diff` sites via operator-=.
AlignCode::sequence_iterator AlignCode::sequence_iterator::operator-( long diff ) const
{
  sequence_iterator shifted(*this);
  shifted -= diff;
  return shifted;
}



// Moves the iterator forward by `diff` sites.
//
// A negative `diff` moves the iterator backwards by forwarding to operator-=
// with the sign flipped. The previous implementation forwarded to operator+=
// instead, so a negative offset moved the iterator forward; the read-only
// iterator in this file already used operator-= here.
AlignCode::sequence_iterator & AlignCode::sequence_iterator::operator+=( long diff )
{
  if( diff < 0 )
    return *this -= -diff;
  else if( diff )
  {
    // jump to one site before the target, then let operator++ do the final
    // step so that the difference-list iterator gets adjusted as well
    position += diff - 1;
    ++*this;
  }
  return *this;
}
// Moves the iterator back by `diff` sites. Negative values are forwarded to
// operator+= with the sign flipped; zero leaves the iterator untouched.
AlignCode::sequence_iterator & AlignCode::sequence_iterator::operator-=( long diff )
{
  if( diff == 0 )
    return *this;
  if( diff < 0 )
    return operator+=( -diff );

  // jump to one site behind the target, then let operator-- do the final
  // step so that the difference-list iterator gets adjusted as well
  position -= diff - 1;
  return operator--();
}

// Dereferencing yields the iterator's own proxy object; assigning a character
// to that proxy writes to the alignment.
AlignCode::sequence_iterator::CharProxy const& AlignCode::sequence_iterator::operator*() const
{
  CharProxy const& proxy = local_proxy;
  return proxy;
}

// Two mutable iterators are equal when they refer to the same alignment, the
// same sequence and the same site. The difference-list iterator is not
// compared.
bool operator==(AlignCode::sequence_iterator const& l, AlignCode::sequence_iterator const& r)
{
  if( l.position != r.position )
    return false;
  if( (l.ref) != (r.ref) )
    return false;
  return l.seq_index == r.seq_index;
}

// Negation of operator== for mutable iterators.
bool operator!=(AlignCode::sequence_iterator const& l, AlignCode::sequence_iterator const& r)
{
  bool const same = ( l == r );
  return !same;
}
//--------------------------------------CharProxy----------------------------------------------

// Binds the proxy to the iterator it writes through.
AlignCode::sequence_iterator::CharProxy::CharProxy( sequence_iterator const& iter )
  : it( iter )
{
}

// Writes a character to the site the bound iterator currently refers to and
// keeps the difference lists and count matrices in step with the change.
//
// rhs: the new value. Values >= 16 are translated through mapper.dna2code;
//      smaller values are stored unchanged (presumably they already are
//      internal codes -- TODO confirm against DNAMapper).
// Returns this proxy. Nothing is modified when the new value equals the value
// currently stored at the site.
AlignCode::sequence_iterator::CharProxy const& AlignCode::sequence_iterator::CharProxy::operator=( char const& rhs ) const
{
  char copy = rhs>=16?AlignCode::mapper.dna2code[rhs]:rhs;
  char current =*AlignCode::const_sequence_iterator(it); // Note: the const iterator returns a const ref onto a char.
  if( copy != current )
  {
    AlignCode &ref = *(it.ref);
    if( it.seq_index == 0 )
    {
      // The reference sequence changes: every other sequence has to be
      // re-examined at this site.
      for( size_t i = 0; i < ref.alignment_codes.size(); ++ i )
      {
        AlignCode::sequence_iterator iter = ref.begin(i+1) + it.position;
        char other = *AlignCode::const_sequence_iterator(iter);
        
        // change entries in all difference lists
        if( other == current && other != copy ) // there was no entry, thus we add one
          ref.alignment_codes[i].insert(iter.it, a_pair( it.position, other ) );
        else if ( other != current && other == copy ) // there was an entry, but it is not needed anymore
          ref.alignment_codes[i].erase(iter.it);
        // the remaining case: there was a difference and there still is one, so no data modification is needed
        
        // change entries in all count matrices: the pair (old reference value, other)
        // is replaced by (new reference value, other)
        --(ref.count_matrices[i][ current ][ other ]);
        ++(ref.count_matrices[i][ copy][ other ]);
      }
      ref.reference_sequence[it.position] = copy;
    }
    else {
      char ref_entry = ref.reference_sequence[it.position];
      // move one count from (reference value, old value) to (reference value, new value)
      --(ref.count_matrices[it.seq_index-1][ ref_entry][ current ]);
      ++(ref.count_matrices[it.seq_index-1][ ref_entry][ copy ]);

      if( ref_entry == copy ) // delete entry because sequences are equal here
        it.it = ref.alignment_codes[it.seq_index - 1].erase( it.it );
      else if( ref_entry == current ) // add entry because sequences are no longer equal here
        it.it = ref.alignment_codes[it.seq_index - 1].insert( it.it, a_pair( it.position, copy ) );
      else // we just have to modify the a_pair object 
        it.it->second = copy;
 
    }
  }
  return *this;
}

/*AlignCode::sequence_iterator::CharProxy::operator char() const
{
#warning method needs to be implemented
}*/
//--------------------------------------AlignCode--again---------------------------------------

// Returns a read-only iterator to the first site of the given sequence
// (index 0 is the reference sequence).
AlignCode::const_sequence_iterator AlignCode::begin( size_t sequence_index ) const
{
  const_sequence_iterator first( *this, sequence_index );
  return first;
}

// Returns a read-only iterator one past the last site of the given sequence
// (index 0 is the reference sequence).
//
// For the reference sequence the end of the first difference list is used as
// list iterator when such a list exists; when the alignment has no difference
// lists a value-initialised list iterator is used instead of indexing an
// empty vector (which was undefined behaviour).
AlignCode::const_sequence_iterator AlignCode::end( size_t sequence_index ) const
{
  d_list::const_iterator list_end = d_list::const_iterator();
  if( sequence_index )
    list_end = alignment_codes[sequence_index-1].end();
  else if( !alignment_codes.empty() )
    list_end = alignment_codes[0].end();

  return const_sequence_iterator( *this, sequence_index, reference_sequence.size(), list_end );
}

// Returns a mutable iterator to the first site of the given sequence
// (index 0 is the reference sequence).
AlignCode::sequence_iterator AlignCode::begin( size_t sequence_index )
{
  sequence_iterator first( *this, sequence_index );
  return first;
}

// Returns a mutable iterator one past the last site of the given sequence
// (index 0 is the reference sequence).
//
// For the reference sequence the end of the first difference list is used as
// list iterator when such a list exists; when the alignment has no difference
// lists a value-initialised list iterator is used instead of indexing an
// empty vector (which was undefined behaviour).
AlignCode::sequence_iterator AlignCode::end( size_t sequence_index )
{
  d_list::iterator list_end = d_list::iterator();
  if( sequence_index )
    list_end = alignment_codes[sequence_index-1].end();
  else if( !alignment_codes.empty() )
    list_end = alignment_codes[0].end();

  return sequence_iterator( *this, sequence_index, reference_sequence.size(), list_end );
}

// Returns an iterator to the first entry of difference list `i`.
// Note that `i` indexes the difference lists directly; no bounds check is made.
AlignCode::const_diff_iterator AlignCode::begin_difference( size_t i ) const
{
  d_list const& differences = alignment_codes[i];
  return differences.begin();
}

// Returns the past-the-end iterator of difference list `i`.
// Note that `i` indexes the difference lists directly; no bounds check is made.
AlignCode::const_diff_iterator AlignCode::end_difference( size_t i ) const
{
  d_list const& differences = alignment_codes[i];
  return differences.end();
}

// Returns the stored name of the sequence with the given index.
// No bounds check is made.
std::string const& AlignCode::get_sequence_name( size_t sequence_index ) const
{
  std::string const& name = sequence_names[sequence_index];
  return name;
}

// Replaces the stored name of the sequence with the given index by `s`.
// No bounds check is made.
void AlignCode::set_sequence_name( std::string const& s, size_t sequence_index )
{
  std::string & name_slot = sequence_names[sequence_index];
  name_slot = s;
}

// Returns count matrix `i`. Note that `i` indexes the count matrices
// directly; no bounds check is made.
profdist::count_matrix const& AlignCode::get_matrix( size_t i ) const
{
  profdist::count_matrix const& requested = count_matrices[i];
  return requested;
}

void AlignCode::push_back( char item )
{
  item = mapper.dna2code[item];
  
  reference_sequence.push_back(item);
  if( get_num_sequences() )
    for( vector<profdist::count_matrix>::iterator i = count_matrices.begin(), 
        e = count_matrices.end(); i < e; ++i )
      ++((*i)[item][item]);
}

// Appends one site (alignment column) with one character per sequence.
//
// items: the characters of the new column; items[0] belongs to the reference
//        sequence, items[k] to sequence k. Every character is translated
//        through mapper.dna2code.
// Throws std::logic_error when the number of items does not match the number
//        of sequence names, or when the column is empty (previously an empty
//        column on an alignment without sequence names read items[0] out of
//        bounds).
void AlignCode::push_back( vector<char> const& items )
{
  if( items.size() != sequence_names.size() )
    throw logic_error("Sequence items and available sequences do not match");
  if( items.empty() )
    throw logic_error("Cannot append an empty column of sequence items");
  char ref_item = mapper.dna2code[items[0]];

  reference_sequence.push_back( ref_item );
  if( get_num_sequences() )
  {
    vector<profdist::count_matrix>::iterator c_it = count_matrices.begin();
    vector< d_list >::iterator a_it = alignment_codes.begin();
    // start at the second item; "begin() + 1" avoids incrementing a temporary
    // iterator, which does not compile where vector iterators are raw pointers
    for( vector<char>::const_iterator it = items.begin() + 1, end = items.end() ;
        it != end ; ++ it, ++a_it, ++c_it )
    {
      char item = mapper.dna2code[*it];

      // only real differences to the reference are recorded
      if( item != ref_item )
        a_it->push_back( a_pair( reference_sequence.size() - 1, item )  );

      ++((*c_it)[ref_item][item]);
    }
  }
}

void AlignCode::debug( std::ostream & out ) const 
{
  out << "Aligncode: " << get_num_sequences() << " seq " << get_num_sites() << " sites\nref:";
  for( std::size_t i = 0; i < get_num_sites(); ++i ) 
    out << mapper.code2dna[reference_sequence[i]];

  for( std::size_t i = 0; i < get_num_sequences() - 1; ++i ) 
  {
    out << '\n';
    for( d_list::const_iterator it = alignment_codes[i].begin(), e = alignment_codes[i].end(); it!=e; ++it )
      out << '(' << it->first << ", " << mapper.code2dna[it->second] << ')'; 
  }
  out << '\n';
}

// Collects all pairs of sequences whose share of differing sites is below
// the threshold derived from `percentual_identity`.
//
// ids:                 receives both (a, b) and (b, a) for every matching
//                      pair; index 0 is the reference sequence.
// percentual_identity: its absolute value, capped at 1.0, is subtracted from
//                      1.0 to get the maximal allowed fraction of differing
//                      sites (exclusive).
//
// The pairwise loop no longer evaluates "alignment_codes.size() - 1", which
// wrapped around for an alignment without difference lists and made the
// outer loop run (practically) forever.
void AlignCode::get_identical_sequences( tree_types::identical_seq_set & ids, float percentual_identity  ) const
{
  float difference_threshold = 1.0f - std::min( std::abs(percentual_identity) , 1.0f );
  const std::size_t num_lists = alignment_codes.size();
  const float num_sites = float(reference_sequence.size());

  // reference sequence against every other sequence: the length of a
  // difference list is the number of differing sites
  for( std::size_t i = 0; i < num_lists; ++ i )
    if( difference_threshold > float(alignment_codes[i].size()) / num_sites )
    {
      ids.insert( std::make_pair(0, i + 1) );
      ids.insert( std::make_pair(i+1, 0));
    }

  // every pair of non-reference sequences: walk both difference lists in
  // parallel and count the sites where the two sequences disagree
  for( std::size_t index_1 = 0; index_1 + 1 < num_lists; ++index_1 ) {
    for(  std::size_t index_2 = index_1+1; index_2 < num_lists; ++index_2 ){
      const_diff_iterator b_1 = begin_difference( index_1 )
        , e_1 = end_difference( index_1 )
        , b_2 = begin_difference( index_2 )
        , e_2 = end_difference( index_2 );
      std::size_t counter = 0;
      while( b_1 != e_1 && b_2 != e_2 ) {
        if( b_1->first == b_2->first ) 
        {
          // both differ from the reference here; they differ from each other
          // only if their characters are not the same
          counter += (b_1->second != b_2->second );
          ++b_1;
          ++b_2;
        }
        else if( b_1->first < b_2->first )  ++counter, ++b_1;
        else                                ++counter, ++b_2;
      }
      // leftover entries differ from the reference while the other sequence
      // does not
      while( b_1 != e_1 )++counter, ++b_1;
      while( b_2 != e_2 )++counter, ++b_2;

      if( difference_threshold >  float(counter) / num_sites  ){
        ids.insert( std::make_pair(index_1+1, index_2+1));
        ids.insert( std::make_pair(index_2+1, index_1+1));
      }
        
    }
  }

}

// Returns the stored names of all sequences.
std::vector<std::string> const& AlignCode::get_sequence_names() const
{
  std::vector<std::string> const& names = sequence_names;
  return names;
}

