/***************************************************************************
 *   Copyright (C) 2005 by Andreas Pokorny                                 *
 *   andreas.pokorny@biozentrum.uni-wuerzburg.de                           *
 *                                                                         *
 *   This file is part of profdist and cbcanalyzer                         *
 *                                                                         *
 *   Both profdist and cbcanalyzer are free software; you can redistribute * 
 *   it and/or modify it under the terms of the GNU General Public License * 
 *   as published by the Free Software Foundation; either version 2 of the * 
 *   License, or (at your option) any later version.                       *
 *                                                                         *
 *   Profdist and cbcanalyzer are distributed in the hope that it will be  *
 *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty   *
 *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/

/**
 * @file fasta_parser.cpp contains a simple fasta file format parser
 * @author Andreas Pokorny
 * @date 14.12.2004
 */

#include <cassert>
#include <list>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <boost/cstdlib.hpp>
#include <boost/spirit/phoenix.hpp>
#include <boost/spirit/core.hpp>
#include <boost/spirit/core/primitives/primitives.hpp>
#include <boost/spirit/utility/grammar_def.hpp>
#include <boost/spirit/utility/lists.hpp>
#include <boost/spirit/iterator/file_iterator.hpp>
#include <boost/spirit/iterator/position_iterator.hpp>
#include <boost/spirit/core/composite/epsilon.hpp>
#include <boost/spirit/actor/push_back_actor.hpp>
#include <boost/spirit/actor/increment_actor.hpp>
#include <boost/spirit/actor/clear_actor.hpp>

#include "parser.h"
#include "spirit_helper.h"

using namespace std;

#if 0
/**
 * Disabled (compiled out by the surrounding "#if 0") variant of parse_fasta
 * that walks the file through boost::spirit::file_iterator instead of a
 * buffer read with std::ifstream.
 *
 * @param filename  path of the FASTA file to parse
 * @param sequences list that receives one file_sequence per parsed record
 * @throws std::runtime_error if the file cannot be opened or if the input is
 *         not matched completely by the grammar
 */
void parse_fasta( std::string const& filename, std::list<file_sequence>& sequences )
{
  using boost::spirit::file_iterator;
  using boost::spirit::position_iterator;
  using boost::spirit::parse;
  using boost::spirit::parse_info;
  typedef char char_t;
  // The position_iterator wrapper supplies the line/column/file information
  // that is used in the error message below.
  typedef position_iterator<file_iterator<char_t> > iterator_t;
  file_iterator<char_t> file_handle( filename );

  if (!file_handle)
    throw std::runtime_error( ("Unable to open file " + filename) );

  iterator_t first( file_handle, file_handle.make_end(), filename);
  iterator_t last( file_handle.make_end(), file_handle.make_end(), filename);

  using boost::spirit::anychar_p;
  using boost::spirit::alpha_p;
  using boost::spirit::space_p;
  using boost::spirit::chset;
  using boost::spirit::eps_p;
  using boost::spirit::clear_a;
  using boost::spirit::push_back_a;
  using boost::spirit::assign_a;
  using boost::spirit::increment_a;
  file_sequence current_sequence; ///< temporary file_sequence object 
  file_sequence::string_range current_range; ///< temporary storage for list entries 

  // Grammar: zero or more records. Each record is
  //   optional whitespace, '>', the rest of the header line (stored as id),
  //   lf_p, and then one or more sequence blocks or whitespace characters.
  // lf_p is not declared in this file - presumably it comes from
  // spirit_helper.h and matches the end of the header line (TODO confirm).
  parse_info<iterator_t> info = parse( first, last, 
      *(
        *space_p
        >> '>' 
        >> (
          // header text: everything up to, but excluding, '\n' or '\r'
          *( anychar_p - chset<>("\n\r") )
          )
           [assign_a( current_sequence.id )]
        >> lf_p
        >>
        +( 
          // a run of letters or '-'; every matched character bumps
          // sequence_length, the whole run is appended to sequence_data
          (+((alpha_p|'-') [increment_a( current_sequence.sequence_length ) ] ) )
          [assign_a( current_range )]
          [push_back_a( current_sequence.sequence_data, current_range ) ] 
          | space_p 
         ) 
        // eps_p consumes nothing; it only serves as the hook that stores the
        // finished record in sequences and then applies clear_a to the
        // temporary so the next record starts from a cleared object
        >> eps_p
        [boost::spirit::push_back_a( sequences, current_sequence )]
        [clear_a( current_sequence )]
       )
      );

  // info.full is false when the grammar stopped before reaching the end of input
  if( !info.full )
  {
    std::ostringstream out;
    // Report the stop position plus up to 20 characters of context before it.
    out << "Parsing failed at line " << info.stop.get_position().line << " and column " 
      << info.stop.get_position().column << " in file " << info.stop.get_position().file 
      << '\n' << sequences.size() << " Sequences were found.\nError happened near:\n[...]" 
      << std::string( std::max( first, info.stop - 20 ), info.stop) << '\n';
    throw std::runtime_error( out.str() );
  }
}

#endif

#include <iostream>
#include <fstream>

/**
 * Parses a FASTA file and appends one file_sequence per record to sequences.
 *
 * The file content is read into a temporary buffer which is then matched
 * against a Spirit grammar. A record is a '>' header line (stored as the id)
 * followed by runs of letters or '-' (stored in sequence_data and counted in
 * sequence_length) that may be separated by whitespace.
 *
 * @param filename  path of the FASTA file to parse
 * @param sequences list that receives every completely parsed record; records
 *                  parsed before a syntax error remain in the list
 * @throws std::runtime_error if the file cannot be opened, its size cannot be
 *         determined, reading fails, or the input is not matched completely
 *
 * NOTE(review): the buffer is released when this function returns, which was
 * already the case with the previous manual new[]/delete[]. If
 * file_sequence::string_range stores iterators into the buffer rather than
 * copies of the text, the ranges handed back in sequences dangle afterwards -
 * confirm against parser.h.
 */
void parse_fasta( std::string const& filename, std::list<file_sequence>& sequences )
{
  using boost::spirit::position_iterator;
  using boost::spirit::parse;
  using boost::spirit::parse_info;
  typedef position_iterator<const char*> iterator_t;

  std::ifstream in(filename.c_str());

  if (!in)
    throw std::runtime_error( ("Unable to open file " + filename) );

  // Determine the file size; tellg() reports failure as -1, which must not be
  // turned into an (enormous) unsigned buffer size.
  in.seekg(0, std::ios::end);
  std::streamoff const reported_size = in.tellg();
  if( !in || reported_size < 0 )
    throw std::runtime_error( ("Unable to determine size of file " + filename) );
  in.seekg(0, std::ios::beg);

  // The vector owns the buffer, so it is released on every exit path,
  // including exceptions thrown while parsing or while building the message.
  // It is zero-filled and one element larger than the file, so the text stays
  // null-terminated.
  std::vector<char> buffer( static_cast<std::size_t>( reported_size ) + 1, '\0' );
  in.read( &buffer[0], static_cast<std::streamsize>( reported_size ) );
  if( in.bad() )
    throw std::runtime_error( ("Unable to read file " + filename) );

  // Only parse what was really read: in text mode the number of characters
  // delivered can be smaller than the size reported by tellg().
  std::size_t const size = static_cast<std::size_t>( in.gcount() );
  const char* const data = &buffer[0];

  iterator_t first( data, data + size, filename);
  iterator_t last( data + size, data + size, filename);

  using boost::spirit::anychar_p;
  using boost::spirit::alpha_p;
  using boost::spirit::space_p;
  using boost::spirit::chset;
  using boost::spirit::eps_p;
  using boost::spirit::clear_a;
  using boost::spirit::push_back_a;
  using boost::spirit::assign_a;
  using boost::spirit::increment_a;
  file_sequence current_sequence; ///< temporary file_sequence object
  file_sequence::string_range current_range; ///< temporary storage for list entries

  // Grammar: zero or more records, each consisting of optional whitespace,
  // '>', the header text up to the line end, lf_p and at least one sequence
  // run or whitespace character. lf_p is not declared in this file -
  // presumably it is provided by spirit_helper.h (TODO confirm).
  parse_info<iterator_t> info = parse( first, last,
      *(
        *space_p
        >> '>'
        >> (
          *( anychar_p - chset<>("\n\r") )
          )
           [assign_a( current_sequence.id )]
        >> lf_p
        >>
        +(
          (+((alpha_p|'-') [increment_a( current_sequence.sequence_length ) ] ) )
          [assign_a( current_range )]
          [push_back_a( current_sequence.sequence_data, current_range ) ]
          | space_p
         )
        // eps_p matches the empty string; it is only the hook that stores the
        // finished record and clears the temporary for the next one.
        >> eps_p
        [boost::spirit::push_back_a( sequences, current_sequence )]
        [clear_a( current_sequence )]
       )
      );

  if( !info.full )
  {
    // Report the stop position plus up to 20 characters of preceding context.
    std::ostringstream out;
    out << "Parsing failed at line " << info.stop.get_position().line << " and column "
      << info.stop.get_position().column << " in file " << info.stop.get_position().file
      << '\n' << sequences.size() << " Sequences were found.\nError happened near:\n[...]"
      << std::string( std::max( first, info.stop - 20 ), info.stop) << '\n';
    throw std::runtime_error( out.str() );
  }
}



#ifdef BUILD_FASTA_TEST
#include <iostream>
int main( int argc, char ** argv )
{

  if( argc != 1 )
  {
    std::list<file_sequence> seq;
    try { 
      parse_fasta( argv[1], seq );
    }
    catch ( runtime_error & e )
    {
      std::cout << e.what() << std::endl;
    }
    for( std::list<file_sequence>::const_iterator it = seq.begin(), e = seq.end();
        it != e; ++it)
    {
      std::cout << "ID:" << std::string(it->id.first, it->id.second) << std::endl;
      std::cout << "CLASSIFCATION:" << std::endl;
      for( std::list<file_sequence::string_range>::const_iterator _it = it->classification.begin(),
          _e = it->classification.end();
          _it != _e; ++_it )
        std::cout << '|' << std::string(_it->first, _it->second ) << "|";
      std::cout << std::endl << "SEQUENCE:" << std::endl;
      for( std::list<file_sequence::string_range>::const_iterator _it = it->sequence_data.begin(),
          _e = it->sequence_data.end();
          _it != _e; ++_it )
        std::cout << '|' << std::string(_it->first, _it->second ) << "|";
      std::cout << std::endl;
    }
  }
  else {
    std::cout << "Add a file name to the command to test the embl parser" << std::endl;

  }

}

#endif // BUILD_FASTA_TEST

