/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* libe-book
 * Version: MPL 2.0 / LGPLv2.1+
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Alternatively, the contents of this file may be used under the terms
 * of the GNU Lesser General Public License Version 2.1 or later
 * (LGPLv2.1+), in which case the provisions of the LGPLv2.1+ are
 * applicable instead of those above.
 *
 * For further information visit http://libebook.sourceforge.net
 */

#include <libwpd/WPXBinaryData.h>
#include <libwpd/WPXDocumentInterface.h>

#include <libwpd-stream/libwpd-stream.h>

#include "libebook_utils.h"
#include "EBOOKMemoryStream.h"
#include "EBOOKZlibStream.h"
#include "PDXLZ77Stream.h"
#include "PMLParser.h"
#include "PMLTypes.h"

using boost::scoped_ptr;

using std::string;
using std::vector;

namespace libebook
{

namespace
{

// Type and creator codes a database must carry to be accepted by
// PMLParser::isFormatSupported().
static const uint32_t PML_TYPE = PDX_CODE("PNRd");
static const uint32_t PML_CREATOR = PDX_CODE("PPrs");

/** Encoding of the text records, as decoded from the header by
  * readCompression().
  */
enum Compression
{
  PML_COMPRESSION_UNKNOWN, // any code readCompression() does not recognize
  PML_COMPRESSION_LZ77, // header code 2; also known as PalmDoc
  PML_COMPRESSION_LZ77_OBFUSCATED, // header code 4; PalmDoc, xor-ed with 0xa5
  PML_COMPRESSION_ZLIB, // header code 10
  PML_COMPRESSION_DRM // header codes 260 and 272; rejected by isFormatSupported()
};

// Reads a 16-bit code from the current position of the stream and maps it
// to a Compression value.
Compression readCompression(WPXInputStream *indexStream);

/** Input stream that presents the content of another stream with every
  * byte xor-ed with a fixed value.
  *
  * The whole source stream is read and decoded in the constructor; all
  * stream operations are then served from an in-memory stream.
  */
class XorStream : public WPXInputStream
{
  // Thrown by the constructor if the source stream cannot be read completely.
  class StreamException
  {
  };

public:
  /** Read all of @c stream and decode it.
    *
    * @arg[in] stream the source stream
    * @arg[in] xorValue the value every byte is xor-ed with
    */
  XorStream(WPXInputStream *stream, unsigned char xorValue);

  virtual bool isOLEStream();
  virtual WPXInputStream *getDocumentOLEStream(const char *name);

  virtual const unsigned char *read(unsigned long numBytes, unsigned long &numBytesRead);
  virtual int seek(long offset, WPX_SEEK_TYPE seekType);
  virtual long tell();
  virtual bool atEOS();

private:
  // in-memory stream holding the decoded bytes
  boost::scoped_ptr<WPXInputStream> m_stream;
};

}

namespace
{

/** Translate the compression code of a header into a Compression value.
  *
  * One 16-bit value is read from the current position of the stream.
  *
  * @arg[in] strm stream positioned at the compression code
  * @return the matching compression, or PML_COMPRESSION_UNKNOWN if the
  *   code is not one of the known ones
  */
Compression readCompression(WPXInputStream *strm)
{
  const uint32_t code = readU16(strm, true);

  if (2 == code)
    return PML_COMPRESSION_LZ77;
  if (4 == code)
    return PML_COMPRESSION_LZ77_OBFUSCATED;
  if (10 == code)
    return PML_COMPRESSION_ZLIB;
  if ((260 == code) || (272 == code))
    return PML_COMPRESSION_DRM;

  return PML_COMPRESSION_UNKNOWN;
}

/** Read the whole of @c strm, xor every byte with @c xorValue and keep
  * the result in an in-memory stream.
  *
  * @arg[in] strm the source stream; it is rewound to its start and read
  *   to the end
  * @arg[in] xorValue the value every byte is xor-ed with
  * @throws StreamException if seeking fails, the stream is empty or fewer
  *   bytes than its length could be read
  */
XorStream::XorStream(WPXInputStream *strm, unsigned char xorValue)
  : m_stream()
{
  // determine the length of the source, then rewind it
  const bool reachedEnd = (0 == strm->seek(0, WPX_SEEK_END));
  if (!reachedEnd)
    throw StreamException();
  const long length = strm->tell();
  const bool rewound = (0 == strm->seek(0, WPX_SEEK_SET));
  if (!rewound)
    throw StreamException();
  if (length <= 0)
    throw StreamException();

  unsigned long available = 0;
  const unsigned char *const raw = strm->read(length, available);
  if (available != static_cast<unsigned long>(length))
    throw StreamException();

  // copy the bytes first and decode the copy in place
  vector<unsigned char> decoded(raw, raw + available);
  for (vector<unsigned char>::iterator it = decoded.begin(); decoded.end() != it; ++it)
    *it ^= xorValue;

  m_stream.reset(new EBOOKMemoryStream(&decoded[0], decoded.size()));
}

// Forwards the query to the in-memory stream holding the decoded bytes.
bool XorStream::isOLEStream()
{
  return m_stream->isOLEStream();
}

// Forwards the request to the in-memory stream holding the decoded bytes.
WPXInputStream *XorStream::getDocumentOLEStream(const char *name)
{
  return m_stream->getDocumentOLEStream(name);
}

// Reads from the decoded bytes; the xor was already applied in the
// constructor, so the data is returned as stored.
const unsigned char *XorStream::read(unsigned long numBytes, unsigned long &numBytesRead)
{
  return m_stream->read(numBytes, numBytesRead);
}

// Moves the read position within the decoded bytes.
int XorStream::seek(long offset, WPX_SEEK_TYPE seekType)
{
  return m_stream->seek(offset, seekType);
}

// Reports the read position within the decoded bytes.
long XorStream::tell()
{
  return m_stream->tell();
}

// Reports whether the decoded bytes have been read to the end.
bool XorStream::atEOS()
{
  return m_stream->atEOS();
}

/** Flip a flag in place.
  *
  * @arg[in,out] value the flag to invert
  */
void toggle(bool &value)
{
  value = (false == value);
}

/** Build the property list for a paragraph from the current attributes.
  *
  * Right alignment takes precedence over centering. A page break before
  * the paragraph is requested if the page break attribute is set.
  *
  * @arg[in] attributes the current attributes
  * @return the paragraph properties
  */
WPXPropertyList makeParagraphProperties(const PMLAttributes &attributes)
{
  const char *alignment = 0;
  if (attributes.right)
    alignment = "end";
  else if (attributes.center)
    alignment = "center";

  WPXPropertyList props;

  if (0 != alignment)
    props.insert("fo:text-align", alignment);

  if (attributes.pageBreak)
    props.insert("fo:break-before", true);

  return props;
}

/** Build the property list for a text span from the current attributes.
  *
  * Only italic, underline, bold and small caps are translated. Text is
  * bold if either the bold attribute is set or the bold font is selected.
  *
  * @arg[in] attributes the current attributes
  * @return the character properties
  */
WPXPropertyList makeCharacterProperties(const PMLAttributes &attributes)
{
  const bool useBold = attributes.bold || (PML_FONT_TYPE_BOLD == attributes.font);

  WPXPropertyList props;

  if (attributes.italic)
  {
    props.insert("fo:font-style", "italic");
  }
  if (attributes.underline)
  {
    props.insert("style:text-underline-type", "single");
  }
  if (useBold)
  {
    props.insert("fo:font-weight", "bold");
  }
  if (attributes.smallcaps)
  {
    props.insert("fo:font-variant", "small-caps");
  }

  return props;
}

}

namespace
{

/** Parser for the Palm Markup Language.
  *
  * Plain characters are collected into a text buffer. The buffer is sent
  * to the document as a span when a command that changes character
  * formatting is read or when a paragraph ends.
  */
class MarkupParser
{
  // -Weffc++
  // copying is disabled: declared private and not defined in this file
  MarkupParser(const MarkupParser &other);
  MarkupParser &operator=(const MarkupParser &other);

public:
  /** Create a parser.
    *
    * @arg[in] document the document interface that receives the content
    * @arg[in] imageMap images by name; only a reference is kept, so the
    *   map must outlive the parser
    */
  MarkupParser(WPXDocumentInterface *document, const PMLParser::ImageMap_t &imageMap);
  ~MarkupParser();

  /** Parse an input stream.
    *
    * The function can be called more than once. In that case, the
    * parsing continues with the old state.
    *
    * @arg[in] input input stream
    */
  void parse(WPXInputStream *input);

private:
  // Handlers for a command; each is entered after the backslash (and the
  // command letters) have been read from m_input.
  void readCommand();
  void readImage();
  void readFootnote();
  void readSidebar();
  void readCp1252Code();
  void readUnicodeCode();

  // Read or skip an optional ="..." argument at the current position.
  string readCommandArgument();
  void skipCommandArgument();

  // Flush the pending text and close the paragraph if one is open.
  void insertParagraphBreak();
  // Send the pending text to the document as a span (or drop it if the
  // text is currently invisible).
  void flushText();

private:
  WPXDocumentInterface *m_document;
  const PMLParser::ImageMap_t &m_imageMap;

  // the stream being parsed; only set for the duration of parse()
  WPXInputStream *m_input;

  // current formatting, changed by commands
  PMLAttributes m_attributes;

  // text collected since the last flush
  string m_text;

  // true between openParagraph() and closeParagraph()
  bool m_paragraphOpened;

  // toggled by \v; text flushed while this is false is discarded
  bool m_visible;

  // toggle state of the \s, \b and \l font commands
  bool m_stdFontOpened;
  bool m_boldFontOpened;
  bool m_largeFontOpened;
};

}

namespace
{

// Starts with no input, empty text, no open paragraph, visible text and
// all font toggles off.
MarkupParser::MarkupParser(WPXDocumentInterface *const document, const PMLParser::ImageMap_t &imageMap)
  : m_document(document)
  , m_imageMap(imageMap)
  , m_input(0)
  , m_attributes()
  , m_text()
  , m_paragraphOpened(false)
  , m_visible(true)
  , m_stdFontOpened(false)
  , m_boldFontOpened(false)
  , m_largeFontOpened(false)
{
}

// Nothing to release. Note that pending text is not flushed and an open
// paragraph is not closed here.
MarkupParser::~MarkupParser()
{
}

/** Consume the whole input stream.
  *
  * A backslash starts a command, a newline ends the paragraph and any
  * other byte is appended to the pending text. The previously active
  * input is reinstated when the end of the stream is reached.
  *
  * @arg[in] input input stream
  */
void MarkupParser::parse(WPXInputStream *const input)
{
  WPXInputStream *const previousInput = m_input;
  m_input = input;

  while (!m_input->atEOS())
  {
    const unsigned char ch = readU8(m_input);

    if ('\\' == ch)
      readCommand();
    else if ('\n' == ch)
      insertParagraphBreak();
    else
      m_text.push_back(ch);
  }

  m_input = previousInput;
}

/** Read and process one command.
  *
  * The function is entered after the introducing backslash has been
  * consumed. It reads the command letter(s) and any argument.
  *
  * Commands that change character formatting flush the pending text
  * first, so the text read so far is emitted with the old formatting.
  * Commands that only affect the paragraph (alignment, indent, page
  * break) do not flush. Unknown commands are reported in debug builds
  * and otherwise ignored.
  */
void MarkupParser::readCommand()
{
  const unsigned char c = readU8(m_input);

  switch (c)
  {
  case 'p' :
    m_attributes.pageBreak = true;
    break;
  case 'x' :
    flushText();
    m_attributes.pageBreak = true;
    toggle(m_attributes.chapter);
    break;
  case 'X' :
    // one more character belongs to the command; it is not interpreted
    skip(m_input, 1);
    flushText();
    toggle(m_attributes.chapter);
    break;
  case 'C' :
  {
    // has no visible effect
    const unsigned char cc = readU8(m_input);
    if ('n' != cc)
    {
      EBOOK_DEBUG_MSG(("unknown command C%c\n", cc));
    }
    skipCommandArgument();
    break;
  }
  case 'c' :
    toggle(m_attributes.center);
    break;
  case 'r' :
    toggle(m_attributes.right);
    break;
  case 'i' :
    flushText();
    toggle(m_attributes.italic);
    break;
  case 'u' :
    flushText();
    toggle(m_attributes.underline);
    break;
  case 'o' :
    flushText();
    toggle(m_attributes.overstrike);
    break;
  case 'v' :
    // flush before toggling: the text read so far is emitted or dropped
    // according to the old visibility
    flushText();
    toggle(m_visible);
    break;
  case 't' :
    toggle(m_attributes.indent);
    break;
  case 'T' :
    // TODO: implement me
    skipCommandArgument();
    break;
  case 'w' :
    // no rules
    skipCommandArgument();
    break;
  case 'n' :
    flushText();
    m_attributes.font = PML_FONT_TYPE_NORMAL;
    break;
  case 's' :
    flushText();
    toggle(m_stdFontOpened);
    if (m_stdFontOpened)
      m_attributes.font = PML_FONT_TYPE_STD;
    else
      m_attributes.font = PML_FONT_TYPE_NORMAL;
    break;
  case 'b' :
    flushText();
    toggle(m_boldFontOpened);
    if (m_boldFontOpened)
      m_attributes.font = PML_FONT_TYPE_BOLD;
    else
      m_attributes.font = PML_FONT_TYPE_NORMAL;
    break;
  case 'l' :
    flushText();
    toggle(m_largeFontOpened);
    if (m_largeFontOpened)
      m_attributes.font = PML_FONT_TYPE_LARGE;
    else
      m_attributes.font = PML_FONT_TYPE_NORMAL;
    break;
  case 'B' :
    flushText();
    toggle(m_attributes.bold);
    break;
  case 'S' :
  {
    const unsigned char cc = readU8(m_input);
    switch (cc)
    {
    case 'p' :
      flushText();
      toggle(m_attributes.superscript);
      break;
    case 'b' :
      flushText();
      toggle(m_attributes.subscript);
      break;
    case 'd' :
      readSidebar();
      break;
    default :
      EBOOK_DEBUG_MSG(("unknown command S%c\n", cc));
      break;
    }
    break;
  }
  case 'k' :
    flushText();
    toggle(m_attributes.smallcaps);
    break;
  case '\\' :
    // escaped backslash
    m_text.push_back(c);
    break;
  case 'a' :
    readCp1252Code();
    break;
  case 'U' :
    readUnicodeCode();
    break;
  case 'm' :
    readImage();
    break;
  case 'q' :
  {
    // The command appears with an argument and without one. Peek at the
    // next character to tell the two forms apart and always put it
    // back: if it is not '=', it is ordinary content (text or the
    // backslash of the next command) and must not be lost. The command
    // can also be the last thing in the stream, in which case there is
    // nothing to peek at.
    if (!m_input->atEOS())
    {
      const unsigned char cc = readU8(m_input);
      m_input->seek(-1, WPX_SEEK_CUR);
      if ('=' == cc)
        skipCommandArgument();
    }
    break;
  }
  case 'Q' :
    // links are not implemented
    skipCommandArgument();
    break;
  case '-' :
    // TODO: implement me
    break;
  case 'F' :
  {
    const unsigned char cc = readU8(m_input);
    if ('n' != cc)
      EBOOK_DEBUG_MSG(("unknown command F%c\n", cc));
    else
      readFootnote();
    break;
  }
  case 'I' :
    // ignore
    break;
  default :
    EBOOK_DEBUG_MSG(("skipping unknown command %c\n", c));
    break;
  }
}

/** Read the name of an image and insert that image into the document.
  *
  * The name is the argument of the command. Nothing is inserted if no
  * image of that name is known or if the stored image has no data.
  */
void MarkupParser::readImage()
{
  const std::string name = readCommandArgument();

  const PMLParser::ImageMap_t::const_iterator it = m_imageMap.find(name);
  if (m_imageMap.end() == it)
    return;

  // Taking the address of the first element of an empty container is
  // undefined; an image without data has nothing to show anyway.
  if (it->second.empty())
    return;

  WPXPropertyList props;
  props.insert("libwpd:mimetype", "image/png");
  const WPXBinaryData image(&it->second[0], it->second.size());
  m_document->insertBinaryObject(props, image);
}

void MarkupParser::readFootnote()
{
  // TODO: implement me
  skipCommandArgument();

  while (true)
  {
    const unsigned char c = readU8(m_input);
    if ('\\' == c)
    {
      const unsigned char cF = readU8(m_input);
      const unsigned char cn = readU8(m_input);
      if (('F' != cF) || ('n' != cn))
      {
        EBOOK_DEBUG_MSG(("expected end of footnote, found %c%c\n", cF, cn));
        throw GenericException();
      }
      break;
    }
    else
    {
      // ignore footnote text
    }
  }
}

void MarkupParser::readSidebar()
{
  // TODO: implement me
  skipCommandArgument();

  while (true)
  {
    const unsigned char c = readU8(m_input);
    if ('\\' == c)
    {
      const unsigned char cS = readU8(m_input);
      const unsigned char cd = readU8(m_input);
      if (('S' != cS) || ('d' != cd))
      {
        EBOOK_DEBUG_MSG(("expected end of sidebar, found %c%c\n", cS, cd));
        throw GenericException();
      }
      break;
    }
    else
    {
      m_text.push_back(c);
    }
  }
}

/** Consume the 3 bytes that follow the command.
  *
  * The bytes are not interpreted yet, so no character is produced.
  */
void MarkupParser::readCp1252Code()
{
  // TODO: implement me
  (void) readNBytes(m_input, 3);
}

/** Consume the 4 bytes that follow the command.
  *
  * The bytes are not interpreted yet, so no character is produced.
  */
void MarkupParser::readUnicodeCode()
{
  // TODO: implement me
  (void) readNBytes(m_input, 4);
}

/** Read a command argument of the form ="value".
  *
  * One byte is always consumed. If it is '=', a second byte is consumed;
  * if that is a double quote, everything up to the closing quote is read.
  *
  * @return the text between the quotes, or an empty string if the input
  *   does not have the expected form
  */
string MarkupParser::readCommandArgument()
{
  string argument;

  if ('=' != readU8(m_input))
    return argument;
  if ('"' != readU8(m_input))
    return argument;

  for (unsigned char ch = readU8(m_input); '"' != ch; ch = readU8(m_input))
    argument.push_back(ch);

  return argument;
}

/** Skip a command argument of the form ="value".
  *
  * One byte is always consumed. If it is '=', a second byte is consumed;
  * if that is a double quote, everything up to and including the closing
  * quote is skipped.
  */
void MarkupParser::skipCommandArgument()
{
  if ('=' != readU8(m_input))
    return;
  if ('"' != readU8(m_input))
    return;

  unsigned char ch = 0;
  do
  {
    ch = readU8(m_input);
  }
  while ('"' != ch);
}

/** End the current paragraph.
  *
  * The pending text is flushed first. The paragraph is then closed if
  * one is open.
  */
void MarkupParser::insertParagraphBreak()
{
  flushText();

  if (!m_paragraphOpened)
    return;

  m_document->closeParagraph();
  m_paragraphOpened = false;
}

/** Send the pending text to the document.
  *
  * The text is emitted as one span with the current character
  * properties. A paragraph is opened first if none is open; this uses up
  * the pending page break. Text collected while invisible is discarded.
  */
void MarkupParser::flushText()
{
  const bool nothingToEmit = !m_visible || m_text.empty();
  if (nothingToEmit)
  {
    // drops invisible text; a no-op if the buffer is already empty
    m_text.clear();
    return;
  }

  if (!m_paragraphOpened)
  {
    m_document->openParagraph(makeParagraphProperties(m_attributes), WPXPropertyListVector());
    m_paragraphOpened = true;
    // the page break applies to the paragraph just opened only
    m_attributes.pageBreak = false;
  }

  m_document->openSpan(makeCharacterProperties(m_attributes));
  m_document->insertText(WPXString(m_text.c_str()));
  m_text.clear();
  m_document->closeSpan();
}

}

/** Content of the index record, filled by PMLParser::readIndexRecord().
  *
  * Only the compression and nonTextRecordStart are set for the 202-byte
  * header; the other values keep their defaults in that case.
  */
struct PMLHeader
{
  PMLHeader();

  // number of the first record after the text records
  unsigned nonTextRecordStart;
  unsigned imageCount;
  unsigned footnoteCount;
  unsigned sidebarCount;
  // number of the first image record; readImages() subtracts 1 to get
  // the data record index
  unsigned imageDataRecordStart;
  unsigned metadataRecordStart;
  unsigned footnoteRecordStart;
  unsigned sidebarRecordStart;
  unsigned lastDataRecord;
  Compression compression;
  bool hasMetadata;
  // true if the index record has the 202-byte layout
  bool is202Header;
};

// NOTE(review): this struct has no members and nothing visible in this
// file uses it or defines its constructor -- possibly a leftover; confirm
// before removing.
struct PMLTextParserState
{
  PMLTextParserState();

};

// All counts and record numbers start at 0. The compression defaults to
// PML_COMPRESSION_DRM, which readText() rejects, so a header that is
// never filled in cannot be parsed as text.
PMLHeader::PMLHeader()
  : nonTextRecordStart(0)
  , imageCount(0)
  , footnoteCount(0)
  , sidebarCount(0)
  , imageDataRecordStart(0)
  , metadataRecordStart(0)
  , footnoteRecordStart(0)
  , sidebarRecordStart(0)
  , lastDataRecord(0)
  , compression(PML_COMPRESSION_DRM)
  , hasMetadata(false)
  , is202Header(false)
{
}

// Passes the input and the document on to the base parser and starts
// with a default header and no images.
PMLParser::PMLParser(WPXInputStream *const input, WPXDocumentInterface *const document)
  : PDXParser(input, document)
  , m_header(new PMLHeader())
  , m_imageMap()
{
}

// Nothing to do explicitly here.
PMLParser::~PMLParser()
{
}

bool PMLParser::isFormatSupported(unsigned type, unsigned creator)
{
  if ((PML_TYPE != type) || (PML_CREATOR != creator))
    return false;

  const scoped_ptr<WPXInputStream> record(getIndexRecord());
  const Compression compression = readCompression(record.get());
  return !((PML_COMPRESSION_UNKNOWN == compression) || (PML_COMPRESSION_DRM == compression));
}

// The record is ignored.
void PMLParser::readAppInfoRecord(WPXInputStream *)
{
}

// The record is ignored.
void PMLParser::readSortInfoRecord(WPXInputStream *)
{
}

/** Read the header from the index record.
  *
  * The layout is selected by the size of the record: 132 bytes or 202
  * bytes. A record of any other size leaves the header at its defaults.
  *
  * @arg[in] record the index record
  */
void PMLParser::readIndexRecord(WPXInputStream *record)
{
  // measure the record and rewind it
  record->seek(0, WPX_SEEK_END);
  const unsigned long length = record->tell();
  record->seek(0, WPX_SEEK_SET);

  switch (length)
  {
  case 132 : // header created by Dropbook
  {
    m_header->compression = readCompression(record);
    skip(record, 10);
    m_header->nonTextRecordStart = readU16(record, true);
    skip(record, 6);
    m_header->imageCount = readU16(record, true);
    skip(record, 2);
    m_header->hasMetadata = (1 == readU16(record, true));
    skip(record, 2);
    m_header->footnoteCount = readU16(record, true);
    m_header->sidebarCount = readU16(record, true);
    skip(record, 8);
    m_header->imageDataRecordStart = readU16(record, true);
    skip(record, 2);
    m_header->metadataRecordStart = readU16(record, true);
    skip(record, 2);
    m_header->footnoteRecordStart = readU16(record, true);
    m_header->sidebarRecordStart = readU16(record, true);
    m_header->lastDataRecord = readU16(record, true);
    break;
  }
  case 202 : // header created by Makebook
  {
    // the compression is not read from this header; it is fixed
    m_header->is202Header = true;
    m_header->compression = PML_COMPRESSION_LZ77_OBFUSCATED;
    skip(record, 8);
    m_header->nonTextRecordStart = readU16(record, true);
    break;
  }
  default :
    break;
  }
}

// Does nothing: the data records are fetched and processed in
// readDataRecords() instead.
void PMLParser::readDataRecord(WPXInputStream *record, bool last)
{
  // TODO: implement me
  (void) record;
  (void) last;
}

// The images are collected first, because the text parser looks them up
// by name when it meets an image command.
void PMLParser::readDataRecords()
{
  readImages();
  readText();
}

void PMLParser::readText()
{
  openDocument();

  if (PML_COMPRESSION_LZ77 == m_header->compression)
  {
    MarkupParser parser(getDocument(), m_imageMap);
    for (unsigned i = 1; i < m_header->nonTextRecordStart - 1; ++i)
    {
      const scoped_ptr<WPXInputStream> record(getDataRecord(i - 1));
      PDXLZ77Stream compressedInput(record.get());
      parser.parse(&compressedInput);
    }
  }
  else if (PML_COMPRESSION_LZ77_OBFUSCATED == m_header->compression)
  {
    MarkupParser parser(getDocument(), m_imageMap);
    for (unsigned i = 1; i < m_header->nonTextRecordStart - 1; ++i)
    {
      const scoped_ptr<WPXInputStream> record(getDataRecord(i - 1));
      XorStream unobfuscatedInput(record.get(), 0xa5);
      PDXLZ77Stream uncompressedInput(&unobfuscatedInput);
      parser.parse(&uncompressedInput);
    }
  }
  else if (PML_COMPRESSION_ZLIB == m_header->compression)
  {
    MarkupParser parser(getDocument(), m_imageMap);
    const scoped_ptr<WPXInputStream> block(getDataRecords(0, m_header->nonTextRecordStart - 1));
    EBOOKZlibStream uncompressedInput(block.get());
    parser.parse(&uncompressedInput);
  }
  else
    throw GenericException();

  closeDocument();
}

/** Collect the images of the book into the image map.
  *
  * The 202-byte header has no image table. Every record after the text
  * is therefore probed for the "PNG " signature. Otherwise the header
  * gives the number of the first image record and the image count.
  */
void PMLParser::readImages()
{
  if (m_header->is202Header)
  {
    // Data record indexes are one less than the record numbers stored in
    // the header (cf. the other branch), so the first non-text record is
    // data record nonTextRecordStart - 1. The subtraction is guarded
    // against a 0 from a damaged header.
    const unsigned nonTextStart = m_header->nonTextRecordStart;
    const unsigned firstCandidate = (0 != nonTextStart) ? (nonTextStart - 1) : 0;

    // detect images
    for (unsigned i = firstCandidate; i < getDataRecordCount(); ++i)
    {
      const scoped_ptr<WPXInputStream> record(getDataRecord(i));

      unsigned char p = readU8(record.get());
      unsigned char n = readU8(record.get());
      unsigned char g = readU8(record.get());
      unsigned char s = readU8(record.get());
      if (('P' == p) && ('N' == n) && ('G' == g) && (' ' == s))
        readImage(record.get(), true);
    }
  }
  else
  {
    const unsigned imageStart = m_header->imageDataRecordStart;
    // A start of 0 cannot be converted to a data record index without
    // wrapping around; treat such a header as having no images.
    if (0 == imageStart)
      return;

    for (unsigned i = 0; i != m_header->imageCount; ++i)
    {
      const scoped_ptr<WPXInputStream> record(getDataRecord(imageStart - 1 + i));
      readImage(record.get());
    }
  }
}

/** Read one image record and store the image under its name.
  *
  * The record consists of a 62-byte header followed by the image data.
  * The header starts with the 4-byte signature and a 32-byte name field.
  *
  * @arg[in] record the image record
  * @arg[in] verified true if the caller has already read the signature
  */
void PMLParser::readImage(WPXInputStream *const record, const bool verified)
{
  if (!verified)
    skip(record, 4); // "PNG "

  // Copy the name right away: the buffer returned by the stream is not
  // ours and further reads follow. The field has a fixed size, so it
  // need not contain a terminating 0; never look past its end.
  const unsigned nameLength = 32;
  const unsigned char *const rawName = readNBytes(record, nameLength);
  string name;
  for (unsigned i = 0; (i != nameLength) && (0 != rawName[i]); ++i)
    name.push_back(static_cast<char>(rawName[i]));

  // skip the rest of the header
  skip(record, 62 - 36);

  vector<unsigned char> data;
  while (!record->atEOS())
    data.push_back(readU8(record));

  m_imageMap.insert(ImageMap_t::value_type(name, data));
}

void PMLParser::openDocument()
{
  getDocument()->startDocument();
  WPXPropertyList metadata;
  getDocument()->setDocumentMetaData(metadata);
  getDocument()->openPageSpan(WPXPropertyList());
}

// Closes the page span opened by openDocument() and ends the document.
void PMLParser::closeDocument()
{
  getDocument()->closePageSpan();
  getDocument()->endDocument();
}

} // namespace libebook

/* vim:set shiftwidth=2 softtabstop=2 expandtab: */
