
#include "PDF.h"
#include <sys/stat.h>
#include <cctype>
#include <fstream>
#include <limits>
#include <string>
#include "PDF_rep.h"

int PDF::file_length( const PDF &pdf ) {
  return pdf.rep->file_length1;
}

// Scans the named PDF file for the last "startxref" keyword and returns
// the decimal number that follows it.
//
// A keyword counts only if it stands at the very start of the file or
// directly after whitespace, and is itself followed by whitespace.
//
// Returns -1 if the file cannot be read, if no such keyword is found, if
// the last such keyword is followed by something other than a digit, or
// if the number is too large for an int.
int PDF::offset_last_xref_table( const std::string &pdf_filename ) {

  // Update in 2015, seven years after most of the rest of the file and
  // program were written:
  //
  // In an earlier version of the program, this function's body was
  // a one-liner:
  //
  //   return pdf.rep->xref->getLastXRefPos();
  //
  // That worked fine until Libpoppler changed its interface, since
  // which it has hidden the required function in the private section
  // of an interface, unusable here.  In a later version of the
  // program, this function's body was a different one-liner:
  //
  //   return pdf.rep->xref->getEntry(0)->offset;
  //
  // This unfortunately does the wrong thing, though, with effects
  // Salvatore Bonaccorso has noticed and brought to attention.
  // Accordingly, this function itself must now find the position of
  // the last XRef table, as follows.
  //
  // Fortunately, the PDF standard requires the position of an XRef
  // table to be given in plain ascii, so finding the position is not
  // too hard. One must only be sure to find the position of
  // the *last* XRef table.

  const char key_token[] = "startxref";
  const int eof = std::ifstream::traits_type::eof();
  const int int_max = std::numeric_limits<int>::max();

  // A PDF is binary data, so the stream is opened in binary mode to keep
  // the library from translating or cutting short the byte sequence on
  // platforms whose text mode does such things.  If the file cannot be
  // opened, the first get() below reports end of file and -1 is returned.
  std::ifstream pdf_file( pdf_filename, std::ios::in | std::ios::binary );

  int offset = -1;

  // True when the byte before the one about to be read was whitespace
  // (or when nothing has been read yet).  While a partial match of the
  // keyword is in progress, it records the state before the match began.
  bool has_preceding_whitespace = true;

  // Next character of the keyword still to be matched.
  const char *p = key_token;

  for ( int c = pdf_file.get(); c != eof; c = pdf_file.get() ) {

    if ( !has_preceding_whitespace || c != *p ) {
      // Not (part of) the keyword: start over, remembering whether this
      // byte would allow a keyword to begin at the next one.
      p = key_token;
      has_preceding_whitespace = std::isspace(c) != 0;
      continue;
    }

    ++p;
    if (*p) continue; // The keyword is only partly matched so far.

    // The whole keyword has been matched.
    p = key_token;

    // Skip whitespace between the keyword and the offset.  The stream
    // is peeked rather than read so that the first byte which is not
    // whitespace remains available to the code below and, if it is not
    // a digit, to the next pass of the main loop.
    bool has_trailing_whitespace = false;
    while ( ( c = pdf_file.peek() ) != eof && std::isspace(c) ) {
      pdf_file.get();
      has_trailing_whitespace = true;
    }

    // A keyword followed only by whitespace up to the end of the file
    // leaves any previously found offset in force.
    if ( c == eof ) break;

    // The byte before the next unread byte is whitespace exactly when
    // some trailing whitespace was skipped; otherwise it is the
    // keyword's own final letter.
    has_preceding_whitespace = has_trailing_whitespace;

    // Without trailing whitespace, what was matched was merely the
    // prefix of some longer word, which is ignored.
    if ( !has_trailing_whitespace ) continue;

    // The key token has been found, so whatever was found earlier no
    // longer describes the *last* XRef table.
    offset = -1;
    if ( !std::isdigit(c) ) continue;

    // Read the offset, refusing to let the accumulation overflow.
    int value = 0;
    bool overflowed = false;
    while ( ( c = pdf_file.peek() ) != eof && std::isdigit(c) ) {
      pdf_file.get();
      const int digit = c - '0';
      if ( value > ( int_max - digit ) / 10 ) overflowed = true;
      else value = 10 * value + digit;
    }
    if ( !overflowed ) offset = value;

    // The last byte consumed was a digit.
    has_preceding_whitespace = false;

  }

  return offset;

}

// Returns the indirect reference built from the number and generation
// that the PDF's XRef reports through getRootNum() and getRootGen().
PDF::Iref PDF::iref_catalog( const PDF &pdf ) {
  XRef &table = *pdf.rep->xref;
  const auto root_num = table.getRootNum();
  const auto root_gen = table.getRootGen();
  return Iref( root_num, root_gen );
}

// Returns the indirect reference the constructor stored for the
// trailer's "Info" entry.
PDF::Iref PDF::iref_info( const PDF &pdf ) {
  const PDF_rep *const r = pdf.rep;
  return r->info_iref;
}

int PDF::n_obj( const PDF &pdf ) {
  return pdf.rep->n_obj1;
}

int PDF::offset( const PDF &pdf, const int i ) {
  return pdf.rep->xref->getEntry(i)->offset;
}

// Returns the page count reported by the PDF's Libpoppler catalog.
int PDF::n_page( const PDF &pdf ) {
  auto *const cat = pdf.rep->catalog2;
  return cat->getNumPages();
}

// Asks the PDF's Libpoppler catalog which page the indirect reference
// iref belongs to and returns the catalog's answer.  An answer of 0 is
// treated as "not found": it is returned as is when do_not_throw is
// set, and otherwise PDF::Exc_PDF is thrown.
int PDF::i_page(
  const PDF &pdf,
  const Iref iref,
  const bool do_not_throw
) {
  const int page_number =
    pdf.rep->catalog2->findPage( iref.i, iref.gen );
  if ( page_number != 0 || do_not_throw ) return page_number;
  throw PDF::Exc_PDF();
}

// Returns the indirect reference of page i as given by the PDF's
// Libpoppler catalog.  Throws PDF::Exc_PDF if the catalog has no
// reference to give for that page.
PDF::Iref PDF::iref_page( const PDF &pdf, const int i ) {
  const Ref *const page_ref = pdf.rep->catalog2->getPageRef(i);
  if ( page_ref == nullptr ) throw PDF::Exc_PDF();
  const Ref &ref = *page_ref;
  return Iref( ref.num, ref.gen );
}

// The programmer does not feel sure that a little memory is not leaking
// here.  The amount of memory in question is small, and of course the
// system reclaims leaked memory at execution's end, anyway, so the leak
// if any is not serious; but even if not serious, leaking still is not
// neat.  The only documentation for Libpoppler appears to consist of
// its development headers, which seem insufficiently informative in the
// matter.  For these reasons, where in doubt, rather than risking
// improper deallocation, the code leaks.

// Opens the named PDF file through Libpoppler and caches in *rep the
// pieces the rest of the program consults: the file length, the PDFDoc,
// its XRef, the trailer dictionary and its "Size", the catalog (both as
// a dictionary and as a Libpoppler Catalog), and the "Info" reference
// and dictionary.
//
// Throws Exc_IO if the file cannot be stat()ed or if the PDFDoc reports
// that it is not ok.  Throws Exc_PDF if any of the later structural
// checks fails.  When an exception leaves the constructor, the
// destructor does not run, so whatever has been allocated up to that
// point is not released here.
PDF::PDF::PDF( const std::string &filename_pdf )
  : rep( new PDF_rep() )
{
  {
    // File length, straight from the file system.
    struct stat s;
    if ( stat( filename_pdf.c_str(), &s ) ) throw Exc_IO();
    rep->file_length1 = s.st_size;
  }
  {
    // The file name is handed to PDFDoc as a heap-allocated GooString
    // which this code never deletes.  NOTE(review): Libpoppler's
    // PDFDoc, in the releases that offer this raw-pointer constructor,
    // is understood to keep the pointer it is given (not a copy of the
    // string) and to delete it in its own destructor; confirm against
    // the installed PDFDoc.h.  A GooString on the stack would leave
    // the PDFDoc holding a dangling pointer once this scope ended.
    rep->pdfdoc = new PDFDoc( new GooString( filename_pdf.c_str() ) );
    if ( !rep->pdfdoc->isOk() ) throw Exc_IO();
  }
  {
    // The cross-reference table.
    rep->xref = rep->pdfdoc->getXRef();
    if ( !rep->xref->isOk() ) throw Exc_PDF();
  }
  {
    // The trailer, which must be a dictionary.
    Object *const obj = rep->xref->getTrailerDict();
    if ( !obj->isDict() ) throw Exc_PDF();
    rep->trailer = obj->getDict();
  }
  {
    // The trailer's integer "Size" entry.  (The key is copied into a
    // writable array before being passed to lookup().)
    Object obj;
    { char s[] = "Size"; rep->trailer->lookup( s, &obj ); }
    if ( !obj.isInt() ) throw Exc_PDF();
    rep->n_obj1 = obj.getInt();
  }
  {
    // The catalog as a dictionary.  The Object holding it is owned by
    // *rep and is deleted by the destructor.
    rep->catalog_obj = new Object();
    rep->xref->getCatalog( rep->catalog_obj );
    if ( !rep->catalog_obj->isDict() ) throw Exc_PDF();
    rep->catalog = rep->catalog_obj->getDict();
  }
  {
    // The catalog as Libpoppler's own Catalog object.
    rep->catalog2 = rep->pdfdoc->getCatalog();
    if ( !rep->catalog2->isOk() ) throw Exc_PDF();
  }
  {
    // The trailer's "Info" entry, which must be an indirect reference;
    // lookupNF() is used so that the reference itself is obtained.
    Object obj;
    { char s[] = "Info"; rep->trailer->lookupNF( s, &obj ); }
    if ( !obj.isRef() ) throw Exc_PDF();
    const Ref ref = obj.getRef();
    rep->info_iref = Iref( ref.num, ref.gen );
  }
  {
    // The object the "Info" reference points to, which must be a
    // dictionary.  The Object holding it is owned by *rep and is
    // deleted by the destructor.
    rep->info_obj = new Object();
    rep->xref->fetch(
      rep->info_iref.i,
      rep->info_iref.gen,
      rep->info_obj
    );
    if ( !rep->info_obj->isDict() ) throw Exc_PDF();
    rep->info = rep->info_obj->getDict();
  }
}

// Releases the two Objects the constructor allocated, then the
// representation itself.
PDF::PDF::~PDF() {
  PDF_rep *const r = rep;
  delete r->catalog_obj;
  delete r->info_obj;
  // The Libpoppler PDFDoc object (r->pdfdoc) is deliberately *not*
  // deleted: for reasons this programmer does not understand, it does
  // not seem to deallocate gracefully, so it is allowed to leak.
  // NOTE(review): PDFDoc is believed to delete the GooString given to
  // its constructor, so deleting the PDFDoc can only be safe if that
  // string was allocated with new; check how the constructor creates it
  // before enabling a delete here.
  delete r;
}

// Returns the internal representation pointer, but only to a caller
// that supplies the expected magic integer; any other value yields a
// null pointer.
PDF::PDF_rep *PDF::PDF::get_PDF_rep( const int magic ) {
  // The magic integer exists precisely to discourage callers from
  // calling this function, and conversely to keep the function from
  // handing disruptive information to unwitting callers.  It serves no
  // other purpose, and its value is not elsewhere documented.  If you
  // must call this function, then supply the integer.  (Its value has
  // 1s in the zeroth and fifteenth bits, with six more 1s scattered
  // randomly across the fourteen places between.  It has no
  // significance.)
  const int required_magic = 0x9f05;
  if ( magic != required_magic ) return nullptr;
  return rep;
}

