/*
 * Copyright (C) 2012 Webdoc SA
 *
 * This file is part of Open-Sankoré.
 *
 * Open-Sankoré is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation, version 2,
 * with a specific linking exception for the OpenSSL project's
 * "OpenSSL" library (or with modified versions of it that use the
 * same license as the "OpenSSL" library).
 *
 * Open-Sankoré is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with Open-Sankoré; if not, see
 * <http://www.gnu.org/licenses/>.
 */


#if !defined Parser_h
#define Parser_h

#include "Object.h"
#include "Document.h"
#include "Page.h"

#include <cstddef>
#include <map>
#include <string>
#include <utility>
#include <vector>


namespace merge_lib
{
   class Document;

   // Parses a PDF document and creates a Document object from it
   // (entry point: parseDocument()).
   //
   // The class declares virtual member functions, so it can serve as a base
   // class. It therefore needs a virtual destructor: without one, deleting a
   // derived parser through a Parser pointer is undefined behaviour.
   class Parser
   {
   public:
      // Creates a parser with no root object, no document, empty file
      // content and an empty object map.
      Parser(): _root(0), _fileContent(), _objects(), _document(0) {}

      // Virtual so that derived parsers are destroyed correctly through a
      // Parser pointer. The body is empty: the raw pointer members are not
      // deleted here, exactly as with the previously implicit destructor.
      virtual ~Parser() {}

      // Parses the file named fileName and returns the resulting Document.
      // NOTE(review): ownership of the returned pointer is not visible from
      // this header -- confirm against the implementation.
      Document * parseDocument(const char * fileName);

      // Character-set constants; their values are defined outside this header.
      // (The "DELIMETERS" spelling is part of the public interface.)
      static const std::string WHITESPACES;
      static const std::string DELIMETERS;
      static const std::string NUMBERS;
      static const std::string WHITESPACES_AND_DELIMETERS;

      // Static string helpers, callable without a Parser instance. Their exact
      // contracts (return values on failure, how the position arguments are
      // updated) are defined by the implementation file.
      static bool getNextWord(std::string & out, const std::string &in, size_t &nextPosition, size_t *found = NULL);
      static std::string getNextToken(const std::string &in, unsigned &position);
      static void trim(std::string &str);
      static std::string findTokenStr(const std::string &content, const std::string &pattern, size_t start, size_t &foundStart, size_t &foundEnd);

      static size_t findToken(const std::string &content, const std::string &keyword, size_t start = 0);
      static size_t findTokenName(const std::string &content, const std::string &keyword, size_t start = 0);
      static unsigned int findEndOfElementContent(const std::string &content, unsigned int startOfPageElement);
      static bool tokenIsAName(const std::string &content, size_t start);

   protected:
      // Accessible to derived parsers. _getObjectContent returns a reference,
      // not a copy. NOTE(review): the lifetime of the referenced string is not
      // visible here -- confirm before holding on to it.
      const std::string &                           _getObjectContent(unsigned int objectPosition, unsigned int & objectNumber, unsigned int & generationNumber, std::pair<unsigned int, unsigned int> &, bool &);
      // Overridable by derived parsers.
      virtual unsigned int                          _readTrailerAndReturnRoot();

   private:
      // methods
      // The virtual functions in this section are private but can still be
      // overridden by derived classes.
      virtual void                                  _getFileContent(const char * fileName);
      bool                                          _getNextObject(Object * object);
      void                                          _callObserver(std::string objectContent);
      void                                          _createObjectTree(const char * fileName);
      void                                          _retrieveAllPages(Object * objectWithKids);
      void                                          _fillOutObjects();
      virtual void                                  _readXRefAndCreateObjects();
      unsigned int                                  _getEndOfLineFromContent(unsigned int fromPosition);
      // The next two functions return references rather than values.
      // NOTE(review): the storage they refer to is not visible from this
      // header -- confirm its lifetime in the implementation.
      const std::pair<unsigned int, unsigned int> & _getLineBounds(const std::string & str, unsigned int fromPosition);
      const std::string &                           _getNextToken(unsigned int & fromPosition);
      unsigned int                                  _countTokens(unsigned int leftBound, unsigned int rightBount);
      unsigned int                                  _skipWhiteSpaces(const std::string & str);
      unsigned int                                  _skipWhiteSpacesFromContent(unsigned int fromPosition);
      const std::map<unsigned int, Object::ReferencePositionsInContent> & _getReferences(const std::string & objectContent);
      unsigned int                                  _skipNumber(const std::string & str, unsigned int currentPosition);
      unsigned int                                  _skipWhiteSpaces(const std::string & str, unsigned int fromPosition);
      void                                          _createDocument(const char * docName);
      virtual unsigned int                          _getStartOfXrefWithRoot();
      unsigned int                                  _readTrailerAndRterievePrev(const unsigned int startPositionForSearch, unsigned int & previosXref);
      void                                          _clearParser();

   protected:
      // members
      // _root and _document start out null; _fileContent and _objects start
      // out empty (see the constructor).
      // NOTE(review): who deletes the pointed-to objects is not visible from
      // this header -- confirm in the implementation.
      Object *                         _root;
      std::string                      _fileContent;
      std::map<unsigned int, Object *> _objects;
      Document *                       _document;

   };
}
#endif