/*
 * Copyright (C) 2015-2018 Département de l'Instruction Publique (DIP-SEM)
 *
 * Copyright (C) 2013 Open Education Foundation
 *
 * Copyright (C) 2010-2013 Groupement d'Intérêt Public pour
 * l'Education Numérique en Afrique (GIP ENA)
 *
 * This file is part of OpenBoard.
 *
 * OpenBoard is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License,
 * with a specific linking exception for the OpenSSL project's
 * "OpenSSL" library (or with modified versions of it that use the
 * same license as the "OpenSSL" library).
 *
 * OpenBoard is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with OpenBoard. If not, see <http://www.gnu.org/licenses/>.
 */


#if !defined Parser_h
#define Parser_h

#include "Object.h"
#include "Document.h"
#include "Page.h"

#include <cstddef>
#include <map>
#include <string>
#include <utility>
#include <vector>


namespace merge_lib
{
   class Document;

   // Parses a PDF document and creates a Document object from it.
   //
   // The class declares virtual member functions, so it is meant to be usable
   // as a polymorphic base; it therefore gets a virtual destructor so that a
   // derived parser can be destroyed safely through a Parser pointer.
   //
   // NOTE(review): this header does not show who owns the Object and Document
   // pointers stored below -- confirm against the implementation file before
   // adding any cleanup to the destructor.
   class Parser
   {
   public:
      // Starts with no root object, no document, empty file content and an
      // empty object table.
      Parser(): _root(0), _fileContent(), _objects(), _document(0)  {}

      // Intentionally empty: it only makes destruction virtual and performs
      // exactly what the implicitly generated destructor did before.
      virtual ~Parser() {}

      // Entry point: parses the file named by fileName and returns the
      // resulting Document.
      // NOTE(review): ownership of the returned pointer is not visible here.
      Document * parseDocument(const char * fileName);

      // Character classes used while tokenising; the values are defined in the
      // implementation file.
      static const std::string WHITESPACES;
      static const std::string DELIMETERS;
      static const std::string NUMBERS;
      static const std::string WHITESPACES_AND_DELIMETERS;

      // Static tokenising / searching helpers. Their exact semantics live in
      // the implementation file; only what the signatures show is stated here.

      // Writes a word taken from `in` into `out` and updates nextPosition.
      // `found` is optional (defaults to NULL).
      static bool getNextWord(std::string & out, const std::string &in, size_t &nextPosition,size_t *found = NULL);
      // Returns a token taken from `in`; `position` is an in/out parameter.
      // NOTE(review): uses `unsigned` where the sibling helpers use size_t.
      static std::string getNextToken( const std::string &in, unsigned &position);
      // Trims `str` in place.
      static void trim(std::string &str);
      // Searches `content` for `pattern` from `start`; the bounds of the match
      // are reported through foundStart / foundEnd.
      static std::string findTokenStr(const std::string &content, const std::string &pattern, size_t start,size_t &foundStart, size_t &foundEnd);

      // Both searches begin at `start`, which defaults to 0.
      static size_t findToken(const std::string &content, const std::string &keyword,size_t start = 0);
      static size_t findTokenName(const std::string &content, const std::string &keyword,size_t start = 0);
      static unsigned int findEndOfElementContent(const std::string &content, unsigned int startOfPageElement);
      static bool tokenIsAName(const std::string &content, size_t start );

   protected:
      // Returns the content of the object located at objectPosition and
      // reports its object and generation numbers through the reference
      // parameters. The last two parameters are unnamed in this declaration.
      // NOTE(review): returns a reference -- confirm the referent outlives the
      // call (it must not be a function-local object).
      const std::string &                           _getObjectContent(unsigned int objectPosition, unsigned int & objectNumber, unsigned int & generationNumber, std::pair<unsigned int, unsigned int> &, bool &);
      // Overridable hook.
      virtual unsigned int                          _readTrailerAndReturnRoot();

   private:
      //methods
      // Overridable hook.
      virtual void                                  _getFileContent(const char * fileName);
      bool                                          _getNextObject(Object * object);
      // Takes objectContent by value.
      void                                          _callObserver(std::string objectContent);
      void                                          _createObjectTree(const char * fileName);
      void                                          _retrieveAllPages(Object * objectWithKids);
      void                                          _fillOutObjects();
      // Overridable hook.
      virtual void                                  _readXRefAndCreateObjects();
      unsigned int                                  _getEndOfLineFromContent(unsigned int fromPosition);
      // NOTE(review): the three functions below that return const references
      // need a referent that outlives the call -- verify in the implementation.
      const std::pair<unsigned int, unsigned int> & _getLineBounds(const std::string & str, unsigned int fromPosition);
      const std::string &                           _getNextToken(unsigned int & fromPosition);
      unsigned int                                  _countTokens(unsigned int leftBound, unsigned int rightBount);
      unsigned int                                  _skipWhiteSpaces(const std::string & str);
      unsigned int                                  _skipWhiteSpacesFromContent(unsigned int fromPosition);
      const std::map<unsigned int, Object::ReferencePositionsInContent> & _getReferences(const std::string & objectContent);
      unsigned int                                  _skipNumber(const std::string & str, unsigned int currentPosition);
      // Overload of _skipWhiteSpaces that takes an explicit start position.
      unsigned int                                  _skipWhiteSpaces(const std::string & str, unsigned int fromPosition);
      void                                          _createDocument(const char * docName);
      // Overridable hook.
      virtual unsigned int                          _getStartOfXrefWithRoot();
      // previosXref is an output parameter. The spelling of this function's
      // name is kept as-is because it is defined under this name elsewhere.
      unsigned int                                  _readTrailerAndRterievePrev(const unsigned int startPositionForSearch, unsigned int & previosXref);
      void                                          _clearParser();

   protected:

      //members
      // Declared in the same order as the constructor's initialiser list.
      Object *                         _root;
      std::string                      _fileContent;
      // Objects keyed by an unsigned integer (presumably the PDF object
      // number -- confirm in the implementation).
      std::map<unsigned int, Object *> _objects;
      Document *                       _document;

   };
}
#endif