Commit 7564a8ba authored by Claudio Valerio

moved pdf merger file into src directory.

parent 97cee1b6
#include <iostream> #include <iostream>
#include "ASCII85Decode.h" #include "ASCII85Decode.h"
using namespace merge_lib; using namespace merge_lib;
// Positional weights of the five base-85 digits that make up one ASCII85
// group, most significant digit first (85^4 ... 85^0). The UL suffix keeps
// the products in unsigned long arithmetic.
static const unsigned long pow85[] = {
    85UL * 85 * 85 * 85,
    85UL * 85 * 85,
    85UL * 85,
    85UL,
    1UL
};
// Appends the `len` most significant bytes of the 32-bit value held in
// `tuple` to `cur`, highest byte first.
//   cur   - output buffer that receives the bytes
//   tuple - accumulated base-85 group value
//   len   - number of bytes to emit; must be in the range 1..4
// Any other `len` writes a diagnostic to std::cerr and leaves `cur` untouched.
void ASCII85Decode::_wput(std::string &cur, unsigned long tuple, int len)
{
    if (len < 1 || len > 4)
    {
        std::cerr<<"Asci85Filter - unexpected len = "<<len<<"\n";
        return;
    }
    // Byte 0 is bits 31..24, byte 1 is bits 23..16, and so on.
    for (int byteIdx = 0; byteIdx < len; ++byteIdx)
    {
        cur += static_cast<char>(tuple >> (24 - 8 * byteIdx));
    }
}
bool ASCII85Decode::decode(std::string &encoded) bool ASCII85Decode::decode(std::string &encoded)
{ {
unsigned long tuple = 0; unsigned long tuple = 0;
std::string decoded = ""; std::string decoded = "";
int count = 0; int count = 0;
int size = encoded.size(); int size = encoded.size();
int i = 0; int i = 0;
bool found = false; bool found = false;
for(;size;) for(;size;)
{ {
char ch = encoded[i++]; char ch = encoded[i++];
// sometimes <~ can present. // sometimes <~ can present.
switch(ch) switch(ch)
{ {
default: default:
if( ch < '!' || ch > 'u' ) if( ch < '!' || ch > 'u' )
{ {
std::cerr<<"bag character in ascii85 block["<<ch<<"]\n"; std::cerr<<"bag character in ascii85 block["<<ch<<"]\n";
return false; return false;
} }
tuple += (unsigned long)(ch - '!') * pow85[count++]; tuple += (unsigned long)(ch - '!') * pow85[count++];
if( count == 5) if( count == 5)
{ {
_wput(decoded,tuple,4); _wput(decoded,tuple,4);
count = 0; count = 0;
tuple = 0; tuple = 0;
} }
break; break;
case 'z': case 'z':
if( count != 0 ) if( count != 0 )
{ {
std::cerr<<"Z inside of acii85 5-tuple!\n"; std::cerr<<"Z inside of acii85 5-tuple!\n";
return false; return false;
} }
decoded += "\0\0\0\0"; decoded += "\0\0\0\0";
break; break;
case '~': case '~':
if( --size ) if( --size )
{ {
ch = encoded[i++]; ch = encoded[i++];
if( ch == '>') if( ch == '>')
{ {
if( count > 0 ) if( count > 0 )
{ {
count --; count --;
tuple += pow85[count]; tuple += pow85[count];
_wput(decoded,tuple,count); _wput(decoded,tuple,count);
} }
} }
encoded = decoded; encoded = decoded;
return true; return true;
} }
std::cerr<<"~ without > in ascii85 stream!\n = ["<<ch<<"]\n"; std::cerr<<"~ without > in ascii85 stream!\n = ["<<ch<<"]\n";
encoded = decoded; encoded = decoded;
return false; return false;
break; break;
case '\n': case '\n':
case '\r': case '\r':
case '\t': case '\t':
case ' ': case ' ':
case '\0': case '\0':
case '\f': case '\f':
case '\b': case '\b':
case 0177: case 0177:
break; break;
} }
--size; --size;
} }
return true; return true;
} }
#ifndef ASCII85Decode_H
#define ASCII85Decode_H
#include <string>
#include "Decoder.h"
namespace merge_lib
{
    // Decoder for ASCII85-encoded streams. Only decoding is implemented.
    class ASCII85Decode : public Decoder
    {
    public:
        ASCII85Decode() {}
        virtual ~ASCII85Decode() {}
        // Encoding is not supported: the argument is left untouched and
        // false is returned.
        bool encode(std::string & decoded) { return false; }
        // Replaces `encoded` with its decoded form; returns false on
        // malformed input.
        bool decode(std::string & encoded);
        // This filter takes no parameters from the stream object.
        void initialize(Object * objectWithStram) {}
    private:
        // Appends the `len` high-order bytes of `tuple` to `cur`.
        void _wput(std::string &cur, unsigned long tuple, int len);
    };
}
#endif // ASCII85Decode_H
#include "ASCIIHexDecode.h" #include "ASCIIHexDecode.h"
#include <string> #include <string>
#include "Utils.h" #include "Utils.h"
using namespace merge_lib; using namespace merge_lib;
// Characters skipped between hexadecimal digits while decoding.
const std::string WHITESPACES(" \t\f\v\n\r");
// Maps a nibble value (0..15) to its upper-case hexadecimal digit character.
// Fully parenthesised and without a trailing ';' so that it can be used
// safely inside larger expressions.
#define HEX_TO_VAL(char_c) ((char_c) > 9 ? 'A' + (char_c) - 10 : '0' + (char_c))
// Converts one hexadecimal digit character ('0'-'9', 'A'-'F', 'a'-'f') to
// its numeric value 0..15. Any other character yields 0.
static unsigned int convertHexVal(unsigned char c)
{
    if (c >= '0' && c <= '9')
    {
        return c - '0';
    }
    if (c >= 'A' && c <= 'F')
    {
        return c - 'A' + 10;
    }
    if (c >= 'a' && c <= 'f')
    {
        return c - 'a' + 10;
    }
    return 0;
}
bool ASCIIHexDecode::decode(std::string & encoded) bool ASCIIHexDecode::decode(std::string & encoded)
{ {
bool isLow = true; bool isLow = true;
unsigned char decodedChar = '\0'; unsigned char decodedChar = '\0';
int len = encoded.size(); int len = encoded.size();
std::string decoded =""; std::string decoded ="";
for(int i = 0;i<len;i++) for(int i = 0;i<len;i++)
{ {
unsigned char ch = encoded[i]; unsigned char ch = encoded[i];
if( WHITESPACES.find(ch) != std::string::npos ) if( WHITESPACES.find(ch) != std::string::npos )
{ {
continue; continue;
} }
if( ch == '>' ) if( ch == '>' )
{ {
continue; // EOD found continue; // EOD found
} }
ch = convertHexVal(ch); ch = convertHexVal(ch);
if( isLow ) if( isLow )
{ {
decodedChar = (ch & 0x0F); decodedChar = (ch & 0x0F);
isLow = false; isLow = false;
} }
else else
{ {
decodedChar = ((decodedChar << 4) | ch); decodedChar = ((decodedChar << 4) | ch);
isLow = true; isLow = true;
decoded += decodedChar; decoded += decodedChar;
} }
} }
encoded = decoded; encoded = decoded;
return true; return true;
} }
#ifndef ASCIIHexDecode_h
#define ASCIIHexDecode_h
#include <string>
#include "Decoder.h"
namespace merge_lib
{
    // Decoder for ASCII-hex encoded streams. Only decoding is implemented.
    class ASCIIHexDecode : public Decoder
    {
    public:
        ASCIIHexDecode() {}
        virtual ~ASCIIHexDecode() {}
        // Encoding is not supported: the argument is left untouched and
        // false is returned.
        bool encode(std::string & decoded) { return false; }
        // Replaces `encoded` with the bytes described by its hex digits.
        bool decode(std::string & encoded);
        // This filter takes no parameters from the stream object.
        void initialize(Object * objectWithStram) {}
    };
}
#endif // ASCIIHexDecode_h
#if !defined AbstractBoxElementHandler_h #if !defined AbstractBoxElementHandler_h
#define AbstractBoxElementHandler_h #define AbstractBoxElementHandler_h
#include "PageElementHandler.h" #include "PageElementHandler.h"
namespace merge_lib namespace merge_lib
{ {
//class for processing MediaBox field of Page object //class for processing MediaBox field of Page object
class AbstractBoxElementHandler: public PageElementHandler class AbstractBoxElementHandler: public PageElementHandler
{ {
public: public:
AbstractBoxElementHandler(Object * page): PageElementHandler(page) AbstractBoxElementHandler(Object * page): PageElementHandler(page)
{ {
} }
virtual ~AbstractBoxElementHandler() virtual ~AbstractBoxElementHandler()
{ {
} }
protected: protected:
void _retrieveBoxFromParent() void _retrieveBoxFromParent()
{ {
std::string content = _page->getObjectContent(); std::string content = _page->getObjectContent();
std::string mediaBox; std::string mediaBox;
Object * parent = _page; Object * parent = _page;
while(1) while(1)
{ {
unsigned int startOfParent = content.find("/Parent"); unsigned int startOfParent = content.find("/Parent");
unsigned int endOfParent = content.find(" R", startOfParent); unsigned int endOfParent = content.find(" R", startOfParent);
if(startOfParent == std::string::npos) if(startOfParent == std::string::npos)
break; break;
std::vector <Object *> parents = parent->getChildrenByBounds(startOfParent, endOfParent); std::vector <Object *> parents = parent->getChildrenByBounds(startOfParent, endOfParent);
if(parents.size() != 1) if(parents.size() != 1)
break; break;
parent = parents[0]; parent = parents[0];
std::string parentContent = parent->getObjectContent(); std::string parentContent = parent->getObjectContent();
unsigned int startOfMediaBox = parentContent.find(_handlerName); unsigned int startOfMediaBox = parentContent.find(_handlerName);
if(startOfMediaBox == std::string::npos) if(startOfMediaBox == std::string::npos)
{ {
content = parentContent; content = parentContent;
continue; continue;
} }
unsigned int endOfMediaBox = parentContent.find("]", startOfMediaBox); unsigned int endOfMediaBox = parentContent.find("]", startOfMediaBox);
mediaBox = parentContent.substr(startOfMediaBox, endOfMediaBox - startOfMediaBox + 1); mediaBox = parentContent.substr(startOfMediaBox, endOfMediaBox - startOfMediaBox + 1);
break; break;
} }
if(!mediaBox.empty()) if(!mediaBox.empty())
{ {
unsigned int startOfMediaBox = _page->getObjectContent().rfind(">>"); unsigned int startOfMediaBox = _page->getObjectContent().rfind(">>");
_page->insertToContent(startOfMediaBox, mediaBox); _page->insertToContent(startOfMediaBox, mediaBox);
_changeObjectContent(startOfMediaBox); _changeObjectContent(startOfMediaBox);
} }
} }
private: private:
virtual void _changeObjectContent(unsigned int startOfPageElement) = 0; virtual void _changeObjectContent(unsigned int startOfPageElement) = 0;
}; };
} }
#endif #endif
#if !defined AnnotsHandler_h #if !defined AnnotsHandler_h
#define AnnotsHandler_h #define AnnotsHandler_h
#include "RemoveHimSelfHandler.h" #include "RemoveHimSelfHandler.h"
//this class is needed to process "Annots" field //this class is needed to process "Annots" field
//AnnotsHandler parses Annots field of Page object and fills //AnnotsHandler parses Annots field of Page object and fills
//annots container with Annots objects //annots container with Annots objects
namespace merge_lib namespace merge_lib
{ {
class AnnotsHandler: public RemoveHimselfHandler class AnnotsHandler: public RemoveHimselfHandler
{ {
public: public:
AnnotsHandler(Object * page, const std::string & handlerName, std::vector<Object *> & annots): AnnotsHandler(Object * page, const std::string & handlerName, std::vector<Object *> & annots):
RemoveHimselfHandler(page, handlerName), RemoveHimselfHandler(page, handlerName),
_annotations(annots) _annotations(annots)
{ {
_setHandlerName(handlerName); _setHandlerName(handlerName);
} }
private: private:
//methods //methods
void _processObjectContent(unsigned int startOfPageElement); void _processObjectContent(unsigned int startOfPageElement);
//memebers //memebers
std::vector<Object *> & _annotations; std::vector<Object *> & _annotations;
}; };
} }
#endif #endif
#ifndef CCITTFaxDecode_H #ifndef CCITTFaxDecode_H
#define CCITTFaxDecode_H #define CCITTFaxDecode_H
#include <string> #include <string>
namespace merge_lib namespace merge_lib
{ {
// this class provides method for FlateDecode encoding and decoding // this class provides method for FlateDecode encoding and decoding
class CCITTFaxDecode : public Decoder class CCITTFaxDecode : public Decoder
{ {
public: public:
CCITTFaxDecode(){}; CCITTFaxDecode(){};
virtual ~CCITTFaxDecode(){}; virtual ~CCITTFaxDecode(){};
bool encode(std::string & decoded) {return true;}; bool encode(std::string & decoded) {return true;};
bool decode(std::string & encoded) {return true;}; bool decode(std::string & encoded) {return true;};
void initialize(Object * objectWithStram){}; void initialize(Object * objectWithStram){};
}; };
} }
#endif // FLATEDECODE_H_INCLUDED #endif // FLATEDECODE_H_INCLUDED
#include "ContentHandler.h" #include "ContentHandler.h"
#include "Filter.h" #include "Filter.h"
#include "FlateDecode.h" #include "FlateDecode.h"
#include <iostream> #include <iostream>
#include <string> #include <string>
#include <string.h> #include <string.h>
using namespace merge_lib; using namespace merge_lib;
using namespace std; using namespace std;
//concatenate stream of all objects which contain Content of Page //concatenate stream of all objects which contain Content of Page
void ContentHandler::_processObjectContent(unsigned int startOfPageElement) void ContentHandler::_processObjectContent(unsigned int startOfPageElement)
{ {
unsigned int endOfPage = _findEndOfElementContent(startOfPageElement); unsigned int endOfPage = _findEndOfElementContent(startOfPageElement);
_concatenatedStream = _retrieveStreamContent(_page, startOfPageElement, endOfPage); _concatenatedStream = _retrieveStreamContent(_page, startOfPageElement, endOfPage);
FlateDecode flate; FlateDecode flate;
flate.encode(_concatenatedStream); flate.encode(_concatenatedStream);
} }
//write concatenated stream to Page object //write concatenated stream to Page object
void ContentHandler::_changeObjectContent(unsigned int startOfPageElement) void ContentHandler::_changeObjectContent(unsigned int startOfPageElement)
{ {
unsigned int endOfPage = _findEndOfElementContent(startOfPageElement); unsigned int endOfPage = _findEndOfElementContent(startOfPageElement);
_page->forgetAboutChildren(startOfPageElement, endOfPage); _page->forgetAboutChildren(startOfPageElement, endOfPage);
_page->eraseContent(startOfPageElement, endOfPage - startOfPageElement); _page->eraseContent(startOfPageElement, endOfPage - startOfPageElement);
unsigned int endOfObjectDescription = _pageContent.rfind(">>"); unsigned int endOfObjectDescription = _pageContent.rfind(">>");
const char * length = "/Filter /FlateDecode\n/Length "; const char * length = "/Filter /FlateDecode\n/Length ";
unsigned int sizeOfLength = strlen(length); unsigned int sizeOfLength = strlen(length);
_page->insertToContent(endOfObjectDescription, length, sizeOfLength); _page->insertToContent(endOfObjectDescription, length, sizeOfLength);
_page->insertToContent(endOfObjectDescription + sizeOfLength, Utils::uIntToStr(_concatenatedStream.size()).c_str()); _page->insertToContent(endOfObjectDescription + sizeOfLength, Utils::uIntToStr(_concatenatedStream.size()).c_str());
_page->appendContent("\nstream\n"); _page->appendContent("\nstream\n");
_page->appendContent(_concatenatedStream); _page->appendContent(_concatenatedStream);
_page->appendContent("endstream\n"); _page->appendContent("endstream\n");
_page->forgetStreamInFile(); _page->forgetStreamInFile();
} }
// Returns the stream content reachable from `object`.
//   object     - object with a stream, or an object referring to others
//   leftBound  - left bound of the object's content to inspect
//   rightBound - right bound of the object's content to inspect
// An object that has its own stream yields that stream (decoded); otherwise
// the streams of the children located between the bounds are concatenated.
string ContentHandler::_retrieveStreamContent(merge_lib::Object * object, unsigned int leftBound, unsigned int rightBound)
{
    if (object->hasStream())
    {
        return _getStreamFromContent(object);
    }
    return _getStreamFromReferencies(object, leftBound, rightBound);
}
// Concatenates the streams of the objects referenced between leftBound and
// rightBound of objectWithArray, in position order, and returns the result.
// Afterwards objectWithArray is told to forget those children.
string ContentHandler::_getStreamFromReferencies(merge_lib::Object * objectWithArray, unsigned int leftBound, unsigned int rightBound)
{
    std::string result;
    std::vector<Object *> referencies = objectWithArray->getSortedByPositionChildren(leftBound, rightBound);
    for (size_t i = 0; i < referencies.size(); ++i)
    {
        Object * child = referencies[i];
        // Each child is scanned over its whole content.
        result.append(_retrieveStreamContent(child, 0, child->getObjectContent().size()));
    }
    objectWithArray->forgetAboutChildren(leftBound, rightBound);
    return result;
}
// Returns the decoded stream of objectWithStream, obtained through a Filter
// constructed for that object.
string ContentHandler::_getStreamFromContent(merge_lib::Object * objectWithStream)
{
    string decodedStream;
    Filter filter(objectWithStream);
    filter.getDecodedStream(decodedStream);
    return decodedStream;
}
#if !defined ContentHandler_h #if !defined ContentHandler_h
#define ContentHandler_h #define ContentHandler_h
#include "PageElementHandler.h" #include "PageElementHandler.h"
namespace merge_lib namespace merge_lib
{ {
//this class is needed to process "Content" field of Page object //this class is needed to process "Content" field of Page object
class ContentHandler: public PageElementHandler class ContentHandler: public PageElementHandler
{ {
public: public:
ContentHandler(Object * page, const std::string & handlerName): ContentHandler(Object * page, const std::string & handlerName):
PageElementHandler(page) PageElementHandler(page)
{ {
_setHandlerName(handlerName); _setHandlerName(handlerName);
} }
virtual ~ContentHandler(){}; virtual ~ContentHandler(){};
private: private:
//methods //methods
//concatedate stream of all objects which contains Content of Page //concatedate stream of all objects which contains Content of Page
void _processObjectContent(unsigned int startOfPageElement); void _processObjectContent(unsigned int startOfPageElement);
//write concatenated stream to Page object //write concatenated stream to Page object
void _changeObjectContent(unsigned int startOfPageElement); void _changeObjectContent(unsigned int startOfPageElement);
//get content of stream //get content of stream
// object - object with stream // object - object with stream
//leftBound - left bound of object's content //leftBound - left bound of object's content
//rightBound - right bound of object's content //rightBound - right bound of object's content
std::string _retrieveStreamContent(Object * object, unsigned int leftBound, unsigned int rightBound); std::string _retrieveStreamContent(Object * object, unsigned int leftBound, unsigned int rightBound);
//get stream from Arrey elemetns //get stream from Arrey elemetns
std::string _getStreamFromReferencies(Object * objectWithArray, unsigned int leftBound, unsigned int rightBound); std::string _getStreamFromReferencies(Object * objectWithArray, unsigned int leftBound, unsigned int rightBound);
//get stream from Object //get stream from Object
std::string _getStreamFromContent(Object * objectWithStream); std::string _getStreamFromContent(Object * objectWithStream);
//memebers //memebers
std::string _concatenatedStream; std::string _concatenatedStream;
}; };
} }
#endif #endif
#ifndef CROPBOX_ELEMENT_HANDLER_H
#define CROPBOX_ELEMENT_HANDLER_H
#include <sstream>
#include <string>
#include "AbstractBoxElementHandler.h"
#include "Rectangle.h"
#include "Utils.h"
namespace merge_lib
{
    //class for processing CropBox field of Page object
    class CropBoxElementHandler: public AbstractBoxElementHandler
    {
    public:
        CropBoxElementHandler(Object * page): AbstractBoxElementHandler(page)
        {
            _setHandlerName("/CropBox");
        }
        virtual ~CropBoxElementHandler()
        {
        }
    private:
        // Replaces the /CropBox entry that starts at startOfPageElement with
        // a /BBox entry holding the same rectangle, and inserts a /Matrix
        // entry whose translation is the negated lower-left corner of the
        // box.
        virtual void _changeObjectContent(unsigned int startOfPageElement)
        {
            Rectangle cropBox("/CropBox", _page->getObjectContent());
            // A coordinate that already equals zero is written as plain 0
            // rather than being negated.
            const double shiftX = Utils::doubleEquals(cropBox.x1, 0) ? 0 : -cropBox.x1;
            const double shiftY = Utils::doubleEquals(cropBox.y1, 0) ? 0 : -cropBox.y1;
            cropBox.setNewRectangleName("/BBox");

            // Drop the old /CropBox text (and the children it referenced).
            const unsigned int endOfElement = _findEndOfElementContent(startOfPageElement);
            _page->forgetAboutChildren(startOfPageElement, endOfElement);
            _page->eraseContent(startOfPageElement, endOfElement - startOfPageElement);

            std::string newContent;
            cropBox.appendRectangleToString(newContent, " ");
            _page->insertToContent(startOfPageElement, newContent);

            // The matrix is inserted at the same offset after the box.
            std::stringstream matrix;
            matrix<<"/Matrix [ 1 0 0 1 "<<shiftX<<" "<< shiftY<<" ]\n";
            _page->insertToContent(startOfPageElement, matrix.str());
        }
        // The page has no /CropBox of its own: try to take it from a parent.
        void _pageElementNotFound()
        {
            _retrieveBoxFromParent();
        }
    };
}
#endif // CROPBOX_ELEMENT_HANDLER_H
#ifndef DCTDecode_H #ifndef DCTDecode_H
#define DCTDecode_H #define DCTDecode_H
#include <string> #include <string>
namespace merge_lib namespace merge_lib
{ {
// this class provides method for FlateDecode encoding and decoding // this class provides method for FlateDecode encoding and decoding
class DCTDecode : public Decoder class DCTDecode : public Decoder
{ {
public: public:
DCTDecode(){}; DCTDecode(){};
virtual ~DCTDecode(){}; virtual ~DCTDecode(){};
bool encode(std::string & decoded) {return true;}; bool encode(std::string & decoded) {return true;};
bool decode(std::string & encoded) {return true;}; bool decode(std::string & encoded) {return true;};
void initialize(Object * objectWithStram){}; void initialize(Object * objectWithStram){};
}; };
} }
#endif // FLATEDECODE_H_INCLUDED #endif // FLATEDECODE_H_INCLUDED
#ifndef DECODER_H
#define DECODER_H
#include <string>
#include "Object.h"
namespace merge_lib
{
    // Abstract base class for all stream decoders. Both transformations
    // take their data by reference; each returns a bool status.
    class Decoder
    {
    public:
        Decoder() {}
        virtual ~Decoder() {}
        // Encodes `decoded`; returns a success flag.
        virtual bool encode(std::string &decoded) = 0;
        // Decodes `encoded`; returns a success flag.
        virtual bool decode(std::string &encoded) = 0;
        //read fields of objectWithStream and initialize internal parameters
        //of decoder
        virtual void initialize(Object * objectWithStram) = 0;
    };
}
#endif // DECODER_H
#ifndef EXCEPTION_H
#define EXCEPTION_H
#include <exception>
#include <string>
#include <sstream>
#include <iostream>
namespace merge_lib
{
    // Exception type of the merge library: a std::exception that carries a
    // text message retrievable through what().
    class Exception : public std::exception
    {
    public:
        // Creates an exception with an empty message.
        Exception() {}
        // A null pointer is treated as an empty message (constructing a
        // std::string from a null pointer is undefined behaviour).
        Exception(const char * message) : _message(message ? message : "") {}
        Exception(std::string & message) : _message(message) {}
        // Uses the text accumulated in the stream so far.
        Exception(std::stringstream & message) : _message(message.str()) {}
        Exception(const std::string & message) : _message(message) {}
        virtual ~Exception() throw () {}
        // Returns the stored message; the pointer is valid while this
        // object is alive.
        virtual const char * what() const throw() { return _message.c_str(); }
        // Intentionally does nothing.
        void show() const {}
    protected:
        std::string _message;
    };
}
#endif // EXCEPTION_H
#if !defined FileIsAbsentException_h
#define FileIsAbsentException_h
#include <string>
// Thrown object that carries the name of a file that could not be found.
class FileIsAbsentException
{
public:
    // fileName - name of the missing file; a null pointer is stored as an
    //            empty name (constructing a std::string from null is
    //            undefined behaviour).
    FileIsAbsentException(const char * fileName):
        _fileName(fileName ? fileName : "")
    {
    }
    // Returns the stored file name; the pointer is valid while this object
    // is alive. Callable on const objects as well.
    const char * getFileName() const
    {
        return _fileName.c_str();
    }
private:
    std::string _fileName;
};
#endif
#ifndef FILTER_PREDICTOR_H
#define FILTER_PREDICTOR_H
#include <string>
#include "Decoder.h"
namespace merge_lib
{
    // this class performs filter prediction processing.
    // NOTE(review): only the declaration is visible here; the member
    // descriptions below are kept minimal until checked against the
    // implementation.
    class FilterPredictor:public Decoder
    {
    public:
        FilterPredictor();
        virtual ~FilterPredictor();
        // Encoding is not supported: the argument is left untouched and
        // false is returned.
        bool encode(std::string & decoded){return false;}
        bool decode(std::string & encoded);
        void initialize(Object * objectWithStream);
        static const std::string PREDICTOR_TOKEN;
        static const std::string DECODE_PARAM_TOKEN;
        // Accessor for the stored early-change value.
        int getEarlyChange() const { return _earlyChange;}
    private:
        bool decodeRow(const char *input, std::string &out,const std::string &prev,int curPrediction);
        void obtainDecodeParams(Object*objectWithStream,std::string &dictStr);
        std::string getDictionaryContentStr(std::string & in, size_t &pos );
        int _predictor;
        int _colors;
        int _bits;
        int _columns;
        int _earlyChange;
        int _rowLen;
        int _bytesPerPixel;
    };
}
#endif
#include <iostream> #include <iostream>
#include "FlateDecode.h" #include "FlateDecode.h"
#include "zlib.h" #include "zlib.h"
#include "Utils.h" #include "Utils.h"
#include <string.h> #include <string.h>
using namespace merge_lib; using namespace merge_lib;
// Number of bytes by which zlib output buffers are grown.
#define ZLIB_MEM_DELTA 65535
// Prints "<msg> ZLIB error:<err>" to std::cout when `err` is not Z_OK.
// Wrapped in do { } while(0) so that it behaves like a single statement
// (safe in un-braced if/else); the last line carries no continuation
// backslash, so the macro cannot swallow whatever follows it.
#define ZLIB_CHECK_ERR(err,msg) \
    do { \
        if( (err) != Z_OK ) { \
            std::cout<<msg<<" ZLIB error:"<<(err)<<std::endl; \
        } \
    } while(0)
// Creates a decoder without a predictor; initialize() may attach one.
FlateDecode::FlateDecode()
    : _predict(NULL)
{
}
// Releases the predictor, if one was created. Deleting a null pointer is a
// no-op, so no explicit check is required.
FlateDecode::~FlateDecode()
{
    delete _predict;
}
// Inspects the header of objectWithStream and, when it contains
// FilterPredictor::DECODE_PARAM_TOKEN, creates and initializes a
// FilterPredictor for it.
//   objectWithStream - object whose stream will be processed; a null
//                      pointer is ignored.
// A predictor left over from an earlier call is released before the new one
// is installed, so repeated initialization does not leak. When the header
// has no decode parameters the current predictor (if any) is kept.
void FlateDecode::initialize(Object * objectWithStream)
{
    if( !objectWithStream )
    {
        return;
    }
    std::string head;
    objectWithStream->getHeader(head);
    if( head.find(FilterPredictor::DECODE_PARAM_TOKEN) == std::string::npos )
    {
        return;
    }
    delete _predict;
    _predict = new FilterPredictor();
    _predict->initialize(objectWithStream);
}
/** @brief encode /** @brief encode
* *
* @todo: * @todo:
document this function document this function
*/ */
bool FlateDecode::encode(std::string &decoded) bool FlateDecode::encode(std::string &decoded)
{ {
z_stream stream; z_stream stream;
stream.zalloc = (alloc_func)0; stream.zalloc = (alloc_func)0;
stream.zfree = (free_func)0; stream.zfree = (free_func)0;
stream.opaque = (voidpf)0; stream.opaque = (voidpf)0;
size_t out_len = 0; size_t out_len = 0;
unsigned char *out_p = NULL; unsigned char *out_p = NULL;
stream.next_out = out_p; stream.next_out = out_p;
stream.avail_out = (uInt)out_len; stream.avail_out = (uInt)out_len;
stream.next_in = (unsigned char*)decoded.c_str(); stream.next_in = (unsigned char*)decoded.c_str();
stream.avail_in = (uInt)decoded.size(); stream.avail_in = (uInt)decoded.size();
int err = deflateInit(&stream, Z_DEFAULT_COMPRESSION); int err = deflateInit(&stream, Z_DEFAULT_COMPRESSION);
ZLIB_CHECK_ERR(err, "deflateInit"); ZLIB_CHECK_ERR(err, "deflateInit");
if ( err != Z_OK ) if ( err != Z_OK )
{ {
return false; return false;
} }
bool toContinue = false; bool toContinue = false;
int flush = Z_NO_FLUSH; int flush = Z_NO_FLUSH;
do do
{ {
toContinue = false; toContinue = false;
flush = (stream.avail_in == 0)?Z_FINISH:Z_NO_FLUSH; flush = (stream.avail_in == 0)?Z_FINISH:Z_NO_FLUSH;
if ( !stream.avail_out ) if ( !stream.avail_out )
{ {
// increase the space // increase the space
out_p = (unsigned char*)realloc(out_p,out_len + ZLIB_MEM_DELTA); out_p = (unsigned char*)realloc(out_p,out_len + ZLIB_MEM_DELTA);
// init new memory // init new memory
unsigned char *new_out_start = out_p + out_len; unsigned char *new_out_start = out_p + out_len;
memset(new_out_start,0,ZLIB_MEM_DELTA); memset(new_out_start,0,ZLIB_MEM_DELTA);
// Point next_out to the next unused byte // Point next_out to the next unused byte
stream.next_out = new_out_start; stream.next_out = new_out_start;
// Update the size of the buffer // Update the size of the buffer
stream.avail_out = (uInt)ZLIB_MEM_DELTA; stream.avail_out = (uInt)ZLIB_MEM_DELTA;
out_len += ZLIB_MEM_DELTA; out_len += ZLIB_MEM_DELTA;
} }
err = deflate(&stream,flush); err = deflate(&stream,flush);
if ( err == Z_OK && stream.avail_out == 0 ) if ( err == Z_OK && stream.avail_out == 0 )
{ {
toContinue = true; toContinue = true;
} }
} }
while ( toContinue || flush == Z_NO_FLUSH ); while ( toContinue || flush == Z_NO_FLUSH );
err = deflateEnd(&stream); err = deflateEnd(&stream);
ZLIB_CHECK_ERR(err, "deflateEnd"); ZLIB_CHECK_ERR(err, "deflateEnd");
if( err != Z_OK ) if( err != Z_OK )
{ {
free(out_p); free(out_p);
return false; return false;
} }
decoded = std::string((char*)out_p,stream.total_out); decoded = std::string((char*)out_p,stream.total_out);
free(out_p); free(out_p);
return true; return true;
} }
/** @brief decode /** @brief decode
* *
* @todo: document this function * @todo: document this function
*/ */
bool FlateDecode::decode(std::string & encoded) bool FlateDecode::decode(std::string & encoded)
{ {
z_stream stream; z_stream stream;
//some initialization of ZLIB stuff //some initialization of ZLIB stuff
stream.zalloc = (alloc_func)0; stream.zalloc = (alloc_func)0;
stream.zfree = (free_func)0; stream.zfree = (free_func)0;
stream.opaque = (voidpf)0; stream.opaque = (voidpf)0;
//trace_hex((char*)encoded.c_str(),encoded.size()); //trace_hex((char*)encoded.c_str(),encoded.size());
stream.next_in = (unsigned char*)encoded.c_str(); stream.next_in = (unsigned char*)encoded.c_str();
stream.avail_in = (uInt)encoded.size(); stream.avail_in = (uInt)encoded.size();
int err = inflateInit(&stream); int err = inflateInit(&stream);
ZLIB_CHECK_ERR(err,"InflateInit"); ZLIB_CHECK_ERR(err,"InflateInit");
if ( err != Z_OK ) if ( err != Z_OK )
{ {
return false; return false;
} }
unsigned char *out_p = NULL; unsigned char *out_p = NULL;
int out_len = 0; int out_len = 0;
stream.next_out = out_p; stream.next_out = out_p;
stream.avail_out = out_len; stream.avail_out = out_len;
for (;;) for (;;)
{ {
if ( !stream.avail_out) if ( !stream.avail_out)
{ {
// there is no more space for deallocation - increase the space // there is no more space for deallocation - increase the space
out_p = (unsigned char*)realloc(out_p,out_len + ZLIB_MEM_DELTA); out_p = (unsigned char*)realloc(out_p,out_len + ZLIB_MEM_DELTA);
// init new memory // init new memory
unsigned char *new_out_start = out_p + out_len; unsigned char *new_out_start = out_p + out_len;
memset(new_out_start,0,ZLIB_MEM_DELTA); memset(new_out_start,0,ZLIB_MEM_DELTA);
// Point next_out to the next unused byte // Point next_out to the next unused byte
stream.next_out = new_out_start; stream.next_out = new_out_start;
// Update the size of the uncompressed buffer // Update the size of the uncompressed buffer
stream.avail_out = (uInt)ZLIB_MEM_DELTA; stream.avail_out = (uInt)ZLIB_MEM_DELTA;
out_len += ZLIB_MEM_DELTA; out_len += ZLIB_MEM_DELTA;
} }
err = inflate(&stream,Z_NO_FLUSH); err = inflate(&stream,Z_NO_FLUSH);
if ( err == Z_STREAM_END) if ( err == Z_STREAM_END)
{ {
break; break;
} }
ZLIB_CHECK_ERR(err,"Deflate"); ZLIB_CHECK_ERR(err,"Deflate");
if ( err != Z_OK ) if ( err != Z_OK )
{ {
if( out_p ) if( out_p )
{ {
free(out_p); free(out_p);
} }
return false; return false;
} }
} }
err = inflateEnd(&stream); err = inflateEnd(&stream);
ZLIB_CHECK_ERR(err,"InflateEnd"); ZLIB_CHECK_ERR(err,"InflateEnd");
if( err != Z_OK ) if( err != Z_OK )
{ {
if( out_p ) if( out_p )
{ {
free(out_p); free(out_p);
} }
return false; return false;
} }
encoded = std::string((char*)out_p,stream.total_out); encoded = std::string((char*)out_p,stream.total_out);
free(out_p); free(out_p);
// trace_hex((char*)encoded.c_str(),encoded.size()); // trace_hex((char*)encoded.c_str(),encoded.size());
// if predictor exists for that object, then lets decode it // if predictor exists for that object, then lets decode it
if( _predict ) if( _predict )
{ {
_predict->decode(encoded); _predict->decode(encoded);
} }
return true; return true;
} }
#ifndef FLATEDECODE_H_INCLUDED #ifndef FLATEDECODE_H_INCLUDED
#define FLATEDECODE_H_INCLUDED #define FLATEDECODE_H_INCLUDED
#include "Decoder.h" #include "Decoder.h"
#include <string> #include <string>
#include "Decoder.h" #include "Decoder.h"
#include "FilterPredictor.h" #include "FilterPredictor.h"
namespace merge_lib namespace merge_lib
{ {
// this class provides method for FlateDecode encoding and decoding // this class provides method for FlateDecode encoding and decoding
class FlateDecode : public Decoder class FlateDecode : public Decoder
{ {
public: public:
FlateDecode(); FlateDecode();
virtual ~FlateDecode(); virtual ~FlateDecode();
bool encode(std::string & decoded); bool encode(std::string & decoded);
bool decode(std::string & encoded); bool decode(std::string & encoded);
void initialize(Object * objectWithStream); void initialize(Object * objectWithStream);
private: private:
FilterPredictor *_predict; FilterPredictor *_predict;
}; };
} }
#endif // FLATEDECODE_H_INCLUDED #endif // FLATEDECODE_H_INCLUDED
#ifndef JBIG2Decode_H #ifndef JBIG2Decode_H
#define JBIG2Decode_H #define JBIG2Decode_H
#include <string> #include <string>
namespace merge_lib namespace merge_lib
{ {
// this class provides method for FlateDecode encoding and decoding // this class provides method for FlateDecode encoding and decoding
class JBIG2Decode : public Decoder class JBIG2Decode : public Decoder
{ {
public: public:
JBIG2Decode(){}; JBIG2Decode(){};
virtual ~JBIG2Decode(){}; virtual ~JBIG2Decode(){};
bool encode(std::string & decoded) {return true;}; bool encode(std::string & decoded) {return true;};
bool decode(std::string & encoded) {return true;}; bool decode(std::string & encoded) {return true;};
void initialize(Object * objectWithStram){}; void initialize(Object * objectWithStram){};
}; };
} }
#endif // JBIG2Decode_H
#include <iostream> #include <iostream>
#include "LZWDecode.h" #include "LZWDecode.h"
#include "FilterPredictor.h" #include "FilterPredictor.h"
// method performs decoding // method performs decoding
using namespace merge_lib; using namespace merge_lib;
// Creates a decoder without a predictor. The input reference is bound to
// the internal dummy string, the bit buffer is empty, early change
// defaults to 1 and the code table starts in its cleared state.
LZWDecode::LZWDecode()
    : _predict(NULL)
    , _dummy("")
    , _encoded(_dummy)
    , _curSymbolIndex(0)
    , _earlyChange(1)
    , _readBuf(0)
    , _readBits(0)
    , _nextCode(0)
    , _bitsToRead(0)
    , _curSequenceLength(0)
    , _first(true)
{
    // sets _nextCode, _bitsToRead, _curSequenceLength and _first to their
    // start-of-stream values
    clearTable();
}
// Releases the predictor created by initialize(), if there is one.
LZWDecode::~LZWDecode()
{
    // deleting a null pointer is a no-op, so no explicit check is needed
    delete _predict;
}
// Prepares the decoder for the stream carried by objectWithStream.
// When the object's header contains FilterPredictor::DECODE_PARAM_TOKEN a
// predictor is created and initialized from the object, and the early-change
// setting is taken from it. The bit buffer and the code table are reset.
// A null objectWithStream leaves the decoder unchanged.
void LZWDecode::initialize(Object * objectWithStream)
{
    if( !objectWithStream )
    {
        return;
    }

    std::string head;
    objectWithStream->getHeader(head);
    if( head.find(FilterPredictor::DECODE_PARAM_TOKEN) != std::string::npos )
    {
        // Drop a predictor left over from an earlier initialize() call so it
        // is not leaked. Null the member first: should the allocation below
        // throw, the destructor must not delete the old object again.
        delete _predict;
        _predict = NULL;

        _predict = new FilterPredictor();
        _predict->initialize(objectWithStream);
        _earlyChange = _predict->getEarlyChange();
    }

    _readBits = 0;
    _readBuf = 0;
    clearTable();
}
void LZWDecode::clearTable() void LZWDecode::clearTable()
{ {
_nextCode = 258; _nextCode = 258;
_bitsToRead = 9; _bitsToRead = 9;
_curSequenceLength = 0; _curSequenceLength = 0;
_first = true; _first = true;
} }
int LZWDecode::getCode() int LZWDecode::getCode()
{ {
int c = 0; int c = 0;
int code = 0; int code = 0;
while (_readBits < _bitsToRead) while (_readBits < _bitsToRead)
{ {
if( _curSymbolIndex < _encoded.size() ) if( _curSymbolIndex < _encoded.size() )
{ {
c = _encoded[_curSymbolIndex++]; c = _encoded[_curSymbolIndex++];
} }
else else
{ {
return EOF; return EOF;
} }
_readBuf = (_readBuf << 8) | (c & 0xff); _readBuf = (_readBuf << 8) | (c & 0xff);
_readBits += 8; _readBits += 8;
} }
code = (_readBuf >> (_readBits - _bitsToRead)) & ((1 << _bitsToRead) - 1); code = (_readBuf >> (_readBits - _bitsToRead)) & ((1 << _bitsToRead) - 1);
_readBits -= _bitsToRead; _readBits -= _bitsToRead;
return code; return code;
} }
// Method performs LZW decoding // Method performs LZW decoding
bool LZWDecode::decode(std::string & encoded) bool LZWDecode::decode(std::string & encoded)
{ {
_curSymbolIndex = 0; _curSymbolIndex = 0;
_encoded = encoded; _encoded = encoded;
// LZW decoding // LZW decoding
std::string decoded; std::string decoded;
struct DecodingTable struct DecodingTable
{ {
int length; int length;
int head; int head;
unsigned tail; unsigned tail;
} decTable[4097]; } decTable[4097];
int prevCode = 0; int prevCode = 0;
int newChar = 0; int newChar = 0;
unsigned curSequence[4097]; unsigned curSequence[4097];
int nextLength = 0; int nextLength = 0;
clearTable(); clearTable();
while(1) while(1)
{ {
int code = getCode(); int code = getCode();
if( code == EOF || code == 257 ) if( code == EOF || code == 257 )
{ {
// finish // finish
break; break;
} }
if( code == 256 ) if( code == 256 )
{ {
clearTable(); clearTable();
continue; continue;
} }
if( _nextCode >= 4997 ) if( _nextCode >= 4997 )
{ {
std::cout<<"Bad LZW stream - unexpected clearTable\n"; std::cout<<"Bad LZW stream - unexpected clearTable\n";
clearTable(); clearTable();
continue; continue;
} }
nextLength = _curSequenceLength + 1; nextLength = _curSequenceLength + 1;
if( code < 256 ) if( code < 256 )
{ {
curSequence[ 0 ] = code; curSequence[ 0 ] = code;
_curSequenceLength = 1; _curSequenceLength = 1;
} }
else if( code < _nextCode ) else if( code < _nextCode )
{ {
//lets take sequence from table //lets take sequence from table
_curSequenceLength = decTable[code].length; _curSequenceLength = decTable[code].length;
int j = code; int j = code;
for( int i = _curSequenceLength - 1; i > 0; i--) for( int i = _curSequenceLength - 1; i > 0; i--)
{ {
curSequence[ i ] = decTable[j].tail; curSequence[ i ] = decTable[j].tail;
j = decTable[ j ].head; j = decTable[ j ].head;
} }
curSequence[0] = j; curSequence[0] = j;
} }
else if( code == _nextCode ) else if( code == _nextCode )
{ {
curSequence[ _curSequenceLength ] = newChar; curSequence[ _curSequenceLength ] = newChar;
++_curSequenceLength; ++_curSequenceLength;
} }
else else
{ {
std::cout<<"Bad LZW stream - unexpected code "<<code<<"\n"; std::cout<<"Bad LZW stream - unexpected code "<<code<<"\n";
break; break;
} }
newChar = curSequence[0]; newChar = curSequence[0];
if( _first ) if( _first )
{ {
_first = false; _first = false;
} }
else else
{ {
// lets build decoding table // lets build decoding table
decTable[ _nextCode ].length = nextLength; decTable[ _nextCode ].length = nextLength;
decTable[ _nextCode ].head = prevCode; decTable[ _nextCode ].head = prevCode;
decTable[ _nextCode ].tail = newChar; decTable[ _nextCode ].tail = newChar;
++ _nextCode; ++ _nextCode;
// processing of PDF LZW parameter // processing of PDF LZW parameter
if (_nextCode + _earlyChange == 512) if (_nextCode + _earlyChange == 512)
{ {
_bitsToRead = 10; _bitsToRead = 10;
} }
else if (_nextCode + _earlyChange == 1024) else if (_nextCode + _earlyChange == 1024)
{ {
_bitsToRead = 11; _bitsToRead = 11;
} }
else if (_nextCode + _earlyChange == 2048) else if (_nextCode + _earlyChange == 2048)
{ {
_bitsToRead = 12; _bitsToRead = 12;
} }
} }
prevCode = code; prevCode = code;
// put current sequence to output stream // put current sequence to output stream
for(int i = 0;i < _curSequenceLength;i++) for(int i = 0;i < _curSequenceLength;i++)
{ {
decoded += (char)curSequence[ i ]; decoded += (char)curSequence[ i ];
} }
} }
encoded = decoded; encoded = decoded;
// if predictor exists for that object, then lets decode it // if predictor exists for that object, then lets decode it
if( _predict ) if( _predict )
{ {
_predict->decode(encoded); _predict->decode(encoded);
} }
return true; return true;
} }
#ifndef LZWDecode_H #ifndef LZWDecode_H
#define LZWDecode_H #define LZWDecode_H
#include <string> #include <string>
#include "Decoder.h" #include "Decoder.h"
#include "FilterPredictor.h" #include "FilterPredictor.h"
namespace merge_lib namespace merge_lib
{ {
// this class provides method for FlateDecode encoding and decoding // this class provides method for FlateDecode encoding and decoding
class LZWDecode : public Decoder class LZWDecode : public Decoder
{ {
public: public:
LZWDecode(); LZWDecode();
virtual ~LZWDecode(); virtual ~LZWDecode();
bool encode(std::string & decoded) {return true;}; bool encode(std::string & decoded) {return true;};
bool decode(std::string & encoded); bool decode(std::string & encoded);
void initialize(Object * objectWithStram); void initialize(Object * objectWithStram);
private: private:
FilterPredictor *_predict; FilterPredictor *_predict;
void clearTable(); void clearTable();
int getCode(); int getCode();
std::string &_encoded; std::string &_encoded;
std::string _dummy; std::string _dummy;
size_t _curSymbolIndex; size_t _curSymbolIndex;
int _earlyChange; // early parameter int _earlyChange; // early parameter
int _readBuf; int _readBuf;
int _readBits; int _readBits;
int _nextCode; int _nextCode;
int _bitsToRead; int _bitsToRead;
bool _first; bool _first;
int _curSequenceLength; int _curSequenceLength;
}; };
} }
#endif // LZWDecode_H
#if !defined MediaBoxElementHandler_h #if !defined MediaBoxElementHandler_h
#define MediaBoxElementHandler_h #define MediaBoxElementHandler_h
#include "AbstractBoxElementHandler.h" #include "AbstractBoxElementHandler.h"
#include "RemoveHimSelfHandler.h" #include "RemoveHimSelfHandler.h"
#include <memory> #include <memory>
namespace merge_lib namespace merge_lib
{ {
//class for processing MediaBox field of Page object //class for processing MediaBox field of Page object
class MediaBoxElementHandler: public AbstractBoxElementHandler class MediaBoxElementHandler: public AbstractBoxElementHandler
{ {
public: public:
MediaBoxElementHandler(Object * page): AbstractBoxElementHandler(page) MediaBoxElementHandler(Object * page): AbstractBoxElementHandler(page)
{ {
_setHandlerName("/MediaBox"); _setHandlerName("/MediaBox");
} }
virtual ~MediaBoxElementHandler() virtual ~MediaBoxElementHandler()
{ {
} }
private: private:
//replace MediaBox with BBox //replace MediaBox with BBox
virtual void _changeObjectContent(unsigned int startOfPageElement) virtual void _changeObjectContent(unsigned int startOfPageElement)
{ {
if(_wasCropBoxHandlerCalled()) if(_wasCropBoxHandlerCalled())
{ {
PageElementHandler * tempNextHandler = _nextHandler; PageElementHandler * tempNextHandler = _nextHandler;
_nextHandler = new RemoveHimselfHandler(_page, _handlerName); _nextHandler = new RemoveHimselfHandler(_page, _handlerName);
_nextHandler->addNextHandler(tempNextHandler); _nextHandler->addNextHandler(tempNextHandler);
return; return;
} }
_page->eraseContent(startOfPageElement, _handlerName.size()); _page->eraseContent(startOfPageElement, _handlerName.size());
static std::string bbox("/BBox"); static std::string bbox("/BBox");
static std::string matrix("/Matrix [ 1 0 0 1 0 0 ]\n"); static std::string matrix("/Matrix [ 1 0 0 1 0 0 ]\n");
_page->insertToContent(startOfPageElement, bbox); _page->insertToContent(startOfPageElement, bbox);
_page->insertToContent(startOfPageElement, matrix); _page->insertToContent(startOfPageElement, matrix);
} }
void _pageElementNotFound() void _pageElementNotFound()
{ {
if(_wasCropBoxHandlerCalled()) if(_wasCropBoxHandlerCalled())
return; return;
_retrieveBoxFromParent(); _retrieveBoxFromParent();
} }
bool _wasCropBoxHandlerCalled() bool _wasCropBoxHandlerCalled()
{ {
return (_page->getObjectContent().find("/BBox") != std::string::npos) ? true : false; return (_page->getObjectContent().find("/BBox") != std::string::npos) ? true : false;
} }
}; };
} }
#endif #endif
#if !defined MergePageDescription_h #if !defined MergePageDescription_h
#define MergePageDescription_h #define MergePageDescription_h
#include "Transformation.h" #include "Transformation.h"
#include <map> #include <map>
#include <string> #include <string>
namespace merge_lib namespace merge_lib
{ {
struct MergePageDescription struct MergePageDescription
{ {
//members: //members:
double outPageWidth; // output page width double outPageWidth; // output page width
double outPageHeight; // output page height double outPageHeight; // output page height
unsigned int basePageNumber; unsigned int basePageNumber;
std::string baseDocumentName; std::string baseDocumentName;
TransformationDescription basePageTransformation; TransformationDescription basePageTransformation;
unsigned int overlayPageNumber; unsigned int overlayPageNumber;
TransformationDescription overlayPageTransformation; TransformationDescription overlayPageTransformation;
bool skipOverlayPage; bool skipOverlayPage;
bool skipBasePage; bool skipBasePage;
//methods: //methods:
//constructor //constructor
MergePageDescription(double outputPageWidth, MergePageDescription(double outputPageWidth,
double outputPageHeight, double outputPageHeight,
unsigned int basePageNum, unsigned int basePageNum,
const char * baseDocName, const char * baseDocName,
const TransformationDescription & baseTrans, const TransformationDescription & baseTrans,
unsigned int overlayPageNum, unsigned int overlayPageNum,
const TransformationDescription & overlayTrans, const TransformationDescription & overlayTrans,
bool omitOverlayPage = false, bool omitOverlayPage = false,
bool omitBasePage = false bool omitBasePage = false
): ):
outPageWidth(outputPageWidth), outPageWidth(outputPageWidth),
outPageHeight(outputPageHeight), outPageHeight(outputPageHeight),
basePageNumber(basePageNum), basePageNumber(basePageNum),
baseDocumentName(baseDocName), baseDocumentName(baseDocName),
basePageTransformation(baseTrans), basePageTransformation(baseTrans),
overlayPageNumber(overlayPageNum), overlayPageNumber(overlayPageNum),
overlayPageTransformation(overlayTrans), overlayPageTransformation(overlayTrans),
skipOverlayPage(omitOverlayPage), skipOverlayPage(omitOverlayPage),
skipBasePage(omitBasePage) skipBasePage(omitBasePage)
{ {
} }
MergePageDescription(double outputPageWidth, MergePageDescription(double outputPageWidth,
double outputPageHeight, double outputPageHeight,
unsigned int basePageNum, unsigned int basePageNum,
const char * baseDocName, const char * baseDocName,
const TransformationDescription & baseTrans const TransformationDescription & baseTrans
): ):
outPageWidth(outputPageWidth), outPageWidth(outputPageWidth),
outPageHeight(outputPageHeight), outPageHeight(outputPageHeight),
basePageNumber(basePageNum), basePageNumber(basePageNum),
baseDocumentName(baseDocName), baseDocumentName(baseDocName),
basePageTransformation(baseTrans), basePageTransformation(baseTrans),
overlayPageNumber(0), overlayPageNumber(0),
overlayPageTransformation(), overlayPageTransformation(),
skipOverlayPage(true), skipOverlayPage(true),
skipBasePage(false) skipBasePage(false)
{ {
} }
MergePageDescription(const MergePageDescription & copy) MergePageDescription(const MergePageDescription & copy)
{ {
*this = copy; *this = copy;
} }
MergePageDescription& operator = (const MergePageDescription &copy) MergePageDescription& operator = (const MergePageDescription &copy)
{ {
if( this != &copy ) if( this != &copy )
{ {
baseDocumentName = copy.baseDocumentName; baseDocumentName = copy.baseDocumentName;
basePageNumber = copy.basePageNumber; basePageNumber = copy.basePageNumber;
skipBasePage = copy.skipBasePage; skipBasePage = copy.skipBasePage;
skipOverlayPage = copy.skipOverlayPage; skipOverlayPage = copy.skipOverlayPage;
outPageHeight = copy.outPageHeight; outPageHeight = copy.outPageHeight;
outPageWidth = copy.outPageWidth; outPageWidth = copy.outPageWidth;
basePageTransformation = copy.basePageTransformation; basePageTransformation = copy.basePageTransformation;
overlayPageNumber = copy.overlayPageNumber; overlayPageNumber = copy.overlayPageNumber;
overlayPageTransformation = copy.overlayPageTransformation; overlayPageTransformation = copy.overlayPageTransformation;
} }
return *this; return *this;
} }
~MergePageDescription() ~MergePageDescription()
{ {
} }
}; };
// array of merge descriptions - allows to merge selected pages // array of merge descriptions - allows to merge selected pages
typedef std::vector<MergePageDescription> MergeDescription; typedef std::vector<MergePageDescription> MergeDescription;
} }
#endif #endif
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
// Merger.cpp // Merger.cpp
// Implementation of the Class Merger // Implementation of the Class Merger
// Created on: 19-???-2009 12:27:54 // Created on: 19-???-2009 12:27:54
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
#include "Merger.h" #include "Merger.h"
#include "Parser.h" #include "Parser.h"
#include "OverlayDocumentParser.h" #include "OverlayDocumentParser.h"
#include "Exception.h" #include "Exception.h"
#include <map> #include <map>
#include <iostream> #include <iostream>
using namespace merge_lib; using namespace merge_lib;
// Definition of the parser shared by all Merger instances.
Parser Merger::_parser;

// Creates a merger with no base documents and no overlay document.
Merger::Merger()
    : _baseDocuments()
    , _overlayDocument(0)
{
}
// Destroys every registered base document and the overlay document.
Merger::~Merger()
{
    typedef std::map<std::string, Document *>::iterator DocumentIterator;
    for(DocumentIterator entry = _baseDocuments.begin(); entry != _baseDocuments.end(); ++entry)
    {
        delete entry->second;
    }

    // deleting a null pointer is a no-op, so no explicit check is needed
    delete _overlayDocument;
    _overlayDocument = 0;

    _baseDocuments.clear();
}
void Merger::addBaseDocument(const char * docName) void Merger::addBaseDocument(const char * docName)
{ {
//if docName has been already opened then do nothing //if docName has been already opened then do nothing
if(_baseDocuments.count(docName)) if(_baseDocuments.count(docName))
return; return;
Document * newBaseDoc = _parser.parseDocument(docName); Document * newBaseDoc = _parser.parseDocument(docName);
_baseDocuments.insert(std::pair<std::string, Document *>(docName, newBaseDoc)); _baseDocuments.insert(std::pair<std::string, Document *>(docName, newBaseDoc));
} }
void Merger::addOverlayDocument(const char * docName) void Merger::addOverlayDocument(const char * docName)
{ {
if( _overlayDocument ) if( _overlayDocument )
{ {
delete _overlayDocument; delete _overlayDocument;
_overlayDocument = 0; _overlayDocument = 0;
} }
if( !_overlayDocument ) if( !_overlayDocument )
{ {
OverlayDocumentParser overlayDocParser; OverlayDocumentParser overlayDocParser;
_overlayDocument = overlayDocParser.parseDocument(docName); _overlayDocument = overlayDocParser.parseDocument(docName);
if( !_overlayDocument ) if( !_overlayDocument )
{ {
throw Exception("Error loading overlay document!"); throw Exception("Error loading overlay document!");
} }
} }
} }
// The main method which performs the merge // The main method which performs the merge
void Merger::merge(const char * overlayDocName, const MergeDescription & pagesToMerge) void Merger::merge(const char * overlayDocName, const MergeDescription & pagesToMerge)
{ {
if( !_overlayDocument) if( !_overlayDocument)
{ {
addOverlayDocument(overlayDocName); addOverlayDocument(overlayDocName);
if( !_overlayDocument ) if( !_overlayDocument )
{ {
throw Exception("Error loading overlay document!"); throw Exception("Error loading overlay document!");
} }
} }
MergeDescription::const_iterator pageIterator = pagesToMerge.begin(); MergeDescription::const_iterator pageIterator = pagesToMerge.begin();
for(; pageIterator != pagesToMerge.end(); ++pageIterator ) for(; pageIterator != pagesToMerge.end(); ++pageIterator )
{ {
Page * destinationPage = _overlayDocument->getPage( (*pageIterator).overlayPageNumber); Page * destinationPage = _overlayDocument->getPage( (*pageIterator).overlayPageNumber);
if( destinationPage == 0 ) if( destinationPage == 0 )
{ {
std::stringstream error; std::stringstream error;
error << "There is no page with " << (*pageIterator).overlayPageNumber << error << "There is no page with " << (*pageIterator).overlayPageNumber <<
" number in " << overlayDocName; " number in " << overlayDocName;
throw Exception(error); throw Exception(error);
} }
Document * sourceDocument = _baseDocuments[(*pageIterator).baseDocumentName]; Document * sourceDocument = _baseDocuments[(*pageIterator).baseDocumentName];
Page * sourcePage = (sourceDocument == 0)? 0 : sourceDocument->getPage((*pageIterator).basePageNumber); Page * sourcePage = (sourceDocument == 0)? 0 : sourceDocument->getPage((*pageIterator).basePageNumber);
bool isPageDuplicated = false; bool isPageDuplicated = false;
if( sourcePage ) if( sourcePage )
{ {
unsigned int howManyTimesPageFound(0); unsigned int howManyTimesPageFound(0);
for(size_t i = 0; i < pagesToMerge.size(); ++i) for(size_t i = 0; i < pagesToMerge.size(); ++i)
{ {
if(pagesToMerge[i].basePageNumber == (*pageIterator).basePageNumber) if(pagesToMerge[i].basePageNumber == (*pageIterator).basePageNumber)
++howManyTimesPageFound; ++howManyTimesPageFound;
if(howManyTimesPageFound == 2) if(howManyTimesPageFound == 2)
break; break;
} }
isPageDuplicated = (2 == howManyTimesPageFound) ? true : false; isPageDuplicated = (2 == howManyTimesPageFound) ? true : false;
} }
destinationPage->merge(sourcePage, _overlayDocument, const_cast<MergePageDescription&>((*pageIterator)), isPageDuplicated); destinationPage->merge(sourcePage, _overlayDocument, const_cast<MergePageDescription&>((*pageIterator)), isPageDuplicated);
} }
} }
// Method performs saving of merged documents into selected file // Method performs saving of merged documents into selected file
void Merger::saveMergedDocumentsAs(const char * outDocumentName) void Merger::saveMergedDocumentsAs(const char * outDocumentName)
{ {
_overlayDocument->saveAs(outDocumentName); _overlayDocument->saveAs(outDocumentName);
} }
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
// Merger.h // Merger.h
// Implementation of the Class Merger // Implementation of the Class Merger
// Created on: 19--2009 12:27:54 // Created on: 19--2009 12:27:54
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
#if !defined Merger_h #if !defined Merger_h
#define Merger_h #define Merger_h
#include "Document.h" #include "Document.h"
#include "Parser.h" #include "Parser.h"
#include <map> #include <map>
// structure defines parameter of merge // structure defines parameter of merge
namespace merge_lib namespace merge_lib
{ {
class Document; class Document;
class Merger class Merger
{ {
public: public:
Merger(); Merger();
~Merger(); ~Merger();
//this method should be called every time the "Add" button is clicked //this method should be called every time the "Add" button is clicked
void addBaseDocument(const char *docName); void addBaseDocument(const char *docName);
void addOverlayDocument(const char *docName); void addOverlayDocument(const char *docName);
void saveMergedDocumentsAs(const char *outDocumentName); void saveMergedDocumentsAs(const char *outDocumentName);
void merge(const char *overlayDocName, const MergeDescription & pagesToMerge); void merge(const char *overlayDocName, const MergeDescription & pagesToMerge);
private: private:
std::map<std::string, Document * > _baseDocuments; std::map<std::string, Document * > _baseDocuments;
static Parser _parser; static Parser _parser;
Document * _overlayDocument; Document * _overlayDocument;
}; };
} }
#endif // #endif //
#if !defined OverlayDocumentParser_h #if !defined OverlayDocumentParser_h
#define OverlayDocumentParser_h #define OverlayDocumentParser_h
#include "Object.h" #include "Object.h"
#include "Document.h" #include "Document.h"
#include "Page.h" #include "Page.h"
#include "Parser.h" #include "Parser.h"
#include <map> #include <map>
#include <iostream> #include <iostream>
namespace merge_lib namespace merge_lib
{ {
class Document; class Document;
//This class parsed the pdf document and creates //This class parsed the pdf document and creates
//an Document object //an Document object
class OverlayDocumentParser: private Parser class OverlayDocumentParser: private Parser
{ {
public: public:
OverlayDocumentParser(): Parser(), _fileName() {}; OverlayDocumentParser(): Parser(), _fileName() {};
Document * parseDocument(const char * fileName); Document * parseDocument(const char * fileName);
protected: protected:
unsigned int _readTrailerAndReturnRoot(); unsigned int _readTrailerAndReturnRoot();
private: private:
//methods //methods
void _getFileContent(const char * fileName){}; void _getFileContent(const char * fileName){};
void _readXRefAndCreateObjects(); void _readXRefAndCreateObjects();
void _readXref(std::map<unsigned int, unsigned long> & objectsAndSizes); void _readXref(std::map<unsigned int, unsigned long> & objectsAndSizes);
void _getPartOfFileContent(long startOfPart, unsigned int length); void _getPartOfFileContent(long startOfPart, unsigned int length);
unsigned int _getStartOfXrefWithRoot(); unsigned int _getStartOfXrefWithRoot();
//constants //constants
static int DOC_PART_WITH_START_OF_XREF; static int DOC_PART_WITH_START_OF_XREF;
//members //members
std::string _fileName; std::string _fileName;
}; };
} }
#endif #endif
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
// Page.h // Page.h
// Implementation of the Class Page // Implementation of the Class Page
// Created on: 19--2009 12:27:56 // Created on: 19--2009 12:27:56
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
#if !defined Page_h #if !defined Page_h
#define Page_h #define Page_h
#include <string> #include <string>
#include "Object.h" #include "Object.h"
#include "MergePageDescription.h" #include "MergePageDescription.h"
namespace merge_lib namespace merge_lib
{ {
class Document; class Document;
class Page class Page
{ {
friend class Parser; friend class Parser;
friend class PageElementsFactory; friend class PageElementsFactory;
public: public:
Page(unsigned int pageNumber); Page(unsigned int pageNumber);
~Page(); ~Page();
unsigned int getPageNumber() unsigned int getPageNumber()
{ {
return _pageNumber; return _pageNumber;
} }
void merge(Page * sourcePage, Document * parentDocument, MergePageDescription & description, bool isPageDuplicated); void merge(Page * sourcePage, Document * parentDocument, MergePageDescription & description, bool isPageDuplicated);
void recalculateObjectNumbers(unsigned int & newNumber); void recalculateObjectNumbers(unsigned int & newNumber);
std::string & getPageContent(); std::string & getPageContent();
const Object::Children & getPageRefs(); const Object::Children & getPageRefs();
Object * pageToXObject(std::vector<Object *> & allObjects, std::vector<Object *> & annots, bool isCloneNeeded); Object * pageToXObject(std::vector<Object *> & allObjects, std::vector<Object *> & annots, bool isCloneNeeded);
void setRotation(int rotation) void setRotation(int rotation)
{ {
_rotation = rotation; _rotation = rotation;
} }
private: private:
//methods //methods
Object * _pageToXObject(Object *& page, std::vector<Object *> & annots); Object * _pageToXObject(Object *& page, std::vector<Object *> & annots);
std::string _getMergedPageContent( unsigned int & contentPosition, std::string _getMergedPageContent( unsigned int & contentPosition,
unsigned int & parentPosition, unsigned int & parentPosition,
unsigned int & originalPage1Position, unsigned int & originalPage1Position,
unsigned int & originalPage2Position, unsigned int & originalPage2Position,
std::pair<unsigned int, unsigned int> originalPageNumbers, std::pair<unsigned int, unsigned int> originalPageNumbers,
const MergePageDescription & description, const MergePageDescription & description,
Object * basePage, Object * basePage,
const std::vector<Object *> & annots, const std::vector<Object *> & annots,
std::vector <Object::ChildAndItPositionInContent> & annotsPositions std::vector <Object::ChildAndItPositionInContent> & annotsPositions
); );
//members //members
Object * _root; Object * _root;
unsigned int _pageNumber; unsigned int _pageNumber;
int _rotation; int _rotation;
}; };
} }
#endif #endif
#include "PageElementHandler.h" #include "PageElementHandler.h"
using namespace merge_lib; using namespace merge_lib;
std::set<std::string> PageElementHandler::_allPageFields; std::set<std::string> PageElementHandler::_allPageFields;
void PageElementHandler::_createAllPageFieldsSet() void PageElementHandler::_createAllPageFieldsSet()
{ {
if(!_allPageFields.empty()) if(!_allPageFields.empty())
return; return;
_allPageFields.insert(std::string("Type")); _allPageFields.insert(std::string("Type"));
_allPageFields.insert(std::string("Parent")); _allPageFields.insert(std::string("Parent"));
_allPageFields.insert(std::string("LastModified")); _allPageFields.insert(std::string("LastModified"));
_allPageFields.insert(std::string("Resources")); _allPageFields.insert(std::string("Resources"));
_allPageFields.insert(std::string("MediaBox")); _allPageFields.insert(std::string("MediaBox"));
_allPageFields.insert(std::string("CropBox")); _allPageFields.insert(std::string("CropBox"));
_allPageFields.insert(std::string("BleedBox")); _allPageFields.insert(std::string("BleedBox"));
_allPageFields.insert(std::string("TrimBox")); _allPageFields.insert(std::string("TrimBox"));
_allPageFields.insert(std::string("ArtBox")); _allPageFields.insert(std::string("ArtBox"));
_allPageFields.insert(std::string("BoxColorInfo")); _allPageFields.insert(std::string("BoxColorInfo"));
_allPageFields.insert(std::string("Contents")); _allPageFields.insert(std::string("Contents"));
_allPageFields.insert(std::string("Rotate")); _allPageFields.insert(std::string("Rotate"));
_allPageFields.insert(std::string("Group")); _allPageFields.insert(std::string("Group"));
_allPageFields.insert(std::string("Thumb")); _allPageFields.insert(std::string("Thumb"));
_allPageFields.insert(std::string("B")); _allPageFields.insert(std::string("B"));
_allPageFields.insert(std::string("Dur")); _allPageFields.insert(std::string("Dur"));
_allPageFields.insert(std::string("Trans")); _allPageFields.insert(std::string("Trans"));
_allPageFields.insert(std::string("Annots")); _allPageFields.insert(std::string("Annots"));
_allPageFields.insert(std::string("AA")); _allPageFields.insert(std::string("AA"));
_allPageFields.insert(std::string("Metadata")); _allPageFields.insert(std::string("Metadata"));
_allPageFields.insert(std::string("PieceInfo")); _allPageFields.insert(std::string("PieceInfo"));
_allPageFields.insert(std::string("StructParents")); _allPageFields.insert(std::string("StructParents"));
_allPageFields.insert(std::string("ID")); _allPageFields.insert(std::string("ID"));
_allPageFields.insert(std::string("PZ")); _allPageFields.insert(std::string("PZ"));
_allPageFields.insert(std::string("SeparationInfo")); _allPageFields.insert(std::string("SeparationInfo"));
_allPageFields.insert(std::string("Tabs")); _allPageFields.insert(std::string("Tabs"));
_allPageFields.insert(std::string("TemplateInstantiated")); _allPageFields.insert(std::string("TemplateInstantiated"));
_allPageFields.insert(std::string("PresSteps")); _allPageFields.insert(std::string("PresSteps"));
_allPageFields.insert(std::string("UserUnit")); _allPageFields.insert(std::string("UserUnit"));
_allPageFields.insert(std::string("VP")); _allPageFields.insert(std::string("VP"));
//for correct search all fields of XObject should be present to //for correct search all fields of XObject should be present to
_allPageFields.insert(std::string("Subtype")); _allPageFields.insert(std::string("Subtype"));
_allPageFields.insert(std::string("FormType")); _allPageFields.insert(std::string("FormType"));
_allPageFields.insert(std::string("BBox")); _allPageFields.insert(std::string("BBox"));
_allPageFields.insert(std::string("Matrix")); _allPageFields.insert(std::string("Matrix"));
_allPageFields.insert(std::string("Ref")); _allPageFields.insert(std::string("Ref"));
_allPageFields.insert(std::string("StructParent")); _allPageFields.insert(std::string("StructParent"));
_allPageFields.insert(std::string("OPI")); _allPageFields.insert(std::string("OPI"));
_allPageFields.insert(std::string("OC")); _allPageFields.insert(std::string("OC"));
_allPageFields.insert(std::string("Name")); _allPageFields.insert(std::string("Name"));
} }
unsigned int PageElementHandler::_findEndOfElementContent(unsigned int startOfPageElement) unsigned int PageElementHandler::_findEndOfElementContent(unsigned int startOfPageElement)
{ {
static std::string whitespacesAndDelimeters(" \t\f\v\n\r<<[/"); static std::string whitespacesAndDelimeters(" \t\f\v\n\r<<[/");
unsigned int foundSlash = _pageContent.find("/", startOfPageElement + 1); unsigned int foundSlash = _pageContent.find("/", startOfPageElement + 1);
std::string fieldType; std::string fieldType;
while(foundSlash != std::string::npos) while(foundSlash != std::string::npos)
{ {
unsigned int foundWhitespace = _pageContent.find_first_of(whitespacesAndDelimeters, foundSlash + 1); unsigned int foundWhitespace = _pageContent.find_first_of(whitespacesAndDelimeters, foundSlash + 1);
if(foundWhitespace != std::string::npos) if(foundWhitespace != std::string::npos)
fieldType = _pageContent.substr(foundSlash + 1, foundWhitespace - foundSlash - 1); fieldType = _pageContent.substr(foundSlash + 1, foundWhitespace - foundSlash - 1);
else else
break; break;
//is this any page element between "/" and " " //is this any page element between "/" and " "
if(_allPageFields.count(fieldType)) if(_allPageFields.count(fieldType))
{ {
return foundSlash; return foundSlash;
} }
foundSlash = foundWhitespace; foundSlash = foundWhitespace;
} }
return _pageContent.rfind(">>"); return _pageContent.rfind(">>");
} }
#if !defined PageElementHandler_h #if !defined PageElementHandler_h
#define PageElementHandler_h #define PageElementHandler_h
#include <string> #include <string>
#include <set> #include <set>
#include "Object.h" #include "Object.h"
#include "Parser.h" #include "Parser.h"
namespace merge_lib namespace merge_lib
{ {
//base class for all elements handlers //base class for all elements handlers
//Handlers are needed to process Page fields during the merge //Handlers are needed to process Page fields during the merge
//this is the chain of responsibility pattern //this is the chain of responsibility pattern
class PageElementHandler class PageElementHandler
{ {
public: public:
PageElementHandler(Object * page): _page(page), _pageContent(page->_content), _nextHandler(0) PageElementHandler(Object * page): _page(page), _pageContent(page->_content), _nextHandler(0)
{ {
_createAllPageFieldsSet(); _createAllPageFieldsSet();
} }
virtual ~PageElementHandler() virtual ~PageElementHandler()
{ {
delete _nextHandler; delete _nextHandler;
} }
void addNextHandler(PageElementHandler * nextHandler) void addNextHandler(PageElementHandler * nextHandler)
{ {
_nextHandler = nextHandler; _nextHandler = nextHandler;
} }
void processObjectContent() void processObjectContent()
{ {
unsigned int startOfPageElement = _findStartOfPageElement(); unsigned int startOfPageElement = _findStartOfPageElement();
if(startOfPageElement != std::string::npos) if(startOfPageElement != std::string::npos)
_processObjectContent(startOfPageElement); _processObjectContent(startOfPageElement);
if(_nextHandler) if(_nextHandler)
_nextHandler->processObjectContent(); _nextHandler->processObjectContent();
} }
void changeObjectContent() void changeObjectContent()
{ {
unsigned int startOfPageElement = _findStartOfPageElement(); unsigned int startOfPageElement = _findStartOfPageElement();
if(startOfPageElement != std::string::npos) if(startOfPageElement != std::string::npos)
_changeObjectContent(startOfPageElement); _changeObjectContent(startOfPageElement);
else else
_pageElementNotFound(); _pageElementNotFound();
if(_nextHandler) if(_nextHandler)
_nextHandler->changeObjectContent(); _nextHandler->changeObjectContent();
} }
protected: protected:
//methods //methods
void _setHandlerName(const std::string & handlerName) void _setHandlerName(const std::string & handlerName)
{ {
_handlerName = handlerName; _handlerName = handlerName;
} }
unsigned int _findEndOfElementContent(unsigned int startOfPageElement); unsigned int _findEndOfElementContent(unsigned int startOfPageElement);
void _createAllPageFieldsSet(); void _createAllPageFieldsSet();
//members //members
std::string & _pageContent; std::string & _pageContent;
Object * _page; Object * _page;
std::string _handlerName; std::string _handlerName;
PageElementHandler * _nextHandler; PageElementHandler * _nextHandler;
private: private:
//methods //methods
virtual void _processObjectContent(unsigned int startOfPageElement){}; virtual void _processObjectContent(unsigned int startOfPageElement){};
virtual void _changeObjectContent(unsigned int startOfPageElement) = 0; virtual void _changeObjectContent(unsigned int startOfPageElement) = 0;
virtual void _pageElementNotFound() {}; virtual void _pageElementNotFound() {};
unsigned int _findStartOfPageElement() unsigned int _findStartOfPageElement()
{ {
return Parser::findToken(_pageContent,_handlerName); return Parser::findToken(_pageContent,_handlerName);
} }
//members //members
static std::set<std::string> _allPageFields; static std::set<std::string> _allPageFields;
}; };
} }
#endif #endif
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
// Page.h // Page.h
// Implementation of the Class Page // Implementation of the Class Page
// Created on: 19--2009 12:27:56 // Created on: 19--2009 12:27:56
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
#if !defined(EA_FF53E894_11D7_4c64_A409_DBC97C9EF3CF__INCLUDED_) #if !defined(EA_FF53E894_11D7_4c64_A409_DBC97C9EF3CF__INCLUDED_)
#define EA_FF53E894_11D7_4c64_A409_DBC97C9EF3CF__INCLUDED_ #define EA_FF53E894_11D7_4c64_A409_DBC97C9EF3CF__INCLUDED_
#include "Object.h" #include "Object.h"
#include <string> #include <string>
class PageParser class PageParser
{ {
friend class Parser; friend class Parser;
public: public:
PageParser(const std::string & pageContent); PageParser(const std::string & pageContent);
~PageParser(); ~PageParser();
unsigned int getPageNumber() unsigned int getPageNumber()
{ {
return _pageNumber; return _pageNumber;
} }
void merge(const Page & sourcePage); void merge(const Page & sourcePage);
void recalculateObjectNumbers(unsigned int & newNumber); void recalculateObjectNumbers(unsigned int & newNumber);
std::string & getPageContent(); std::string & getPageContent();
const std::map <unsigned int, Object *> & getPageRefs(); const std::map <unsigned int, Object *> & getPageRefs();
private: private:
//methods //methods
//members //members
Object * _root; Object * _root;
unsigned int _pageNumber; unsigned int _pageNumber;
}; };
#endif // !defined(EA_FF53E894_11D7_4c64_A409_DBC97C9EF3CF__INCLUDED_) #endif // !defined(EA_FF53E894_11D7_4c64_A409_DBC97C9EF3CF__INCLUDED_)
#if !defined Parser_h #if !defined Parser_h
#define Parser_h #define Parser_h
#include "Object.h" #include "Object.h"
#include "Document.h" #include "Document.h"
#include "Page.h" #include "Page.h"
#include <string> #include <string>
#include <vector> #include <vector>
namespace merge_lib namespace merge_lib
{ {
class Document; class Document;
//This class parsed the pdf document and creates //This class parsed the pdf document and creates
//an Document object //an Document object
class Parser class Parser
{ {
public: public:
Parser(): _root(0), _fileContent(), _objects(), _document(0) {}; Parser(): _root(0), _fileContent(), _objects(), _document(0) {};
Document * parseDocument(const char * fileName); Document * parseDocument(const char * fileName);
static const std::string WHITESPACES; static const std::string WHITESPACES;
static const std::string DELIMETERS; static const std::string DELIMETERS;
static const std::string NUMBERS; static const std::string NUMBERS;
static const std::string WHITESPACES_AND_DELIMETERS; static const std::string WHITESPACES_AND_DELIMETERS;
static bool getNextWord(std::string & out, const std::string &in, size_t &nextPosition,size_t *found = NULL); static bool getNextWord(std::string & out, const std::string &in, size_t &nextPosition,size_t *found = NULL);
static std::string getNextToken( const std::string &in, unsigned &position); static std::string getNextToken( const std::string &in, unsigned &position);
static void trim(std::string &str); static void trim(std::string &str);
static std::string findTokenStr(const std::string &content, const std::string &pattern, size_t start,size_t &foundStart, size_t &foundEnd); static std::string findTokenStr(const std::string &content, const std::string &pattern, size_t start,size_t &foundStart, size_t &foundEnd);
static size_t findToken(const std::string &content, const std::string &keyword,size_t start = 0); static size_t findToken(const std::string &content, const std::string &keyword,size_t start = 0);
static size_t findTokenName(const std::string &content, const std::string &keyword,size_t start = 0); static size_t findTokenName(const std::string &content, const std::string &keyword,size_t start = 0);
static unsigned int findEndOfElementContent(const std::string &content, unsigned int startOfPageElement); static unsigned int findEndOfElementContent(const std::string &content, unsigned int startOfPageElement);
static bool tokenIsAName(const std::string &content, size_t start ); static bool tokenIsAName(const std::string &content, size_t start );
protected: protected:
const std::string & _getObjectContent(unsigned int objectPosition, unsigned int & objectNumber, unsigned int & generationNumber, std::pair<unsigned int, unsigned int> &, bool &); const std::string & _getObjectContent(unsigned int objectPosition, unsigned int & objectNumber, unsigned int & generationNumber, std::pair<unsigned int, unsigned int> &, bool &);
virtual unsigned int _readTrailerAndReturnRoot(); virtual unsigned int _readTrailerAndReturnRoot();
private: private:
//methods //methods
virtual void _getFileContent(const char * fileName); virtual void _getFileContent(const char * fileName);
bool _getNextObject(Object * object); bool _getNextObject(Object * object);
void _callObserver(std::string objectContent); void _callObserver(std::string objectContent);
void _createObjectTree(const char * fileName); void _createObjectTree(const char * fileName);
void _retrieveAllPages(Object * objectWithKids); void _retrieveAllPages(Object * objectWithKids);
void _fillOutObjects(); void _fillOutObjects();
virtual void _readXRefAndCreateObjects(); virtual void _readXRefAndCreateObjects();
unsigned int _getEndOfLineFromContent(unsigned int fromPosition); unsigned int _getEndOfLineFromContent(unsigned int fromPosition);
const std::pair<unsigned int, unsigned int> & _getLineBounds(const std::string & str, unsigned int fromPosition); const std::pair<unsigned int, unsigned int> & _getLineBounds(const std::string & str, unsigned int fromPosition);
const std::string & _getNextToken(unsigned int & fromPosition); const std::string & _getNextToken(unsigned int & fromPosition);
unsigned int _countTokens(unsigned int leftBound, unsigned int rightBount); unsigned int _countTokens(unsigned int leftBound, unsigned int rightBount);
unsigned int _skipWhiteSpaces(const std::string & str); unsigned int _skipWhiteSpaces(const std::string & str);
unsigned int _skipWhiteSpacesFromContent(unsigned int fromPosition); unsigned int _skipWhiteSpacesFromContent(unsigned int fromPosition);
const std::map<unsigned int, Object::ReferencePositionsInContent> & _getReferences(const std::string & objectContent); const std::map<unsigned int, Object::ReferencePositionsInContent> & _getReferences(const std::string & objectContent);
unsigned int _skipNumber(const std::string & str, unsigned int currentPosition); unsigned int _skipNumber(const std::string & str, unsigned int currentPosition);
unsigned int _skipWhiteSpaces(const std::string & str, unsigned int fromPosition); unsigned int _skipWhiteSpaces(const std::string & str, unsigned int fromPosition);
void _createDocument(const char * docName); void _createDocument(const char * docName);
virtual unsigned int _getStartOfXrefWithRoot(); virtual unsigned int _getStartOfXrefWithRoot();
unsigned int _readTrailerAndRterievePrev(const unsigned int startPositionForSearch, unsigned int & previosXref); unsigned int _readTrailerAndRterievePrev(const unsigned int startPositionForSearch, unsigned int & previosXref);
void _clearParser(); void _clearParser();
protected: protected:
//members //members
Object * _root; Object * _root;
std::string _fileContent; std::string _fileContent;
std::map<unsigned int, Object *> _objects; std::map<unsigned int, Object *> _objects;
Document * _document; Document * _document;
}; };
} }
#endif #endif
#include "Rectangle.h" #include "Rectangle.h"
#include "Utils.h" #include "Utils.h"
#include "Object.h" #include "Object.h"
#include "Parser.h" #include "Parser.h"
#include <iostream> #include <iostream>
using namespace merge_lib; using namespace merge_lib;
// Creates a rectangle named rectangleName whose four coordinates are all
// zero and whose transformation matrix is default-constructed.
Rectangle::Rectangle(const char * rectangleName)
    : _rectangleName(rectangleName)
    , x1(0)
    , y1(0)
    , x2(0)
    , y2(0)
    , _tm()
{
}
// Creates a rectangle named rectangleName and reads its coordinates from
// content.
//
// rectangleName is located in content with Parser::findToken; the text
// between the first "[" after it and the following "]" is read as
// "x1 y1 x2 y2". If the name cannot be found an error is written to std::cerr.
// Whenever the name or one of the brackets is missing, the coordinates stay
// zero.
Rectangle::Rectangle(const char * rectangleName, const std::string content):
    _rectangleName(rectangleName),
    x1(0),
    y1(0),
    x2(0),
    y2(0),
    _tm()
{
    // size_t, not unsigned int: an unsigned int cannot hold std::string::npos
    // where size_t is wider, so the "not found" test would never fire.
    const size_t rectanglePosition = Parser::findToken(content,rectangleName);
    if( rectanglePosition == std::string::npos )
    {
        std::cerr<<"Unable to find rectangle name "<<rectangleName<<" in content\n";
        return;
    }
    const size_t beg = content.find("[",rectanglePosition);
    if( beg == std::string::npos )
        return;
    // Look for the closing bracket after the opening one, so that the
    // length handed to substr() can never underflow.
    const size_t end = content.find("]",beg + 1);
    if( end == std::string::npos )
        return;
    std::stringstream in(content.substr(beg+1,end-beg-1));
    in>>x1>>y1>>x2>>y2;
}
void Rectangle::appendRectangleToString(std::string & content, const char * delimeter) void Rectangle::appendRectangleToString(std::string & content, const char * delimeter)
{ {
content.append(_getRectangleAsString(delimeter)); content.append(_getRectangleAsString(delimeter));
} }
const std::string Rectangle::_getRectangleAsString(const char * delimeter) const std::string Rectangle::_getRectangleAsString(const char * delimeter)
{ {
std::string result(_rectangleName); std::string result(_rectangleName);
result.append(" ["); result.append(" [");
result.append(Utils::doubleToStr(x1)); result.append(Utils::doubleToStr(x1));
result.append(delimeter); result.append(delimeter);
result.append(Utils::doubleToStr(y1)); result.append(Utils::doubleToStr(y1));
result.append(delimeter); result.append(delimeter);
result.append(Utils::doubleToStr(x2)); result.append(Utils::doubleToStr(x2));
result.append(delimeter); result.append(delimeter);
result.append(Utils::doubleToStr(y2)); result.append(Utils::doubleToStr(y2));
result.append(" ]\n"); result.append(" ]\n");
return result; return result;
} }
void Rectangle::setNewRectangleName(const char * newName) void Rectangle::setNewRectangleName(const char * newName)
{ {
_rectangleName = newName; _rectangleName = newName;
} }
void Rectangle::recalculateInternalRectangleCoordinates(const PageTransformations & transformations) void Rectangle::recalculateInternalRectangleCoordinates(const PageTransformations & transformations)
{ {
TransformationMatrix tempTm; TransformationMatrix tempTm;
for(size_t i = 0; i < transformations.size(); ++i) for(size_t i = 0; i < transformations.size(); ++i)
{ {
tempTm = transformations[i]->getMatrix(); tempTm = transformations[i]->getMatrix();
tempTm.add(_tm); tempTm.add(_tm);
_tm = tempTm; _tm = tempTm;
} }
_tm.recalculateCoordinates(x1, y1); _tm.recalculateCoordinates(x1, y1);
_tm.recalculateCoordinates(x2, y2); _tm.recalculateCoordinates(x2, y2);
} }
// Writes this rectangle back into the object that holds it and stores the
// current matrix in the objects referenced from its /AP entry.
//
// objectWithRectangle: object whose findObject() is asked for the object
//                      containing the rectangle name.
// delimeter:           separator put between the coordinates.
//
// Steps:
//  1. the text from the rectangle name up to and including the next "]" is
//     replaced by _getRectangleAsString(delimeter);
//  2. for every child returned by getChildrenByBounds() for the range from
//     "/AP" to the next ">>", the "[...]" value after "/Matrix" is replaced
//     by _tm.getValue(). Children without a complete "/Matrix [...]" are
//     left untouched.
// If the object or the rectangle text cannot be found, an error is written
// to std::cerr and nothing is changed.
void Rectangle::updateRectangle(Object * objectWithRectangle, const char * delimeter)
{
    // Initialised so that a failed lookup is detectable instead of leaving
    // an indeterminate pointer behind.
    Object * foundObjectWithRectangle = 0;
    unsigned int fake = 0;
    objectWithRectangle->findObject(std::string(_rectangleName), foundObjectWithRectangle, fake);
    if(!foundObjectWithRectangle)
    {
        std::cerr<<"Unable to find object with rectangle "<<_rectangleName<<"\n";
        return;
    }

    // All positions are kept as size_t: an unsigned int cannot hold
    // std::string::npos where size_t is wider, so "not found" would go
    // unnoticed and "find(...) + 1" would wrap around to 0.
    std::string objectContent = foundObjectWithRectangle->getObjectContent();
    const size_t rectanglePosition = objectContent.find(_rectangleName);
    const size_t rectangleClosingBracket = (rectanglePosition == std::string::npos)
        ? std::string::npos
        : objectContent.find("]", rectanglePosition);
    if(rectangleClosingBracket == std::string::npos)
    {
        std::cerr<<"Unable to find rectangle name "<<_rectangleName<<" in content\n";
        return;
    }
    const size_t endOfRectangle = rectangleClosingBracket + 1;
    foundObjectWithRectangle->eraseContent(static_cast<unsigned int>(rectanglePosition),
                                           static_cast<unsigned int>(endOfRectangle - rectanglePosition));
    foundObjectWithRectangle->insertToContent(static_cast<unsigned int>(rectanglePosition),
                                              _getRectangleAsString(delimeter));

    // reread the objectContent, since it was changed just above
    objectContent = foundObjectWithRectangle->getObjectContent();

    //update matrix
    const size_t startOfAP = Parser::findToken(objectContent,"/AP");
    if(startOfAP == std::string::npos)
        return; // no /AP entry, so there is no matrix to update
    size_t endOfAP = objectContent.find(">>", startOfAP);
    if(endOfAP == std::string::npos)
        endOfAP = objectContent.size(); // unterminated: search up to the end of the content
    std::vector<Object *> aps = foundObjectWithRectangle->getChildrenByBounds(static_cast<unsigned int>(startOfAP),
                                                                              static_cast<unsigned int>(endOfAP));
    for(size_t i = 0; i < aps.size(); ++i)
    {
        Object * objectWithMatrix = aps[i];
        const std::string matrixOwnerContent = objectWithMatrix->getObjectContent();
        const size_t matrixPosition = Parser::findToken(matrixOwnerContent,"/Matrix");
        if(matrixPosition == std::string::npos)
            continue;
        const size_t matrixValueLeftBound = matrixOwnerContent.find("[", matrixPosition);
        if(matrixValueLeftBound == std::string::npos)
            continue;
        const size_t matrixClosingBracket = matrixOwnerContent.find("]", matrixValueLeftBound);
        if(matrixClosingBracket == std::string::npos)
            continue;
        const size_t matrixValueRightBound = matrixClosingBracket + 1;
        objectWithMatrix->eraseContent(static_cast<unsigned int>(matrixValueLeftBound),
                                       static_cast<unsigned int>(matrixValueRightBound - matrixValueLeftBound));
        objectWithMatrix->insertToContent(static_cast<unsigned int>(matrixValueLeftBound), _tm.getValue());
    }
}
double Rectangle::getWidth() double Rectangle::getWidth()
{ {
return x2 - x1; return x2 - x1;
} }
double Rectangle::getHeight() double Rectangle::getHeight()
{ {
return y2 - y1; return y2 - y1;
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment