You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							355 lines
						
					
					
						
							9.1 KiB
						
					
					
				
			
		
		
		
			
			
			
				
					
				
				
					
				
			
		
		
	
	
							355 lines
						
					
					
						
							9.1 KiB
						
					
					
				
								<?php
							 | 
						|
								/**
							 | 
						|
								 * This file is part of FPDI
							 | 
						|
								 *
							 | 
						|
								 * @package   FPDI
							 | 
						|
								 * @copyright Copyright (c) 2017 Setasign - Jan Slabon (http://www.setasign.com)
							 | 
						|
								 * @license   http://opensource.org/licenses/mit-license The MIT License
							 | 
						|
								 * @version   1.6.2
							 | 
						|
								 */
							 | 
						|
								
							 | 
						|
								if (!class_exists('pdf_parser')) {
							 | 
						|
								    require_once('pdf_parser.php');
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								/**
							 | 
						|
								 * Class fpdi_pdf_parser
							 | 
						|
								 */
							 | 
						|
								class fpdi_pdf_parser extends pdf_parser
							 | 
						|
								{
							 | 
						|
								    /**
							 | 
						|
								     * Pages
							 | 
						|
								     *
							 | 
						|
								     * Index begins at 0
							 | 
						|
								     *
							 | 
						|
								     * @var array
							 | 
						|
								     */
							 | 
						|
								    protected $_pages;
							 | 
						|
								    
							 | 
						|
								    /**
							 | 
						|
								     * Page count
							 | 
						|
								     *
							 | 
						|
								     * @var integer
							 | 
						|
								     */
							 | 
						|
								    protected $_pageCount;
							 | 
						|
								    
							 | 
						|
								    /**
							 | 
						|
								     * Current page number
							 | 
						|
								     *
							 | 
						|
								     * @var integer
							 | 
						|
								     */
							 | 
						|
								    public $pageNo;
							 | 
						|
								    
							 | 
						|
								    /**
							 | 
						|
								     * PDF version of imported document
							 | 
						|
								     *
							 | 
						|
								     * @var string
							 | 
						|
								     */
							 | 
						|
								    public $_pdfVersion;
							 | 
						|
								    
							 | 
						|
								    /**
							 | 
						|
								     * Available BoxTypes
							 | 
						|
								     *
							 | 
						|
								     * @var array
							 | 
						|
								     */
							 | 
						|
								    public $availableBoxes = array('/MediaBox', '/CropBox', '/BleedBox', '/TrimBox', '/ArtBox');
							 | 
						|
								        
							 | 
						|
								    /**
							 | 
						|
								     * The constructor.
							 | 
						|
								     *
							 | 
						|
								     * @param string $filename The source filename
							 | 
						|
								     */
							 | 
						|
								    public function __construct($filename)
							 | 
						|
								    {
							 | 
						|
								        parent::__construct($filename);
							 | 
						|
								
							 | 
						|
								        // resolve Pages-Dictonary
							 | 
						|
								        $pages = $this->resolveObject($this->_root[1][1]['/Pages']);
							 | 
						|
								
							 | 
						|
								        // Read pages
							 | 
						|
								        $this->_readPages($pages, $this->_pages);
							 | 
						|
								        
							 | 
						|
								        // count pages;
							 | 
						|
								        $this->_pageCount = count($this->_pages);
							 | 
						|
								    }
							 | 
						|
								    
							 | 
						|
								    /**
							 | 
						|
								     * Get page count from source file.
							 | 
						|
								     *
							 | 
						|
								     * @return int
							 | 
						|
								     */
							 | 
						|
								    public function getPageCount()
							 | 
						|
								    {
							 | 
						|
								        return $this->_pageCount;
							 | 
						|
								    }
							 | 
						|
								
							 | 
						|
								    /**
							 | 
						|
								     * Set the page number.
							 | 
						|
								     *
							 | 
						|
								     * @param int $pageNo Page number to use
							 | 
						|
								     * @throws InvalidArgumentException
							 | 
						|
								     */
							 | 
						|
								    public function setPageNo($pageNo)
							 | 
						|
								    {
							 | 
						|
								        $pageNo = ((int) $pageNo) - 1;
							 | 
						|
								
							 | 
						|
								        if ($pageNo < 0 || $pageNo >= $this->getPageCount()) {
							 | 
						|
								            throw new InvalidArgumentException('Invalid page number!');
							 | 
						|
								        }
							 | 
						|
								
							 | 
						|
								        $this->pageNo = $pageNo;
							 | 
						|
								    }
							 | 
						|
								    
							 | 
						|
								    /**
							 | 
						|
								     * Get page-resources from current page
							 | 
						|
								     *
							 | 
						|
								     * @return array|boolean
							 | 
						|
								     */
							 | 
						|
								    public function getPageResources()
							 | 
						|
								    {
							 | 
						|
								        return $this->_getPageResources($this->_pages[$this->pageNo]);
							 | 
						|
								    }
							 | 
						|
								    
							 | 
						|
								    /**
							 | 
						|
								     * Get page-resources from a /Page dictionary.
							 | 
						|
								     *
							 | 
						|
								     * @param array $obj Array of pdf-data
							 | 
						|
								     * @return array|boolean
							 | 
						|
								     */
							 | 
						|
								    protected function _getPageResources($obj)
							 | 
						|
								    {
							 | 
						|
								        $obj = $this->resolveObject($obj);
							 | 
						|
								
							 | 
						|
								        // If the current object has a resources
							 | 
						|
								        // dictionary associated with it, we use
							 | 
						|
								        // it. Otherwise, we move back to its
							 | 
						|
								        // parent object.
							 | 
						|
								        if (isset($obj[1][1]['/Resources'])) {
							 | 
						|
								            $res = $this->resolveObject($obj[1][1]['/Resources']);
							 | 
						|
								            if ($res[0] == pdf_parser::TYPE_OBJECT)
							 | 
						|
								                return $res[1];
							 | 
						|
								            return $res;
							 | 
						|
								        }
							 | 
						|
								
							 | 
						|
								        if (!isset($obj[1][1]['/Parent'])) {
							 | 
						|
								            return false;
							 | 
						|
								        }
							 | 
						|
								
							 | 
						|
								        $res = $this->_getPageResources($obj[1][1]['/Parent']);
							 | 
						|
								        if ($res[0] == pdf_parser::TYPE_OBJECT)
							 | 
						|
								            return $res[1];
							 | 
						|
								        return $res;
							 | 
						|
								    }
							 | 
						|
								
							 | 
						|
								    /**
							 | 
						|
								     * Get content of current page.
							 | 
						|
								     *
							 | 
						|
								     * If /Contents is an array, the streams are concatenated
							 | 
						|
								     *
							 | 
						|
								     * @return string
							 | 
						|
								     */
							 | 
						|
								    public function getContent()
							 | 
						|
								    {
							 | 
						|
								        $buffer = '';
							 | 
						|
								        
							 | 
						|
								        if (isset($this->_pages[$this->pageNo][1][1]['/Contents'])) {
							 | 
						|
								            $contents = $this->_getPageContent($this->_pages[$this->pageNo][1][1]['/Contents']);
							 | 
						|
								            foreach ($contents AS $tmpContent) {
							 | 
						|
								                if ($tmpContent[0] !== pdf_parser::TYPE_STREAM) {
							 | 
						|
								                    continue;
							 | 
						|
								                }
							 | 
						|
								
							 | 
						|
								                $buffer .= $this->_unFilterStream($tmpContent) . ' ';
							 | 
						|
								            }
							 | 
						|
								        }
							 | 
						|
								        
							 | 
						|
								        return $buffer;
							 | 
						|
								    }
							 | 
						|
								
							 | 
						|
								    /**
							 | 
						|
								     * Resolve all content objects.
							 | 
						|
								     *
							 | 
						|
								     * @param array $contentRef
							 | 
						|
								     * @return array
							 | 
						|
								     */
							 | 
						|
								    protected function _getPageContent($contentRef)
							 | 
						|
								    {
							 | 
						|
								        $contents = array();
							 | 
						|
								        
							 | 
						|
								        if ($contentRef[0] == pdf_parser::TYPE_OBJREF) {
							 | 
						|
								            $content = $this->resolveObject($contentRef);
							 | 
						|
								            if ($content[1][0] == pdf_parser::TYPE_ARRAY) {
							 | 
						|
								                $contents = $this->_getPageContent($content[1]);
							 | 
						|
								            } else {
							 | 
						|
								                $contents[] = $content;
							 | 
						|
								            }
							 | 
						|
								        } else if ($contentRef[0] == pdf_parser::TYPE_ARRAY) {
							 | 
						|
								            foreach ($contentRef[1] AS $tmp_content_ref) {
							 | 
						|
								                $contents = array_merge($contents, $this->_getPageContent($tmp_content_ref));
							 | 
						|
								            }
							 | 
						|
								        }
							 | 
						|
								
							 | 
						|
								        return $contents;
							 | 
						|
								    }
							 | 
						|
								
							 | 
						|
								    /**
							 | 
						|
								     * Get a boundary box from a page
							 | 
						|
								     *
							 | 
						|
								     * Array format is same as used by FPDF_TPL.
							 | 
						|
								     *
							 | 
						|
								     * @param array $page a /Page dictionary
							 | 
						|
								     * @param string $boxIndex Type of box {see {@link $availableBoxes})
							 | 
						|
								     * @param float Scale factor from user space units to points
							 | 
						|
								     *
							 | 
						|
								     * @return array|boolean
							 | 
						|
								     */
							 | 
						|
								    protected function _getPageBox($page, $boxIndex, $k)
							 | 
						|
								    {
							 | 
						|
								        $page = $this->resolveObject($page);
							 | 
						|
								        $box = null;
							 | 
						|
								        if (isset($page[1][1][$boxIndex])) {
							 | 
						|
								            $box = $page[1][1][$boxIndex];
							 | 
						|
								        }
							 | 
						|
								        
							 | 
						|
								        if (!is_null($box) && $box[0] == pdf_parser::TYPE_OBJREF) {
							 | 
						|
								            $tmp_box = $this->resolveObject($box);
							 | 
						|
								            $box = $tmp_box[1];
							 | 
						|
								        }
							 | 
						|
								            
							 | 
						|
								        if (!is_null($box) && $box[0] == pdf_parser::TYPE_ARRAY) {
							 | 
						|
								            $b = $box[1];
							 | 
						|
								            return array(
							 | 
						|
								                'x' => $b[0][1] / $k,
							 | 
						|
								                'y' => $b[1][1] / $k,
							 | 
						|
								                'w' => abs($b[0][1] - $b[2][1]) / $k,
							 | 
						|
								                'h' => abs($b[1][1] - $b[3][1]) / $k,
							 | 
						|
								                'llx' => min($b[0][1], $b[2][1]) / $k,
							 | 
						|
								                'lly' => min($b[1][1], $b[3][1]) / $k,
							 | 
						|
								                'urx' => max($b[0][1], $b[2][1]) / $k,
							 | 
						|
								                'ury' => max($b[1][1], $b[3][1]) / $k,
							 | 
						|
								            );
							 | 
						|
								        } else if (!isset($page[1][1]['/Parent'])) {
							 | 
						|
								            return false;
							 | 
						|
								        } else {
							 | 
						|
								            return $this->_getPageBox($this->resolveObject($page[1][1]['/Parent']), $boxIndex, $k);
							 | 
						|
								        }
							 | 
						|
								    }
							 | 
						|
								
							 | 
						|
								    /**
							 | 
						|
								     * Get all page boundary boxes by page number
							 | 
						|
								     * 
							 | 
						|
								     * @param int $pageNo The page number
							 | 
						|
								     * @param float $k Scale factor from user space units to points
							 | 
						|
								     * @return array
							 | 
						|
								     * @throws InvalidArgumentException
							 | 
						|
								     */
							 | 
						|
								    public function getPageBoxes($pageNo, $k)
							 | 
						|
								    {
							 | 
						|
								        if (!isset($this->_pages[$pageNo - 1])) {
							 | 
						|
								            throw new InvalidArgumentException('Page ' . $pageNo . ' does not exists.');
							 | 
						|
								        }
							 | 
						|
								
							 | 
						|
								        return $this->_getPageBoxes($this->_pages[$pageNo - 1], $k);
							 | 
						|
								    }
							 | 
						|
								    
							 | 
						|
								    /**
							 | 
						|
								     * Get all boxes from /Page dictionary
							 | 
						|
								     *
							 | 
						|
								     * @param array $page A /Page dictionary
							 | 
						|
								     * @param float $k Scale factor from user space units to points
							 | 
						|
								     * @return array
							 | 
						|
								     */
							 | 
						|
								    protected function _getPageBoxes($page, $k)
							 | 
						|
								    {
							 | 
						|
								        $boxes = array();
							 | 
						|
								
							 | 
						|
								        foreach($this->availableBoxes AS $box) {
							 | 
						|
								            if ($_box = $this->_getPageBox($page, $box, $k)) {
							 | 
						|
								                $boxes[$box] = $_box;
							 | 
						|
								            }
							 | 
						|
								        }
							 | 
						|
								
							 | 
						|
								        return $boxes;
							 | 
						|
								    }
							 | 
						|
								
							 | 
						|
								    /**
							 | 
						|
								     * Get the page rotation by page number
							 | 
						|
								     *
							 | 
						|
								     * @param integer $pageNo
							 | 
						|
								     * @throws InvalidArgumentException
							 | 
						|
								     * @return array
							 | 
						|
								     */
							 | 
						|
								    public function getPageRotation($pageNo)
							 | 
						|
								    {
							 | 
						|
								        if (!isset($this->_pages[$pageNo - 1])) {
							 | 
						|
								            throw new InvalidArgumentException('Page ' . $pageNo . ' does not exists.');
							 | 
						|
								        }
							 | 
						|
								
							 | 
						|
								        return $this->_getPageRotation($this->_pages[$pageNo - 1]);
							 | 
						|
								    }
							 | 
						|
								
							 | 
						|
								    /**
							 | 
						|
								     * Get the rotation value of a page
							 | 
						|
								     *
							 | 
						|
								     * @param array $obj A /Page dictionary
							 | 
						|
								     * @return array|bool
							 | 
						|
								     */
							 | 
						|
								    protected function _getPageRotation($obj)
							 | 
						|
								    {
							 | 
						|
								        $obj = $this->resolveObject($obj);
							 | 
						|
								        if (isset($obj[1][1]['/Rotate'])) {
							 | 
						|
								            $res = $this->resolveObject($obj[1][1]['/Rotate']);
							 | 
						|
								            if ($res[0] == pdf_parser::TYPE_OBJECT)
							 | 
						|
								                return $res[1];
							 | 
						|
								            return $res;
							 | 
						|
								        }
							 | 
						|
								
							 | 
						|
								        if (!isset($obj[1][1]['/Parent'])) {
							 | 
						|
								            return false;
							 | 
						|
								        }
							 | 
						|
								
							 | 
						|
								        $res = $this->_getPageRotation($obj[1][1]['/Parent']);
							 | 
						|
								        if ($res[0] == pdf_parser::TYPE_OBJECT)
							 | 
						|
								            return $res[1];
							 | 
						|
								
							 | 
						|
								        return $res;
							 | 
						|
								    }
							 | 
						|
								
							 | 
						|
								    /**
							 | 
						|
								     * Read all pages
							 | 
						|
								     *
							 | 
						|
								     * @param array $pages /Pages dictionary
							 | 
						|
								     * @param array $result The result array
							 | 
						|
								     * @throws Exception
							 | 
						|
								     */
							 | 
						|
								    protected function _readPages(&$pages, &$result)
							 | 
						|
								    {
							 | 
						|
								        // Get the kids dictionary
							 | 
						|
								        $_kids = $this->resolveObject($pages[1][1]['/Kids']);
							 | 
						|
								
							 | 
						|
								        if (!is_array($_kids)) {
							 | 
						|
								            throw new Exception('Cannot find /Kids in current /Page-Dictionary');
							 | 
						|
								        }
							 | 
						|
								
							 | 
						|
								        if ($_kids[0] === self::TYPE_OBJECT) {
							 | 
						|
								            $_kids =  $_kids[1];
							 | 
						|
								        }
							 | 
						|
								
							 | 
						|
								        $kids = $_kids[1];
							 | 
						|
								
							 | 
						|
								        foreach ($kids as $v) {
							 | 
						|
								            $pg = $this->resolveObject($v);
							 | 
						|
								            if ($pg[0] !== pdf_parser::TYPE_OBJECT) {
							 | 
						|
								                throw new Exception('Invalid data type in page tree.');
							 | 
						|
								            }
							 | 
						|
								
							 | 
						|
								            if ($pg[1][1]['/Type'][1] === '/Pages') {
							 | 
						|
								                // If one of the kids is an embedded
							 | 
						|
								                // /Pages array, resolve it as well.
							 | 
						|
								                $this->_readPages($pg, $result);
							 | 
						|
								            } else {
							 | 
						|
								                $result[] = $pg;
							 | 
						|
								            }
							 | 
						|
								        }
							 | 
						|
								    }
							 | 
						|
								}
							 |