PHP: неверные ссылки на HTML-файлы в pdf-to-html
Я установил Poppler Utils для Windows вместе с библиотекой https://github.com/mgufrone/pdf-to-html.
Всё работает отлично: PDF-файлы конвертируются в HTML,
при этом создаётся один HTML-файл с двумя фреймами: один — для навигации по страницам,
а другой — для самого текста.
Проблема в том, что при генерации HTML-файлов
в атрибут src фреймов
попадает неверная ссылка.
Например:
Test.html
Pages.html
Page_1.html
Все эти файлы находятся в одной папке с именем «output».
Test.html содержит 2 фрейма,
которые ссылаются на Pages.html и Page_1.html.
Вот проблемный фрагмент Test.html:
<frameset cols="100,*">
<frame name="links" src="output/Pages.html"/>
<frame name="contents" src="output/Pages_1.html"/>
</frameset>
Должно быть:
<frameset cols="100,*">
<frame name="links" src="Pages.html"/>
<frame name="contents" src="Pages_1.html"/>
</frameset>
PDF.php
<?php namespace Gufy\PdfToHtml;
/**
 * Reads metadata of a PDF file by running the configured `pdfinfo` binary
 * and parsing its "key: value" output lines.
 */
class Pdf
{
    /** @var string path of the PDF file passed to the constructor */
    protected $file;

    /** @var array|null parsed pdfinfo output keyed by normalised field name; null until info() has run */
    protected $info;

    // protected $info_bin = '/usr/bin/pdfinfo';

    /**
     * @param string $file    path to the PDF file
     * @param array  $options property name => value pairs copied onto this object
     *                        (applied after $file, so they may override it)
     */
    public function __construct($file, $options = array())
    {
        $this->file = $file;
        foreach ($options as $key => $item) {
            $this->$key = $item;
        }
    }

    /**
     * Return the parsed pdfinfo fields, running pdfinfo first when needed.
     *
     * @return array field name (lower case, spaces replaced by "_") => trimmed value
     */
    public function getInfo()
    {
        // Loose comparison on purpose: an empty result also counts as "not loaded",
        // so a failed lookup is retried on the next call.
        if ($this->info == null) {
            $this->info();
        }

        return $this->info;
    }

    /**
     * Run pdfinfo on the file and store its parsed output in $this->info.
     *
     * @return $this current object
     */
    protected function info()
    {
        // The path is escaped so that spaces or shell metacharacters in it
        // cannot break or alter the command. shell_exec() may return null,
        // hence the string cast.
        $content = (string) shell_exec($this->bin() . ' ' . escapeshellarg($this->file));

        $info = array();
        foreach (explode("\n", $content) as $item) {
            // Limit of 2: only the first colon separates key and value, so values
            // that contain colons themselves (e.g. timestamps) stay intact.
            $parts = explode(':', $item, 2);
            if (count($parts) < 2) {
                // Blank lines, stray "\r" and anything else without a colon.
                continue;
            }

            list($key, $value) = $parts;
            $info[str_replace(' ', '_', strtolower($key))] = trim($value);
        }

        $this->info = $info;

        return $this;
    }

    /**
     * Make sure the metadata has been read, then create an Html object for the file.
     *
     * @return Html instance constructed with the PDF path (class defined outside this block)
     */
    public function html()
    {
        if ($this->info == null) {
            $this->info();
        }

        return new Html($this->file);
    }

    /**
     * Return the value of the "pages" field reported by pdfinfo.
     *
     * @return string|null the raw field value, or null when pdfinfo reported no such field
     */
    public function getPages()
    {
        if ($this->info == null) {
            $this->info();
        }

        return isset($this->info['pages']) ? $this->info['pages'] : null;
    }

    /**
     * @return string the pdfinfo command, taken from the 'pdfinfo.bin' config entry
     *                with '/usr/bin/pdfinfo' as the fallback
     */
    public function bin()
    {
        return Config::get('pdfinfo.bin', '/usr/bin/pdfinfo');
    }
}
Base.php
<?php
namespace Gufy\PdfToHtml;
/**
 * Converts a PDF file to HTML by invoking the configured `pdftohtml` binary.
 */
class Base
{
    /** @var array conversion switches; only these keys can be changed through setOptions() */
    private $options = array(
        'singlePage' => false,
        'imageJpeg' => false,
        'ignoreImages' => false,
        'zoom' => 1.5,
        'noFrames' => true,
    );

    /** @var string directory the generated files are written to */
    public $outputDir;

    // Not read anywhere in this class; bin() resolves the command through Config.
    private $bin = "/usr/bin/pdftohtml";

    /** @var string path of the PDF file to convert */
    private $file;

    /**
     * @param string $pdfFile path to the PDF file; when empty nothing is initialised
     *                        and $options are ignored
     * @param array  $options option name => value pairs forwarded to setOptions()
     */
    public function __construct($pdfFile = '', $options = array())
    {
        if (empty($pdfFile)) {
            return;
        }

        if (!empty($options)) {
            foreach ($options as $key => $value) {
                $this->setOptions($key, $value);
            }
        }

        $this->open($pdfFile);
    }

    /**
     * open pdf file that will be converted. The output directory is reset to the
     * directory containing the file.
     * @param string $pdfFile path to pdf file
     * @return $this current object
     */
    public function open($pdfFile)
    {
        $this->file = $pdfFile;
        $this->setOutputDirectory(dirname($pdfFile));

        return $this;
    }

    /**
     * Run the conversion, read the generated html file and delete it afterwards.
     * @return string|false content of the generated file, false when it could not be read
     */
    public function html()
    {
        $this->generate();

        $file_output = $this->outputPath();
        $content = file_get_contents($file_output);
        if ($content !== false) {
            unlink($file_output);
        }

        return $content;
    }

    /**
     * generating html files using pdftohtml software.
     * @return $this current object
     */
    public function generate()
    {
        // Both paths are escaped so that spaces or shell metacharacters in them
        // cannot break or alter the command.
        $command = $this->bin() . " " . $this->generateOptions()
            . " " . escapeshellarg($this->file)
            . " " . escapeshellarg($this->outputPath());
        exec($command);

        return $this;
    }

    /**
     * generate options based on the preserved options
     * @return string options that will be passed on running the command
     */
    public function generateOptions()
    {
        $generated = array();
        foreach ($this->options as $key => $value) {
            $result = "";
            switch ($key) {
                case "singlePage":
                    $result = $value ? "-c" : "-s";
                    break;
                case "imageJpeg":
                    $result = "-fmt " . ($value ? "jpg" : "png");
                    break;
                case "zoom":
                    $result = "-zoom " . $value;
                    break;
                case "ignoreImages":
                    $result = $value ? "-i" : "";
                    break;
                case 'noFrames':
                    $result = $value ? '-noframes' : '';
                    break;
            }
            // Disabled switches contribute an empty entry; kept so the returned
            // string stays exactly what callers have always received.
            $generated[] = $result;
        }

        return implode(" ", $generated);
    }

    /**
     * change value of preserved configuration. Unknown keys are ignored.
     * @param string $key key of option you want to change
     * @param mixed $value value of option you want to change
     * @return $this current object
     */
    public function setOptions($key, $value)
    {
        // array_key_exists() instead of isset(): an option that currently holds
        // null is still a known option and must remain changeable.
        if (array_key_exists($key, $this->options)) {
            $this->options[$key] = $value;
        }

        return $this;
    }

    /**
     * set the directory where the generated files will be written
     * @param string $dir path to the output directory
     * @return $this current object
     */
    public function setOutputDirectory($dir)
    {
        $this->outputDir = $dir;

        return $this;
    }

    /**
     * clear the whole files that has been generated by pdftohtml. Make sure directory ONLY contain generated files from pdftohtml
     * because it remove the whole contents under preserved output directory
     * @return $this current object
     */
    public function clearOutputDirectory()
    {
        // CHILD_FIRST yields every directory after its contents, so by the time
        // rmdir() is called the directory is already empty. The default mode
        // yields leaves only and would never visit sub-directories at all.
        $files = new \RecursiveIteratorIterator(
            new \RecursiveDirectoryIterator($this->outputDir, \FilesystemIterator::SKIP_DOTS),
            \RecursiveIteratorIterator::CHILD_FIRST
        );

        foreach ($files as $file) {
            $path = $file->getPathname();
            if ($file->isDir() && !$file->isLink()) {
                rmdir($path);
            } else {
                unlink($path);
            }
        }

        return $this;
    }

    /**
     * @return string the pdftohtml command, taken from the 'pdftohtml.bin' config entry
     *                with '/usr/bin/pdftohtml' as the fallback
     */
    public function bin()
    {
        return Config::get('pdftohtml.bin', '/usr/bin/pdftohtml');
    }

    /**
     * Path of the html file for the current pdf: output directory plus the pdf's
     * base name with a trailing ".pdf" replaced by ".html".
     * @return string
     */
    private function outputPath()
    {
        return $this->outputDir . "/" . preg_replace("/\.pdf$/", "", basename($this->file)) . ".html";
    }
}