Program finished

master
Julien Rosset 1 year ago
parent 5f635b3020
commit 3eaa74066d

@ -0,0 +1,5 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="Run" type="PhpLocalRunConfigurationType" factoryName="PHP Console" path="$PROJECT_DIR$/run.php" scriptParameters="$ProjectFileDir$/input -vvv">
<method v="2"/>
</configuration>
</component>

@ -1,7 +1,7 @@
{ {
"name": "jrosset/calibre_metadata_parser_ff", "name": "jrosset/calibre_metadata_parser_ff",
"description": "Calibre metadata parser for FanFiction files", "description": "Calibre metadata parser for FanFiction files",
"keywords": [], "keywords": [ ],
"type": "library", "type": "library",
"config": { "config": {
"sort-packages": true "sort-packages": true
@ -9,8 +9,13 @@
"minimum-stability": "stable", "minimum-stability": "stable",
"require": { "require": {
"php": "^8.3", "php": "^8.3",
"ext-dom": "*",
"ext-libxml": "*",
"ext-zip": "*",
"jrosset/lasterrorexception": "^1.1", "jrosset/lasterrorexception": "^1.1",
"symfony/console": "^7.0" "jrosset/mbstring-extended": "^1.3",
"symfony/console": "^7.0",
"twig/twig": "^3.10"
}, },
"autoload": { "autoload": {
"psr-4": { "psr-4": {

@ -5,5 +5,7 @@ require_once __DIR__ . '/vendor/autoload.php';
use jrosset\Main; use jrosset\Main;
mb_internal_encoding('UTF-8');
(new Main()) (new Main())
->run(); ->run();

@ -1,39 +1,231 @@
<?php <?php /** @noinspection HtmlUnknownTarget */
namespace jrosset; namespace jrosset;
use DOMDocument;
use DOMElement;
use DOMException;
use DOMNode;
use DOMXPath;
use jrosset\LastErrorException\LastErrorException; use jrosset\LastErrorException\LastErrorException;
use RecursiveDirectoryIterator; use RecursiveDirectoryIterator;
use RecursiveIteratorIterator; use RecursiveIteratorIterator;
use RegexIterator; use RegexIterator;
use SplFileInfo; use SplFileInfo;
use stdClass;
use Symfony\Component\Console\Command\Command; use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument; use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface; use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface; use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\SingleCommandApplication; use Symfony\Component\Console\SingleCommandApplication;
use Throwable; use Throwable;
use Twig\Environment;
use Twig\Loader\ArrayLoader;
use ZipArchive;
/** /**
* The "main" command * The "main" command
*/ */
class Main class Main {
{
/** /**
* Argument name for input files * Argument name for input files
*/ */
private const string ARGUMENT_FILES = 'files'; private const string ARGUMENT_FILES = 'files';
private const string DOM_NAMESPACE_ATTRIBUTE = 'http://www.w3.org/2000/xmlns/';
private const string OPF_NAMESPACE_DC = 'http://purl.org/dc/elements/1.1/';
private const string OPF_NAMESPACE_OPF = 'http://www.idpf.org/2007/opf';
private const string TWIG_TEMPLATE_METADATA_DESCRIPTION = 'metadata_description.html';
/**
 * Inline Twig templates, indexed by template name (served through an ArrayLoader)
 *
 * The description template accepts both naming schemes produced by the first-page parsers:
 * "description" or "summary" for the story summary, "url" or "source" for the original link.
 * The heredoc body is kept flush left so the rendered markup carries no extra leading whitespace.
 */
private const array TWIG_TEMPLATES = [
    self::TWIG_TEMPLATE_METADATA_DESCRIPTION => <<<'TWIG'
<div>
<p>{{ description ?? summary }}</p>
<p><b>Chapters:</b> {{ chapters ?? 1 }}</p>
<p><b>Word count:</b> {{ words }}</p>
<p><b>Published:</b> {{ publishDate }}</p>
<p><b>Last update:</b> {{ lastUpdateDate ?? publishDate }}</p>
<p><b>Status:</b> {{ status }}</p>
<p><b>Rated:</b> {{ rated }}</p>
<p><b>Genre:</b> {{ genre }}</p>
<p><b>Pairings:</b> {{ characters }}</p>
<p><b>Source link:</b><a href="{{ url ?? source }}"><span style="color: #6cb4ee">{{ url ?? source }}</span></a></p>
<p><b>Exported by:</b> {{ exportedBy }}</p>
</div>
TWIG
    ,
];
private const array CALIBRE_COLUMN_BUY = [
'table' => 'custom_column_9',
'column' => 'value',
'datatype' => 'enumeration',
'is_multiple' => null,
'kind' => 'field',
'name' => 'Acheté',
'search_terms' => [
'#buy',
],
'label' => 'buy',
'colnum' => 9,
'display' => [
'description' => '',
'enum_values' => [
'Non',
'En ligne',
'Papier',
'Libre',
],
'use_decorations' => 0,
'enum_colors' => [
],
],
'is_custom' => true,
'is_category' => true,
'link_column' => 'value',
'category_sort' => 'value',
'is_csp' => false,
'is_editable' => true,
'rec_index' => 22,
'#value#' => 'Libre',
'#extra#' => null,
'is_multiple2' => [
],
];
private const array CALIBRE_COLUMN_COLLECTION = [
'table' => 'custom_column_5',
'column' => 'value',
'datatype' => 'composite',
'is_multiple' => null,
'kind' => 'field',
'name' => 'Collection',
'search_terms' => [
'#collection',
],
'label' => 'collection',
'colnum' => 5,
'display' => [
'make_category' => true,
'composite_sort' => 'text',
'contains_html' => false,
'use_decorations' => 0,
'composite_template' => '{#collection_manual:ifempty({#groups_series}{#groups_series_index:0&gt;2s| [|]}{series:\'contains($,\'&lt;ordre&gt;\',\'\',strcat(test(field(\'#groups_series\'),\' - \', \'\'),$))\'})}',
'description' => '',
],
'is_custom' => true,
'is_category' => false,
'link_column' => 'value',
'category_sort' => 'value',
'is_csp' => false,
'is_editable' => true,
'rec_index' => 23,
'#value#' => 'Fan-Fiction - Harry Potter',
'is_multiple2' => [
],
];
private const array CALIBRE_COLUMN_COLLECTION_MANUAL = [
'table' => 'custom_column_7',
'column' => 'value',
'datatype' => 'text',
'is_multiple' => null,
'kind' => 'field',
'name' => 'Nom manuel de collection',
'search_terms' => [
'#collection_manual',
],
'label' => 'collection_manual',
'colnum' => 7,
'display' => [
'use_decorations' => 0,
'description' => '',
],
'is_custom' => true,
'is_category' => true,
'link_column' => 'value',
'category_sort' => 'value',
'is_csp' => false,
'is_editable' => true,
'rec_index' => 24,
'#value#' => 'Fan-Fiction - Harry Potter',
'#extra#' => null,
'is_multiple2' => [
],
];
private const array CALIBRE_COLUMN_GENRE = [
'table' => 'custom_column_2',
'column' => 'value',
'datatype' => 'text',
'is_multiple' => '|',
'kind' => 'field',
'name' => 'Genre',
'search_terms' => [
'#genre',
],
'label' => 'genre',
'colnum' => 2,
'display' => [
'is_names' => false,
'description' => '',
],
'is_custom' => true,
'is_category' => true,
'link_column' => 'value',
'category_sort' => 'value',
'is_csp' => false,
'is_editable' => true,
'rec_index' => 25,
'#value#' => [
'Fan-Fiction',
'Fantastique',
],
'#extra#' => null,
'is_multiple2' => [
'cache_to_list' => '|',
'ui_to_list' => ',',
'list_to_ui' => ', ',
],
];
private const array CALIBRE_COLUMN_GROUPS_SERIES = [
'table' => 'custom_column_6',
'column' => 'value',
'datatype' => 'series',
'is_multiple' => null,
'kind' => 'field',
'name' => 'Groupe de séries',
'search_terms' => [
'#groups_series',
],
'label' => 'groups_series',
'colnum' => 6,
'display' => [
'description' => '',
],
'is_custom' => true,
'is_category' => true,
'link_column' => 'value',
'category_sort' => 'value',
'is_csp' => false,
'is_editable' => true,
'rec_index' => 26,
'#value#' => null,
'#extra#' => null,
'is_multiple2' => [
],
];
/** /**
* @var SingleCommandApplication The command * @var SingleCommandApplication The command
*/ */
private readonly SingleCommandApplication $command; private readonly SingleCommandApplication $command;
/**
* @var Environment The twig environment
*/
private Environment $twig;
/** /**
* Initialization * Initialization
*/ */
public function __construct() public function __construct () {
{
$this->command = (new SingleCommandApplication()) $this->command = (new SingleCommandApplication())
->setName('calibre_metadata_parser_ff') ->setName('calibre_metadata_parser_ff')
->setDescription(<<<'EOF' ->setDescription(<<<'EOF'
@ -49,6 +241,10 @@ EOF
'The files or directories to process' 'The files or directories to process'
) )
->setCode($this->execute(...)); ->setCode($this->execute(...));
$this->twig = new Environment(
new ArrayLoader(self::TWIG_TEMPLATES),
);
} }
/** /**
@ -58,11 +254,9 @@ EOF
* *
* @throws Throwable If an error occurs * @throws Throwable If an error occurs
*/ */
public function run (): void {
    // Hand over to the Symfony single-command application: it parses the CLI input and invokes the registered callback
    $application = $this->command;
    $application->run();
}
/** /**
* Execute the command * Execute the command
* *
@ -73,8 +267,7 @@ EOF
* *
* @throws Throwable If an error occurs * @throws Throwable If an error occurs
*/ */
private function execute(InputInterface $input, OutputInterface $output): int private function execute (InputInterface $input, OutputInterface $output): int {
{
foreach ($input->getArgument(self::ARGUMENT_FILES) as $file) { foreach ($input->getArgument(self::ARGUMENT_FILES) as $file) {
//region Check if file or directory exists //region Check if file or directory exists
$fileInfo = new SplFileInfo($file); $fileInfo = new SplFileInfo($file);
@ -84,9 +277,10 @@ EOF
//endregion //endregion
if ($fileInfo->isDir()) { if ($fileInfo->isDir()) {
$this->processDirectory($file, $output); $this->processDirectory($fileInfo, $output);
} else { }
$this->processFile($file, $output); else {
$this->processFile($fileInfo, $output);
} }
} }
return Command::SUCCESS; return Command::SUCCESS;
@ -102,8 +296,7 @@ EOF
* *
* @throws Throwable If an error occurs * @throws Throwable If an error occurs
*/ */
private function processDirectory(SplFileInfo $directory, OutputInterface $output): void private function processDirectory (SplFileInfo $directory, OutputInterface $output): void {
{
//region Check directory is readable //region Check directory is readable
$output->writeln('<info>Processing directory: ' . $directory->getPathname() . '</info>'); $output->writeln('<info>Processing directory: ' . $directory->getPathname() . '</info>');
if (!$directory->isReadable()) { if (!$directory->isReadable()) {
@ -122,7 +315,6 @@ EOF
} }
//endregion //endregion
} }
/** /**
* Process a file * Process a file
* *
@ -133,8 +325,8 @@ EOF
* *
* @throws Throwable If an error occurs * @throws Throwable If an error occurs
*/ */
private function processFile(SplFileInfo $file, OutputInterface $output): void private function processFile (SplFileInfo $file, OutputInterface $output): void {
{ //region EPUB check and preparation
//region Check file is OK for processing //region Check file is OK for processing
$output->writeln('<info>Processing file: ' . $file->getPathname() . '</info>'); $output->writeln('<info>Processing file: ' . $file->getPathname() . '</info>');
@ -158,7 +350,15 @@ EOF
} }
//endregion //endregion
//region Create subdirectory for the file (if necessary) //region Create subdirectory for the file (if necessary)
if (count(scandir($fileParentDirectory->getPathname())) > 1) { $parentDirectoryFiles = scandir($fileParentDirectory->getPathname());
$nbFiles = 0;
foreach ($parentDirectoryFiles as $parentDirectoryFile) {
if (in_array($parentDirectoryFile, ['.', '..', 'metadata.opf'])) {
continue;
}
$nbFiles++;
}
if ($nbFiles > 1) {
$fileParentDirectory = new SplFileInfo($fileParentDirectory->getPathname() . DIRECTORY_SEPARATOR . $file->getBasename('.' . $file->getExtension())); $fileParentDirectory = new SplFileInfo($fileParentDirectory->getPathname() . DIRECTORY_SEPARATOR . $file->getBasename('.' . $file->getExtension()));
if (!mkdir($fileParentDirectory->getPathname())) { if (!mkdir($fileParentDirectory->getPathname())) {
$output->writeln('<error>Failed to create subdirectory "' . $fileParentDirectory->getPathname() . '": ' . (new LastErrorException())->getMessage() . '</error>'); $output->writeln('<error>Failed to create subdirectory "' . $fileParentDirectory->getPathname() . '": ' . (new LastErrorException())->getMessage() . '</error>');
@ -167,11 +367,529 @@ EOF
$oldFile = $file; $oldFile = $file;
$file = new SplFileInfo($fileParentDirectory . DIRECTORY_SEPARATOR . $file->getFilename()); $file = new SplFileInfo($fileParentDirectory . DIRECTORY_SEPARATOR . $file->getFilename());
if (rename($oldFile->getPathname(), $file->getPathname())) { if (!rename($oldFile->getPathname(), $file->getPathname())) {
$output->writeln('<error>Failed to move file to subdirectory "' . $oldFile->getPathname() . '" => "' . $file->getPathname() . '": ' . (new LastErrorException())->getMessage() . '</error>'); $output->writeln('<error>Failed to move file to subdirectory "' . $oldFile->getPathname() . '" => "' . $file->getPathname() . '": ' . (new LastErrorException())->getMessage()
. '</error>'
);
return;
}
}
//endregion
//endregion
//region Read EPUB first page
//region Open the EPUB (ZIP archive)
$fileArchive = new ZipArchive();
if (($rep = $fileArchive->open($file->getPathname())) !== true) {
$output->writeln('<error>Failed to open file as ZIP archive (error #' . $rep . '): ' . $fileArchive->getStatusString() . '</error>');
return;
}
//endregion
//region Meta "container" file: root file path
$metaContainerPath = 'META-INF' . DIRECTORY_SEPARATOR . 'container.xml';
$output->writeln('Processing meta "container": ' . $metaContainerPath, OutputInterface::VERBOSITY_VERBOSE);
//region Read and parse
$metaContainerXPath = $this->readAndParseXmlDocument(
$fileArchive,
$metaContainerPath,
'meta "container" file',
'urn:oasis:names:tc:opendocument:xmlns:container',
$output
);
if ($metaContainerXPath === null) {
$fileArchive->close();
return;
}
//endregion
//region Extract root file path
if (($metaContainerRootFileNodeList = $metaContainerXPath->query('/r:container/r:rootfiles/r:rootfile[@media-type="application/oebps-package+xml"]')) === false) {
$output->writeln('<error>Unable to query root file path</error>');
$fileArchive->close();
return;
}
if ($metaContainerRootFileNodeList->count() === 0) {
$output->writeln('<error>Unable to find root file path</error>');
$fileArchive->close();
return; return;
} }
$rootFilePath = null;
/** @var DOMNode $metaContainerRootFileNode */
foreach ($metaContainerRootFileNodeList as $metaContainerRootFileNode) {
if ($metaContainerRootFileNode instanceof DOMElement && $metaContainerRootFileNode->hasAttribute('full-path')) {
$rootFilePath = $metaContainerRootFileNode->getAttribute('full-path');
break;
}
}
if ($rootFilePath === null) {
$output->writeln('<error>Unable to find root file path</error>');
$fileArchive->close();
return;
} }
//endregion //endregion
unset($metaContainerXPath);
//endregion
//region Root file: TOC file path (ncx)
$output->writeln('Processing root file: ' . $rootFilePath, OutputInterface::VERBOSITY_VERBOSE);
//region Read and parse
/** @noinspection HttpUrlsUsage */
$rootFileXPath = $this->readAndParseXmlDocument(
$fileArchive,
$rootFilePath,
'root file',
'http://www.idpf.org/2007/opf',
$output
);
if ($rootFileXPath === null) {
$fileArchive->close();
return;
}
//endregion
//region Extract TOC file path (ncx)
if (($tocPagesNodeList = $rootFileXPath->query('/r:package/r:manifest/r:item[@id="ncx"][@media-type="application/x-dtbncx+xml"]')) === false) {
$output->writeln('<error>Unable to query TOC file path</error>');
$fileArchive->close();
return;
}
if ($tocPagesNodeList->count() === 0) {
$output->writeln('<error>Unable to find TOC file path</error>');
$fileArchive->close();
return;
}
$tocFilePath = null;
/** @var DOMNode $rootFileRootFileNode */
foreach ($tocPagesNodeList as $rootFileRootFileNode) {
if ($rootFileRootFileNode instanceof DOMElement && $rootFileRootFileNode->hasAttribute('href')) {
$tocFilePath = $rootFileRootFileNode->getAttribute('href');
break;
}
}
if ($tocFilePath === null) {
$output->writeln('<error>Unable to find TOC file path</error>');
$fileArchive->close();
return;
}
//endregion
unset($rootFileXPath);
//endregion
//region TOC file: first page content
$tocFilePath = dirname($rootFilePath) . DIRECTORY_SEPARATOR . $tocFilePath;
$output->writeln('Processing TOC file: ' . $tocFilePath, OutputInterface::VERBOSITY_VERBOSE);
//region Read and parse
/** @noinspection HttpUrlsUsage */
$tocFileXPath = $this->readAndParseXmlDocument(
$fileArchive,
$tocFilePath,
'TOC file',
'http://www.daisy.org/z3986/2005/ncx/',
$output
);
if ($tocFileXPath === null) {
$fileArchive->close();
return;
}
//endregion
//region Extract page list with play order
if (($tocPagesNodeList = $tocFileXPath->query('/r:ncx/r:navMap/r:navPoint')) === false) {
$output->writeln('<error>Unable to query page list</error>');
$fileArchive->close();
return;
}
if ($tocPagesNodeList->count() === 0) {
$output->writeln('<error>Unable to find page list</error>');
$fileArchive->close();
return;
}
$pages = [];
/** @var DOMNode $rootFileRootFileNode */
foreach ($tocPagesNodeList as $tocPageNode) {
if (!$tocPageNode instanceof DOMElement) {
continue;
}
$tocPagePlayOrder = (int)$tocPageNode->getAttribute('playOrder');
if (($tocPageContentNodeList = $tocFileXPath->query('./r:content', $tocPageNode)) === false) {
$output->writeln('<error>Unable to query page (play order: ' . $tocPagePlayOrder . ') content</error>');
$fileArchive->close();
return;
}
if ($tocPageContentNodeList->count() === 0) {
$output->writeln('<error>Unable to find page (play order: ' . $tocPagePlayOrder . ') content</error>');
$fileArchive->close();
return;
}
$pageContentPath = null;
/** @var DOMNode $tocPageContentNode */
foreach ($tocPageContentNodeList as $tocPageContentNode) {
if ($tocPageContentNode instanceof DOMElement && $tocPageContentNode->hasAttribute('src')) {
$pageContentPath = $tocPageContentNode->getAttribute('src');
break;
}
}
if ($pageContentPath === null) {
$output->writeln('<error>Unable to find TOC file path</error>');
continue;
}
$pages[$tocPagePlayOrder] = $pageContentPath;
}
//endregion
//region Extract first page
ksort($pages, SORT_NUMERIC);
$firstPagePath = reset($pages);
//endregion
unset($tocFileXPath);
//endregion
//region Read the first page
$firstPagePath = dirname($tocFilePath) . DIRECTORY_SEPARATOR . $firstPagePath;
$output->writeln('Read first page: ' . $firstPagePath, OutputInterface::VERBOSITY_VERBOSE);
if (($firstPageStream = $fileArchive->getStream($firstPagePath)) === false) {
$output->writeln('<error>Failed to open first page: ' . $fileArchive->getStatusString() . '</error>');
$fileArchive->close();
return;
}
if (($firstPageContent = stream_get_contents($firstPageStream)) === false) {
$output->writeln('<error>Failed to read first page: ' . (new LastErrorException())->getMessage() . '</error>');
$fileArchive->close();
return;
}
fclose($firstPageStream);
unset($stream);
//endregion
//region Close the EPUB
$fileArchive->close();
//endregion
//endregion
//region Extract metadata information from first page
$output->writeln('Parsing metadata', OutputInterface::VERBOSITY_VERBOSE);
$metadata = new stdClass();
//region FicHub
if (preg_match('`<p>\s*Exported\s+with\s+the\s+assistance\s+of\s+<a\s+href="https://fichub\.net"\s*>\s*FicHub\.net\s*</a>\s*</p>`i', $firstPageContent) === 1) {
$metadata->publisher = 'FanFiction.net (FicHub.net)';
$metadata->exportedBy = 'FicHub.net';
if (preg_match('`<h1>(?<title>.+?)\s*</h1>`i', $firstPageContent, $match) === 1) {
$metadata->title = $match['title'];
}
if (preg_match('`<p>\s*(?:<b>)?\s*By\s*:\s*(?<author>.+?)\s*(?:</b>)?</p>`i', $firstPageContent, $match) === 1) {
$metadata->author = $match['author'];
}
preg_match_all('`<p>(?:(?<type>\w+):\s*)?(?<value>.+?)</p>`is', $firstPageContent, $matches, PREG_SET_ORDER);
foreach ($matches as $match) {
switch (mb_strtolower($match['type'] ?? '')) {
case 'status':
$metadata->status = $match['value'];
break;
case 'published':
$metadata->publishDate = $match['value'];
break;
case 'updated':
$metadata->lastUpdateDate = $match['value'];
break;
case 'words':
$metadata->words = $this->getMetadataNumber($match['value']);
break;
case 'chapters':
$metadata->chapters = $this->getMetadataNumber($match['value']);
break;
case 'rated':
$parts = explode('-', $match['value']);
if (preg_match('`^\s*Fiction\s+(?<rated>.+?)\s*$`i', trim(array_shift($parts)), $ratedMatch) === 1) {
$metadata->rated = $ratedMatch['rated'];
}
foreach ($parts as $part) {
if (preg_match('`^\s*(?<type>\w+)\s*:\s*(?<value>.+)\s*$`i', $part, $partMatch) !== 1) {
continue;
}
switch (mb_strtolower($partMatch['type'] ?? '')) {
case 'language':
$metadata->language = $partMatch['value'];
break;
case 'genre':
$metadata->genre = $partMatch['value'];
break;
case 'characters':
$metadata->characters = $partMatch['value'];
break;
}
}
break;
case '':
if (preg_match('`^Original\s+source\s*:\s*.+href\s*=\s*"(?<url>[^"]+)"`i', $match['value'], $matchUrl) === 1) {
$metadata->source = $matchUrl['url'];
}
elseif (preg_match('`^Exported\s+with`i', $match['value']) !== 1) {
$metadata->summary = $match['value'];
}
break;
}
}
}
//endregion
//region FF2EBOOK
elseif (
preg_match(
'`<div\s+class="footer"\s*>\s*Converted\s+using\s+<a\s+href="http://www\.ff2ebook\.com"\s*>\s*www\.FF2EBOOK\.com\s*</a>\s*<br ?/?>\s*Date:\s*(?<date>\d{4}-\d{2}-\d{2})\s*</div>`i',
$firstPageContent,
$match
) === 1
) {
$metadata->publisher = 'FanFiction.net (www.FF2EBOOK.com)';
$metadata->exportedBy = 'www.FF2EBOOK.com (' . $match['date'] . ')';
if (
preg_match(
'`<div\s+class="fic-title">\s*(?:<a\s+href\s*=\s*"(?<url>[^"]+)"\s*>)?\s*(?<title>.+?)\s*(?:</a>)?\s*</div>`i', $firstPageContent, $match
) === 1
) {
$metadata->title = $match['title'];
if (($match['url'] ?? '') !== '') {
$metadata->url = $match['url'];
}
}
if (
preg_match(
'`<div\s+class="fic-author">\s*By:\s*(?:<a\s+href\s*=\s*"[^"]+"\s*>)?\s*(?<author>.+?)\s*(?:</a>)?\s*</div>`i', $firstPageContent, $match
) === 1
) {
$metadata->author = $match['author'];
}
preg_match_all('`<span\s+class="bold">(?<type>.+?)\s*:\s*</span>\s*(?<value>.+?)\s*<br ?/?>`is', $firstPageContent, $matches, PREG_SET_ORDER);
foreach ($matches as $match) {
switch (mb_strtolower($match['type'] ?? '')) {
case 'status':
$metadata->status = match (mb_strtolower($match['value'])) {
'completed' => 'complete',
default => $match['value'],
};
break;
case 'published':
$metadata->publishDate = $match['value'];
break;
case 'last updated':
$metadata->lastUpdateDate = $match['value'];
break;
case 'words count':
$metadata->words = $match['value'];
break;
case 'chapters count':
$metadata->chapters = $match['value'];
break;
case 'pairings/main char.':
$metadata->characters = (isset($metadata->characters) ? $metadata->characters . ' ' : '') . $match['value'];
break;
case 'additional infos':
if (preg_match('`^\s*(?:(?<genre>[^>]+)\s*-\s*)?(?<pairings>[^>]+)\s*`i', $match['value'], $infoMatch) === 1) {
if (($infoMatch['genre'] ?? '') !== '') {
$metadata->genre = $infoMatch['genre'];
}
if (($infoMatch['pairings'] ?? '') !== '') {
$metadata->characters = $infoMatch['pairings'] . (isset($metadata->characters) ? ' ' . $metadata->characters : '');
}
}
break;
case 'summary':
$metadata->summary = $match['value'];
break;
}
}
}
//endregion
//region Unknown (error)
else {
$output->writeln('<error>Unrecognized format</error>');
$output->writeln($firstPageContent, OutputInterface::VERBOSITY_VERY_VERBOSE);
return;
}
//endregion
//endregion
//region Generate Calibre metadata (OPF file)
$output->writeln('Generate Calibre metadata (OPF file)', OutputInterface::VERBOSITY_VERBOSE);
$opf = new DOMDocument('1.0', 'utf-8');
$opf->formatOutput = true;
/** @noinspection HttpUrlsUsage */
$root = $opf->createElementNS('http://www.idpf.org/2007/opf', 'package');
$opf->appendChild($root);
$root->setAttribute('unique-identifier', 'uuid_id');
$root->setAttribute('version', '2.0');
$opfMetadata = $opf->createElement('metadata');
$root->appendChild($opfMetadata);
/** @noinspection HttpUrlsUsage */
$opfMetadata->setAttributeNS(self::DOM_NAMESPACE_ATTRIBUTE, 'xmlns:dc', self::OPF_NAMESPACE_DC);
/** @noinspection HttpUrlsUsage */
$opfMetadata->setAttributeNS(self::DOM_NAMESPACE_ATTRIBUTE, 'xmlns:opf', self::OPF_NAMESPACE_OPF);
$title = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:publisher', $metadata->publisher);
$opfMetadata->appendChild($title);
if (isset($metadata->title)) {
$title = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:title', $metadata->title);
$opfMetadata->appendChild($title);
}
if (isset($metadata->author)) {
$author = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:creator', $metadata->author);
$opfMetadata->appendChild($author);
$author->setAttribute('opf:role', 'aut');
}
if (isset($metadata->publishDate)) {
$title = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:date', $metadata->publishDate . 'T00:00:00+00:00');
$opfMetadata->appendChild($title);
}
if (isset($metadata->language)) {
$title = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:language', mb_substr(mb_strtolower($metadata->language), 0, 3));
$opfMetadata->appendChild($title);
}
$description = $opf->createElementNS(
self::OPF_NAMESPACE_DC, 'dc:description',
$this->twig->render(
self::TWIG_TEMPLATE_METADATA_DESCRIPTION,
(array)$metadata
)
);
$opfMetadata->appendChild($description);
$this->calibreAddMetaColumn($opfMetadata, 'buy', self::CALIBRE_COLUMN_BUY);
$this->calibreAddMetaColumn($opfMetadata, 'collection', self::CALIBRE_COLUMN_COLLECTION);
$this->calibreAddMetaColumn($opfMetadata, 'collection_manual', self::CALIBRE_COLUMN_COLLECTION_MANUAL);
$this->calibreAddMetaColumn($opfMetadata, 'genre', self::CALIBRE_COLUMN_GENRE);
$this->calibreAddMetaColumn($opfMetadata, 'groups_series', self::CALIBRE_COLUMN_GROUPS_SERIES);
if (file_put_contents($fileParentDirectory->getPathname() . DIRECTORY_SEPARATOR . 'metadata.opf', $opf->saveXML()) === false) {
$output->writeln('<error>Failed to write the Calibre metadata: ' . (new LastErrorException())->getMessage() . '</error>');
return;
}
$output->writeln('Calibre metadata generated !', OutputInterface::VERBOSITY_VERBOSE);
//endregion
}
/**
 * Read an XML document of a ZIP archive, parse it and return an XPath bound to it
 *
 * The archive is never closed by this method, whatever the outcome: closing it stays the caller's responsibility.
 *
 * @param ZipArchive      $fileArchive  The ZIP archive
 * @param string          $documentPath The XML document path in ZIP archive
 * @param string          $label        The label for the command line output
 * @param string          $namespace    The XPath namespace to register (bound to the "r" prefix)
 * @param OutputInterface $output       The command line output
 *
 * @return DOMXPath|null The XPath or Null if failed
 */
private function readAndParseXmlDocument (ZipArchive $fileArchive, string $documentPath, string $label, string $namespace, OutputInterface $output): ?DOMXPath {
    //region Read the file
    if (($stream = $fileArchive->getStream($documentPath)) === false) {
        $output->writeln('<error>Failed to open ' . $label . ': ' . $fileArchive->getStatusString() . '</error>');
        return null;
    }

    $content = stream_get_contents($stream);
    // Release the stream before testing the result, so it is not leaked when the read fails
    fclose($stream);
    unset($stream);

    if ($content === false) {
        $output->writeln('<error>Failed to read ' . $label . ': ' . (new LastErrorException())->getMessage() . '</error>');
        return null;
    }
    if ($content === '') {
        // DOMDocument::loadXML() throws a ValueError on an empty string: report it as a regular failure instead
        $output->writeln('<error>Failed to parse ' . $label . ': empty document</error>');
        return null;
    }
    //endregion

    //region Parse it as XML
    // libxml_get_errors() only collects errors while "internal errors" are enabled; the previous state is restored afterwards
    $previousUseInternalErrors = libxml_use_internal_errors(true);
    libxml_clear_errors();

    $dom = new DOMDocument();
    // LIBXML_NONET: the document comes from an arbitrary EPUB, never let the parser reach the network
    $isLoaded = $dom->loadXML($content, LIBXML_COMPACT | LIBXML_NONET);

    $errors = libxml_get_errors();
    libxml_clear_errors();
    libxml_use_internal_errors($previousUseInternalErrors);

    if (!$isLoaded) {
        $output->writeln('<error>Failed to parse ' . $label . ': ' . count($errors) . ' errors</error>');
        foreach ($errors as $error) {
            $levelName = match ($error->level) {
                LIBXML_ERR_WARNING => 'Warning',
                LIBXML_ERR_ERROR   => 'Error',
                LIBXML_ERR_FATAL   => 'Fatal',
                LIBXML_ERR_NONE    => 'None',
                default            => 'Unknown (' . $error->level . ')'
            };
            $output->writeln(
                '<error>' . $levelName . ' #' . $error->code . ': ' . $error->message . ' (line: ' . $error->line . ', column: ' . $error->column . ')</error>',
                OutputInterface::VERBOSITY_VERBOSE
            );
        }
        return null;
    }
    //endregion

    //region Register XPath namespaces
    $xpath = new DOMXPath($dom);
    /** @noinspection HttpUrlsUsage */
    if (!$xpath->registerNamespace('r', $namespace)) {
        // The archive is deliberately left open: callers close it themselves when Null is returned,
        // and closing an already closed ZipArchive raises an error
        $output->writeln('<error>Failed to register ' . $label . ' XPath namespace</error>');
        return null;
    }
    //endregion

    return $xpath;
}
/**
 * Convert a number to the metadata form: digits grouped by three from the right, separated by commas
 *
 * Examples: "1234" gives "1,234", "123" gives "123", "0" gives "0".
 *
 * @param string $number The number to convert (expected to be a plain sequence of digits)
 *
 * @return string The number in metadata form
 */
private function getMetadataNumber (string $number): string {
    if ($number === '') {
        return '';
    }

    // Left-pad up to the next multiple of 3 so the string splits into full groups.
    // The outer modulo keeps the padding at 0 (instead of 3) when the length is already a multiple of 3.
    $paddingLength = (3 - mb_strlen($number) % 3) % 3;
    $numberParts = mb_str_split(str_repeat('0', $paddingLength) . $number, 3);

    // Remove exactly the padding that was added: the digits of the number itself are never dropped
    $numberParts[0] = mb_substr($numberParts[0], $paddingLength);

    return implode(',', $numberParts);
}
/**
 * Add a Calibre meta column
 *
 * Appends a "meta" element to the given node, with the attributes:
 * name = "calibre:user_metadata:#" followed by the column name, content = the JSON-encoded column definition.
 *
 * @param DOMElement $metadata The "metadata" node
 * @param string     $name     The column name
 * @param array      $content  The column content and definition
 *
 * @return void
 *
 * @throws DOMException If an error occurs
 * @throws JsonException If the column content cannot be encoded as JSON
 */
private function calibreAddMetaColumn (DOMElement $metadata, string $name, array $content): void {
    // An element that is not attached to a document cannot create sibling nodes
    $document = $metadata->ownerDocument
        ?? throw new DOMException('The "metadata" node is not attached to a document');

    $metaColumn = $document->createElement('meta');
    $metaColumn->setAttribute('name', 'calibre:user_metadata:#' . $name);
    // JSON_THROW_ON_ERROR: fail loudly rather than silently writing an empty "content" attribute
    $metaColumn->setAttribute('content', json_encode($content, JSON_THROW_ON_ERROR));

    $metadata->appendChild($metaColumn);
}
} }
Loading…
Cancel
Save