From 3eaa74066d1aac2bf7173eca8644b09860955bd5 Mon Sep 17 00:00:00 2001 From: darkelfe14728 Date: Sun, 26 May 2024 18:33:45 +0200 Subject: [PATCH] Program finished --- .run/Run.run.xml | 5 + composer.json | 71 +++-- run.php | 2 + src/Main.php | 764 +++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 786 insertions(+), 56 deletions(-) create mode 100644 .run/Run.run.xml diff --git a/.run/Run.run.xml b/.run/Run.run.xml new file mode 100644 index 0000000..ffc2ac2 --- /dev/null +++ b/.run/Run.run.xml @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/composer.json b/composer.json index 507bbc3..15d2a13 100644 --- a/composer.json +++ b/composer.json @@ -1,36 +1,41 @@ { - "name": "jrosset/calibre_metadata_parser_ff", - "description": "Calibre metadata parser for FanFiction files", - "keywords": [], - "type": "library", - "config": { - "sort-packages": true - }, - "minimum-stability": "stable", - "require": { - "php": "^8.3", - "jrosset/lasterrorexception": "^1.1", - "symfony/console": "^7.0" - }, - "autoload": { - "psr-4": { - "jrosset\\": "src/" + "name": "jrosset/calibre_metadata_parser_ff", + "description": "Calibre metadata parser for FanFiction files", + "keywords": [ ], + "type": "library", + "config": { + "sort-packages": true + }, + "minimum-stability": "stable", + "require": { + "php": "^8.3", + "ext-dom": "*", + "ext-libxml": "*", + "ext-zip": "*", + "jrosset/lasterrorexception": "^1.1", + "jrosset/mbstring-extended": "^1.3", + "symfony/console": "^7.0", + "twig/twig": "^3.10" + }, + "autoload": { + "psr-4": { + "jrosset\\": "src/" + } + }, + "readme": "README.md", + "homepage": "https://git.jrosset.ovh/jrosset/calibre_metadata_parser_ff", + "license": "CC-BY-4.0", + "authors": [ + { + "name": "Julien Rosset", + "email": "jul.rosset@gmail.com" + } + ], + "support": { + "email": "jul.rosset@gmail.com", + "issues": "https://git.jrosset.ovh/jrosset/calibre_metadata_parser_ff/issues", + "wiki": "https://git.jrosset.ovh/jrosset/calibre_metadata_parser_ff/wiki", + "docs": "https://git.jrosset.ovh/jrosset/calibre_metadata_parser_ff/wiki", + "source": "https://git.jrosset.ovh/jrosset/calibre_metadata_parser_ff" } - }, - "readme": "README.md", - "homepage": "https://git.jrosset.ovh/jrosset/calibre_metadata_parser_ff", - "license": "CC-BY-4.0", - "authors": [ - { - "name": "Julien Rosset", - "email": "jul.rosset@gmail.com" - } - ], - "support": { - "email": "jul.rosset@gmail.com", - "issues": "https://git.jrosset.ovh/jrosset/calibre_metadata_parser_ff/issues", - "wiki": "https://git.jrosset.ovh/jrosset/calibre_metadata_parser_ff/wiki", - "docs": "https://git.jrosset.ovh/jrosset/calibre_metadata_parser_ff/wiki", - "source": "https://git.jrosset.ovh/jrosset/calibre_metadata_parser_ff" - } } diff --git a/run.php b/run.php index bc81103..6e0d046 100644 --- a/run.php +++ b/run.php @@ -5,5 +5,7 @@ require_once __DIR__ . '/vendor/autoload.php'; use jrosset\Main; +mb_internal_encoding('UTF-8'); + (new Main()) ->run(); diff --git a/src/Main.php b/src/Main.php index 20f9da6..216fbb2 100644 --- a/src/Main.php +++ b/src/Main.php @@ -1,39 +1,231 @@ - <<<'TWIG' +
+

{{ description }}

+

Chapters: {{ chapters ?? 1 }}

+

Word count: {{ words }}

+

Published: {{ publishDate }}

+

Last update: {{ lastUpdateDate ?? publishDate }}

+

Status: {{ status }}

+

Rated: {{ rated }}

+

Genre: {{ genre }}

+

Pairings: {{ characters }}

+

Source link:{{ url }}

+

Exported by: {{ exportedBy }})

+
+TWIG + , + ]; + + private const array CALIBRE_COLUMN_BUY = [ + 'table' => 'custom_column_9', + 'column' => 'value', + 'datatype' => 'enumeration', + 'is_multiple' => null, + 'kind' => 'field', + 'name' => 'Acheté', + 'search_terms' => [ + '#buy', + ], + 'label' => 'buy', + 'colnum' => 9, + 'display' => [ + 'description' => '', + 'enum_values' => [ + 'Non', + 'En ligne', + 'Papier', + 'Libre', + ], + 'use_decorations' => 0, + 'enum_colors' => [ + ], + ], + 'is_custom' => true, + 'is_category' => true, + 'link_column' => 'value', + 'category_sort' => 'value', + 'is_csp' => false, + 'is_editable' => true, + 'rec_index' => 22, + '#value#' => 'Libre', + '#extra#' => null, + 'is_multiple2' => [ + ], + ]; + private const array CALIBRE_COLUMN_COLLECTION = [ + 'table' => 'custom_column_5', + 'column' => 'value', + 'datatype' => 'composite', + 'is_multiple' => null, + 'kind' => 'field', + 'name' => 'Collection', + 'search_terms' => [ + '#collection', + ], + 'label' => 'collection', + 'colnum' => 5, + 'display' => [ + 'make_category' => true, + 'composite_sort' => 'text', + 'contains_html' => false, + 'use_decorations' => 0, + 'composite_template' => '{#collection_manual:ifempty({#groups_series}{#groups_series_index:0>2s| [|]}{series:\'contains($,\'<ordre>\',\'\',strcat(test(field(\'#groups_series\'),\' - \', \'\'),$))\'})}', + 'description' => '', + ], + 'is_custom' => true, + 'is_category' => false, + 'link_column' => 'value', + 'category_sort' => 'value', + 'is_csp' => false, + 'is_editable' => true, + 'rec_index' => 23, + '#value#' => 'Fan-Fiction - Harry Potter', + 'is_multiple2' => [ + ], + ]; + private const array CALIBRE_COLUMN_COLLECTION_MANUAL = [ + 'table' => 'custom_column_7', + 'column' => 'value', + 'datatype' => 'text', + 'is_multiple' => null, + 'kind' => 'field', + 'name' => 'Nom manuel de collection', + 'search_terms' => [ + '#collection_manual', + ], + 'label' => 'collection_manual', + 'colnum' => 7, + 'display' => [ + 'use_decorations' => 0, + 'description' => '', + ], + 'is_custom' => true, + 'is_category' => true, + 'link_column' => 'value', + 'category_sort' => 'value', + 'is_csp' => false, + 'is_editable' => true, + 'rec_index' => 24, + '#value#' => 'Fan-Fiction - Harry Potter', + '#extra#' => null, + 'is_multiple2' => [ + ], + ]; + private const array CALIBRE_COLUMN_GENRE = [ + 'table' => 'custom_column_2', + 'column' => 'value', + 'datatype' => 'text', + 'is_multiple' => '|', + 'kind' => 'field', + 'name' => 'Genre', + 'search_terms' => [ + '#genre', + ], + 'label' => 'genre', + 'colnum' => 2, + 'display' => [ + 'is_names' => false, + 'description' => '', + ], + 'is_custom' => true, + 'is_category' => true, + 'link_column' => 'value', + 'category_sort' => 'value', + 'is_csp' => false, + 'is_editable' => true, + 'rec_index' => 25, + '#value#' => [ + 'Fan-Fiction', + 'Fantastique', + ], + '#extra#' => null, + 'is_multiple2' => [ + 'cache_to_list' => '|', + 'ui_to_list' => ',', + 'list_to_ui' => ', ', + ], + ]; + private const array CALIBRE_COLUMN_GROUPS_SERIES = [ + 'table' => 'custom_column_6', + 'column' => 'value', + 'datatype' => 'series', + 'is_multiple' => null, + 'kind' => 'field', + 'name' => 'Groupe de séries', + 'search_terms' => [ + '#groups_series', + ], + 'label' => 'groups_series', + 'colnum' => 6, + 'display' => [ + 'description' => '', + ], + 'is_custom' => true, + 'is_category' => true, + 'link_column' => 'value', + 'category_sort' => 'value', + 'is_csp' => false, + 'is_editable' => true, + 'rec_index' => 26, + '#value#' => null, + '#extra#' => null, + 'is_multiple2' => [ + ], + ]; + /** * @var SingleCommandApplication The command */ private readonly SingleCommandApplication $command; + /** + * @var Environment The twig environment + */ + private Environment $twig; /** * Initialization */ - public function __construct() - { + public function __construct () { $this->command = (new SingleCommandApplication()) ->setName('calibre_metadata_parser_ff') ->setDescription(<<<'EOF' @@ -49,6 +241,10 @@ EOF 'The files or directories to process' ) ->setCode($this->execute(...)); + + $this->twig = new Environment( + new ArrayLoader(self::TWIG_TEMPLATES), + ); } /** @@ -58,11 +254,9 @@ EOF * * @throws Throwable If an error occurs */ - public function run(): void - { + public function run (): void { $this->command->run(); } - /** * Execute the command * @@ -73,8 +267,7 @@ EOF * * @throws Throwable If an error occurs */ - private function execute(InputInterface $input, OutputInterface $output): int - { + private function execute (InputInterface $input, OutputInterface $output): int { foreach ($input->getArgument(self::ARGUMENT_FILES) as $file) { //region Check if file or directory exists $fileInfo = new SplFileInfo($file); @@ -84,9 +277,10 @@ EOF //endregion if ($fileInfo->isDir()) { - $this->processDirectory($file, $output); - } else { - $this->processFile($file, $output); + $this->processDirectory($fileInfo, $output); + } + else { + $this->processFile($fileInfo, $output); } } return Command::SUCCESS; @@ -95,15 +289,14 @@ EOF /** * Process a directory * - * @param SplFileInfo $directory The directory - * @param OutputInterface $output The command line output + * @param SplFileInfo $directory The directory + * @param OutputInterface $output The command line output * * @return void * * @throws Throwable If an error occurs */ - private function processDirectory(SplFileInfo $directory, OutputInterface $output): void - { + private function processDirectory (SplFileInfo $directory, OutputInterface $output): void { //region Check directory is readable $output->writeln('Processing directory: ' . $directory->getPathname() . ''); if (!$directory->isReadable()) { @@ -122,7 +315,6 @@ EOF } //endregion } - /** * Process a file * @@ -133,8 +325,8 @@ EOF * * @throws Throwable If an error occurs */ - private function processFile(SplFileInfo $file, OutputInterface $output): void - { + private function processFile (SplFileInfo $file, OutputInterface $output): void { + //region EPUB check and preparation //region Check file is OK for processing $output->writeln('Processing file: ' . $file->getPathname() . ''); @@ -158,7 +350,15 @@ EOF } //endregion //region Create subdirectory for the file (if necessary) - if (count(scandir($fileParentDirectory->getPathname())) > 1) { + $parentDirectoryFiles = scandir($fileParentDirectory->getPathname()); + $nbFiles = 0; + foreach ($parentDirectoryFiles as $parentDirectoryFile) { + if (in_array($parentDirectoryFile, ['.', '..', 'metadata.opf'])) { + continue; + } + $nbFiles++; + } + if ($nbFiles > 1) { $fileParentDirectory = new SplFileInfo($fileParentDirectory->getPathname() . DIRECTORY_SEPARATOR . $file->getBasename('.' . $file->getExtension())); if (!mkdir($fileParentDirectory->getPathname())) { $output->writeln('Failed to create subdirectory "' . $fileParentDirectory->getPathname() . '": ' . (new LastErrorException())->getMessage() . ''); @@ -167,11 +367,529 @@ EOF $oldFile = $file; $file = new SplFileInfo($fileParentDirectory . DIRECTORY_SEPARATOR . $file->getFilename()); - if (rename($oldFile->getPathname(), $file->getPathname())) { - $output->writeln('Failed to move file to subdirectory "' . $oldFile->getPathname() . '" => "' . $file->getPathname() . '": ' . (new LastErrorException())->getMessage() . ''); + if (!rename($oldFile->getPathname(), $file->getPathname())) { + $output->writeln('Failed to move file to subdirectory "' . $oldFile->getPathname() . '" => "' . $file->getPathname() . '": ' . (new LastErrorException())->getMessage() + . '' + ); + return; + } + } + //endregion + //endregion + //region Read EPUB first page + //region Open the EPUB (ZIP archive) + $fileArchive = new ZipArchive(); + if (($rep = $fileArchive->open($file->getPathname())) !== true) { + $output->writeln('Failed to open file as ZIP archive (error #' . $rep . '): ' . $fileArchive->getStatusString() . ''); + return; + } + //endregion + + //region Meta "container" file: root file path + $metaContainerPath = 'META-INF' . DIRECTORY_SEPARATOR . 'container.xml'; + $output->writeln('Processing meta "container": ' . $metaContainerPath, OutputInterface::VERBOSITY_VERBOSE); + + //region Read and parse + $metaContainerXPath = $this->readAndParseXmlDocument( + $fileArchive, + $metaContainerPath, + 'meta "container" file', + 'urn:oasis:names:tc:opendocument:xmlns:container', + $output + ); + if ($metaContainerXPath === null) { + $fileArchive->close(); + return; + } + //endregion + //region Extract root file path + if (($metaContainerRootFileNodeList = $metaContainerXPath->query('/r:container/r:rootfiles/r:rootfile[@media-type="application/oebps-package+xml"]')) === false) { + $output->writeln('Unable to query root file path'); + $fileArchive->close(); + return; + } + if ($metaContainerRootFileNodeList->count() === 0) { + $output->writeln('Unable to find root file path'); + $fileArchive->close(); + return; + } + + $rootFilePath = null; + /** @var DOMNode $metaContainerRootFileNode */ + foreach ($metaContainerRootFileNodeList as $metaContainerRootFileNode) { + if ($metaContainerRootFileNode instanceof DOMElement && $metaContainerRootFileNode->hasAttribute('full-path')) { + $rootFilePath = $metaContainerRootFileNode->getAttribute('full-path'); + break; + } + } + + if ($rootFilePath === null) { + $output->writeln('Unable to find root file path'); + $fileArchive->close(); + return; + } + //endregion + + unset($metaContainerXPath); + //endregion + //region Root file: TOC file path (ncx) + $output->writeln('Processing root file: ' . $rootFilePath, OutputInterface::VERBOSITY_VERBOSE); + + //region Read and parse + /** @noinspection HttpUrlsUsage */ + $rootFileXPath = $this->readAndParseXmlDocument( + $fileArchive, + $rootFilePath, + 'root file', + 'http://www.idpf.org/2007/opf', + $output + ); + if ($rootFileXPath === null) { + $fileArchive->close(); + return; + } + //endregion + //region Extract TOC file path (ncx) + if (($tocPagesNodeList = $rootFileXPath->query('/r:package/r:manifest/r:item[@id="ncx"][@media-type="application/x-dtbncx+xml"]')) === false) { + $output->writeln('Unable to query TOC file path'); + $fileArchive->close(); + return; + } + if ($tocPagesNodeList->count() === 0) { + $output->writeln('Unable to find TOC file path'); + $fileArchive->close(); + return; + } + + $tocFilePath = null; + /** @var DOMNode $rootFileRootFileNode */ + foreach ($tocPagesNodeList as $rootFileRootFileNode) { + if ($rootFileRootFileNode instanceof DOMElement && $rootFileRootFileNode->hasAttribute('href')) { + $tocFilePath = $rootFileRootFileNode->getAttribute('href'); + break; + } + } + + if ($tocFilePath === null) { + $output->writeln('Unable to find TOC file path'); + $fileArchive->close(); + return; + } + //endregion + + unset($rootFileXPath); + //endregion + //region TOC file: first page content + $tocFilePath = dirname($rootFilePath) . DIRECTORY_SEPARATOR . $tocFilePath; + $output->writeln('Processing TOC file: ' . $tocFilePath, OutputInterface::VERBOSITY_VERBOSE); + + //region Read and parse + /** @noinspection HttpUrlsUsage */ + $tocFileXPath = $this->readAndParseXmlDocument( + $fileArchive, + $tocFilePath, + 'TOC file', + 'http://www.daisy.org/z3986/2005/ncx/', + $output + ); + if ($tocFileXPath === null) { + $fileArchive->close(); + return; + } + //endregion + //region Extract page list with play order + if (($tocPagesNodeList = $tocFileXPath->query('/r:ncx/r:navMap/r:navPoint')) === false) { + $output->writeln('Unable to query page list'); + $fileArchive->close(); + return; + } + if ($tocPagesNodeList->count() === 0) { + $output->writeln('Unable to find page list'); + $fileArchive->close(); + return; + } + + $pages = []; + /** @var DOMNode $rootFileRootFileNode */ + foreach ($tocPagesNodeList as $tocPageNode) { + if (!$tocPageNode instanceof DOMElement) { + continue; + } + + $tocPagePlayOrder = (int)$tocPageNode->getAttribute('playOrder'); + + if (($tocPageContentNodeList = $tocFileXPath->query('./r:content', $tocPageNode)) === false) { + $output->writeln('Unable to query page (play order: ' . $tocPagePlayOrder . ') content'); + $fileArchive->close(); + return; + } + if ($tocPageContentNodeList->count() === 0) { + $output->writeln('Unable to find page (play order: ' . $tocPagePlayOrder . ') content'); + $fileArchive->close(); return; } + + $pageContentPath = null; + /** @var DOMNode $tocPageContentNode */ + foreach ($tocPageContentNodeList as $tocPageContentNode) { + if ($tocPageContentNode instanceof DOMElement && $tocPageContentNode->hasAttribute('src')) { + $pageContentPath = $tocPageContentNode->getAttribute('src'); + break; + } + } + + if ($pageContentPath === null) { + $output->writeln('Unable to find TOC file path'); + continue; + } + + $pages[$tocPagePlayOrder] = $pageContentPath; } //endregion + //region Extract first page + ksort($pages, SORT_NUMERIC); + $firstPagePath = reset($pages); + //endregion + + unset($tocFileXPath); + //endregion + //region Read the first page + $firstPagePath = dirname($tocFilePath) . DIRECTORY_SEPARATOR . $firstPagePath; + $output->writeln('Read first page: ' . $firstPagePath, OutputInterface::VERBOSITY_VERBOSE); + if (($firstPageStream = $fileArchive->getStream($firstPagePath)) === false) { + $output->writeln('Failed to open first page: ' . $fileArchive->getStatusString() . ''); + $fileArchive->close(); + return; + } + if (($firstPageContent = stream_get_contents($firstPageStream)) === false) { + $output->writeln('Failed to read first page: ' . (new LastErrorException())->getMessage() . ''); + $fileArchive->close(); + return; + } + fclose($firstPageStream); + unset($stream); + //endregion + + //region Close the EPUB + $fileArchive->close(); + //endregion + //endregion + //region Extract metadata information from first page + $output->writeln('Parsing metadata', OutputInterface::VERBOSITY_VERBOSE); + $metadata = new stdClass(); + + //region FicHub + if (preg_match('`

\s*Exported\s+with\s+the\s+assistance\s+of\s+\s*FicHub\.net\s*\s*

`i', $firstPageContent) === 1) { + $metadata->publisher = 'FanFiction.net (FicHub.net)'; + $metadata->exportedBy = 'FicHub.net'; + + if (preg_match('`

(?.+?)\s*</h1>`i', $firstPageContent, $match) === 1) { + $metadata->title = $match['title']; + } + if (preg_match('`<p>\s*(?:<b>)?\s*By\s*:\s*(?<author>.+?)\s*(?:</b>)?</p>`i', $firstPageContent, $match) === 1) { + $metadata->author = $match['author']; + } + + preg_match_all('`<p>(?:(?<type>\w+):\s*)?(?<value>.+?)</p>`is', $firstPageContent, $matches, PREG_SET_ORDER); + foreach ($matches as $match) { + switch (mb_strtolower($match['type'] ?? '')) { + case 'status': + $metadata->status = $match['value']; + break; + + case 'published': + $metadata->publishDate = $match['value']; + break; + + case 'updated': + $metadata->lastUpdateDate = $match['value']; + break; + + case 'words': + $metadata->words = $this->getMetadataNumber($match['value']); + break; + + case 'chapters': + $metadata->chapters = $this->getMetadataNumber($match['value']); + break; + + case 'rated': + $parts = explode('-', $match['value']); + + if (preg_match('`^\s*Fiction\s+(?<rated>.+?)\s*$`i', trim(array_shift($parts)), $ratedMatch) === 1) { + $metadata->rated = $ratedMatch['rated']; + } + + foreach ($parts as $part) { + if (preg_match('`^\s*(?<type>\w+)\s*:\s*(?<value>.+)\s*$`i', $part, $partMatch) !== 1) { + continue; + } + + switch (mb_strtolower($partMatch['type'] ?? '')) { + case 'language': + $metadata->language = $partMatch['value']; + break; + + case 'genre': + $metadata->genre = $partMatch['value']; + break; + + case 'characters': + $metadata->characters = $partMatch['value']; + break; + } + } + break; + + case '': + if (preg_match('`^Original\s+source\s*:\s*.+href\s*=\s*"(?<url>[^"]+)"`i', $match['value'], $matchUrl) === 1) { + $metadata->source = $matchUrl['url']; + } + elseif (preg_match('`^Exported\s+with`i', $match['value']) !== 1) { + $metadata->summary = $match['value']; + } + break; + } + } + } + //endregion + //region FF2EBOOK + elseif ( + preg_match( + '`<div\s+class="footer"\s*>\s*Converted\s+using\s+<a\s+href="http://www\.ff2ebook\.com"\s*>\s*www\.FF2EBOOK\.com\s*</a>\s*<br ?/?>\s*Date:\s*(?<date>\d{4}-\d{2}-\d{2})\s*</div>`i', + $firstPageContent, + $match + ) === 1 + ) { + $metadata->publisher = 'FanFiction.net (www.FF2EBOOK.com)'; + $metadata->exportedBy = 'www.FF2EBOOK.com (' . $match['date'] . ')'; + + if ( + preg_match( + '`<div\s+class="fic-title">\s*(?:<a\s+href\s*=\s*"(?<url>[^"]+)"\s*>)?\s*(?<title>.+?)\s*(?:</a>)?\s*</div>`i', $firstPageContent, $match + ) === 1 + ) { + $metadata->title = $match['title']; + if (($match['url'] ?? '') !== '') { + $metadata->url = $match['url']; + } + } + if ( + preg_match( + '`<div\s+class="fic-author">\s*By:\s*(?:<a\s+href\s*=\s*"[^"]+"\s*>)?\s*(?<author>.+?)\s*(?:</a>)?\s*</div>`i', $firstPageContent, $match + ) === 1 + ) { + $metadata->author = $match['author']; + } + + preg_match_all('`<span\s+class="bold">(?<type>.+?)\s*:\s*</span>\s*(?<value>.+?)\s*<br ?/?>`is', $firstPageContent, $matches, PREG_SET_ORDER); + foreach ($matches as $match) { + switch (mb_strtolower($match['type'] ?? '')) { + case 'status': + $metadata->status = match (mb_strtolower($match['value'])) { + 'completed' => 'complete', + default => $match['value'], + }; + break; + + case 'published': + $metadata->publishDate = $match['value']; + break; + + case 'last updated': + $metadata->lastUpdateDate = $match['value']; + break; + + case 'words count': + $metadata->words = $match['value']; + break; + + case 'chapters count': + $metadata->chapters = $match['value']; + break; + + case 'pairings/main char.': + $metadata->characters = (isset($metadata->characters) ? $metadata->characters . ' ' : '') . $match['value']; + break; + + case 'additional infos': + if (preg_match('`^\s*(?:(?<genre>[^>]+)\s*-\s*)?(?<pairings>[^>]+)\s*`i', $match['value'], $infoMatch) === 1) { + if (($infoMatch['genre'] ?? '') !== '') { + $metadata->genre = $infoMatch['genre']; + } + if (($infoMatch['pairings'] ?? '') !== '') { + $metadata->characters = $infoMatch['pairings'] . (isset($metadata->characters) ? ' ' . $metadata->characters : ''); + } + } + break; + + case 'summary': + $metadata->summary = $match['value']; + break; + } + } + } + //endregion + //region Unknown (error) + else { + $output->writeln('<error>Unrecognized format</error>'); + $output->writeln($firstPageContent, OutputInterface::VERBOSITY_VERY_VERBOSE); + return; + } + //endregion + //endregion + //region Generate Calibre metadata (OPF file) + $output->writeln('Generate Calibre metadata (OPF file)', OutputInterface::VERBOSITY_VERBOSE); + + $opf = new DOMDocument('1.0', 'utf-8'); + $opf->formatOutput = true; + + /** @noinspection HttpUrlsUsage */ + $root = $opf->createElementNS('http://www.idpf.org/2007/opf', 'package'); + $opf->appendChild($root); + $root->setAttribute('unique-identifier', 'uuid_id'); + $root->setAttribute('version', '2.0'); + + $opfMetadata = $opf->createElement('metadata'); + $root->appendChild($opfMetadata); + /** @noinspection HttpUrlsUsage */ + $opfMetadata->setAttributeNS(self::DOM_NAMESPACE_ATTRIBUTE, 'xmlns:dc', self::OPF_NAMESPACE_DC); + /** @noinspection HttpUrlsUsage */ + $opfMetadata->setAttributeNS(self::DOM_NAMESPACE_ATTRIBUTE, 'xmlns:opf', self::OPF_NAMESPACE_OPF); + + $title = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:publisher', $metadata->publisher); + $opfMetadata->appendChild($title); + + if (isset($metadata->title)) { + $title = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:title', $metadata->title); + $opfMetadata->appendChild($title); + } + if (isset($metadata->author)) { + $author = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:creator', $metadata->author); + $opfMetadata->appendChild($author); + $author->setAttribute('opf:role', 'aut'); + } + if (isset($metadata->publishDate)) { + $title = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:date', $metadata->publishDate . 'T00:00:00+00:00'); + $opfMetadata->appendChild($title); + } + if (isset($metadata->language)) { + $title = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:language', mb_substr(mb_strtolower($metadata->language), 0, 3)); + $opfMetadata->appendChild($title); + } + + $description = $opf->createElementNS( + self::OPF_NAMESPACE_DC, 'dc:description', + $this->twig->render( + self::TWIG_TEMPLATE_METADATA_DESCRIPTION, + (array)$metadata + ) + ); + $opfMetadata->appendChild($description); + + $this->calibreAddMetaColumn($opfMetadata, 'buy', self::CALIBRE_COLUMN_BUY); + $this->calibreAddMetaColumn($opfMetadata, 'collection', self::CALIBRE_COLUMN_COLLECTION); + $this->calibreAddMetaColumn($opfMetadata, 'collection_manual', self::CALIBRE_COLUMN_COLLECTION_MANUAL); + $this->calibreAddMetaColumn($opfMetadata, 'genre', self::CALIBRE_COLUMN_GENRE); + $this->calibreAddMetaColumn($opfMetadata, 'groups_series', self::CALIBRE_COLUMN_GROUPS_SERIES); + + if (file_put_contents($fileParentDirectory->getPathname() . DIRECTORY_SEPARATOR . 'metadata.opf', $opf->saveXML()) === false) { + $output->writeln('<error>Failed to write the Calibre metadata: ' . (new LastErrorException())->getMessage() . '</error>'); + return; + } + + $output->writeln('Calibre metadata generated !', OutputInterface::VERBOSITY_VERBOSE); + //endregion + } + + /** + * Read and parse and XML document of a ZIP archive + * + * @param ZipArchive $fileArchive The ZIP archive + * @param string $documentPath The XML document path in ZIP archive + * @param string $label The label for the command line output + * @param string $namespace The XPath namespace to register + * @param OutputInterface $output The command line output + * + * @return DOMXPath|null The XPath or Null if failed + */ + private function readAndParseXmlDocument (ZipArchive $fileArchive, string $documentPath, string $label, string $namespace, OutputInterface $output): ?DOMXPath { + //region Read the file + if (($stream = $fileArchive->getStream($documentPath)) === false) { + $output->writeln('<error>Failed to open ' . $label . ': ' . $fileArchive->getStatusString() . '</error>'); + return null; + } + if (($content = stream_get_contents($stream)) === false) { + $output->writeln('<error>Failed to read ' . $label . ': ' . (new LastErrorException())->getMessage() . '</error>'); + return null; + } + fclose($stream); + unset($stream); + //endregion + //region Parse it as XML + libxml_clear_errors(); + $dom = new DOMDocument(); + if (!$dom->loadXML($content, LIBXML_COMPACT)) { + $errors = libxml_get_errors(); + + $output->writeln('<error>Failed to parse ' . $label . ': ' . count($errors) . ' errors</error>'); + foreach ($errors as $error) { + $levelName = match ($error->level) { + LIBXML_ERR_WARNING => 'Warning', + LIBXML_ERR_ERROR => 'Error', + LIBXML_ERR_FATAL => 'Fatal', + LIBXML_ERR_NONE => 'None', + default => 'Unknown (' . $error->level . ')' + }; + $output->writeln( + '<error>' . $levelName . ' #' . $error->code . ': ' . $error->message . ' (line: ' . $error->line . ', column: ' . $error->column . ')</error>', + OutputInterface::VERBOSITY_VERBOSE + ); + } + return null; + } + //endregion + //region Register XPath namespaces + $xpath = new DOMXPath($dom); + /** @noinspection HttpUrlsUsage */ + if (!$xpath->registerNamespace('r', $namespace)) { + $output->writeln('<error>Failed to register ' . $label . ' XPath namespace</error>'); + $fileArchive->close(); + return null; + } + //endregion + return $xpath; + } + /** + * Convert a number to the metadata form + * + * @param string $number The number to convert + * + * @return string The number in metadata form + */ + private function getMetadataNumber (string $number): string { + $numberLength = mb_strlen($number); + $number = MbstringExtended::str_pad($number, $numberLength + 3 - ($numberLength % 3), '0', STR_PAD_LEFT); + $numberParts = mb_str_split($number, 3); + $numberParts[0] = ltrim($numberParts[0], '0'); + return implode(',', $numberParts); + } + /** + * Add a Calibre meta column + * + * @param DOMElement $metadata The "metadata" node + * @param string $name The column name + * @param array $content The column content and definition + * + * @return void + * + * @throws DOMException If an error occurs + */ + private function calibreAddMetaColumn (DOMElement $metadata, string $name, array $content): void { + $metaColumn = $metadata->ownerDocument->createElement('meta'); + $metadata->appendChild($metaColumn); + + $metaColumn->setAttribute('name', 'calibre:user_metadata:#' . $name); + $metaColumn->setAttribute('content', json_encode($content)); } } \ No newline at end of file