<<<'TWIG'

{{ description }}

Chapters: {{ chapters ?? 1 }}

Word count: {{ words }}

Published: {{ publishDate }}

Last update: {{ lastUpdateDate ?? publishDate }}

Status: {{ status }}

Rated: {{ rated }}

Genre: {{ genre }}

Pairings: {{ characters }}

Source link:{{ url|default(' ') }}

Exported by: {{ exportedBy }}

TWIG
        ,
    ];

    /**
     * Calibre user metadata definition of the "#buy" custom column.
     * JSON-encoded as is into the generated OPF file (see calibreAddMetaColumn).
     */
    private const array CALIBRE_COLUMN_BUY = [
        'table' => 'custom_column_9',
        'column' => 'value',
        'datatype' => 'enumeration',
        'is_multiple' => null,
        'kind' => 'field',
        'name' => 'Acheté',
        'search_terms' => [
            '#buy',
        ],
        'label' => 'buy',
        'colnum' => 9,
        'display' => [
            'description' => '',
            'enum_values' => [
                'Non',
                'En ligne',
                'Papier',
                'Libre',
            ],
            'use_decorations' => 0,
            'enum_colors' => [
            ],
        ],
        'is_custom' => true,
        'is_category' => true,
        'link_column' => 'value',
        'category_sort' => 'value',
        'is_csp' => false,
        'is_editable' => true,
        'rec_index' => 22,
        '#value#' => 'Libre',
        '#extra#' => null,
        'is_multiple2' => [
        ],
    ];

    /**
     * Calibre user metadata definition of the "#collection" custom column (composite column).
     * JSON-encoded as is into the generated OPF file (see calibreAddMetaColumn).
     */
    private const array CALIBRE_COLUMN_COLLECTION = [
        'table' => 'custom_column_5',
        'column' => 'value',
        'datatype' => 'composite',
        'is_multiple' => null,
        'kind' => 'field',
        'name' => 'Collection',
        'search_terms' => [
            '#collection',
        ],
        'label' => 'collection',
        'colnum' => 5,
        'display' => [
            'make_category' => true,
            'composite_sort' => 'text',
            'contains_html' => false,
            'use_decorations' => 0,
            'composite_template' => '{#collection_manual:ifempty({#groups_series}{#groups_series_index:0>2s| [|]}{series:\'contains($,\'<ordre>\',\'\',strcat(test(field(\'#groups_series\'),\' - \', \'\'),$))\'})}',
            'description' => '',
        ],
        'is_custom' => true,
        'is_category' => false,
        'link_column' => 'value',
        'category_sort' => 'value',
        'is_csp' => false,
        'is_editable' => true,
        'rec_index' => 23,
        '#value#' => 'Fan-Fiction - Harry Potter',
        'is_multiple2' => [
        ],
    ];

    /**
     * Calibre user metadata definition of the "#collection_manual" custom column.
     * JSON-encoded as is into the generated OPF file (see calibreAddMetaColumn).
     */
    private const array CALIBRE_COLUMN_COLLECTION_MANUAL = [
        'table' => 'custom_column_7',
        'column' => 'value',
        'datatype' => 'text',
        'is_multiple' => null,
        'kind' => 'field',
        'name' => 'Nom manuel de collection',
        'search_terms' => [
            '#collection_manual',
        ],
        'label' => 'collection_manual',
        'colnum' => 7,
        'display' => [
            'use_decorations' => 0,
            'description' => '',
        ],
        'is_custom' => true,
        'is_category' => true,
        'link_column' => 'value',
        'category_sort' => 'value',
        'is_csp' => false,
        'is_editable' => true,
        'rec_index' => 24,
        '#value#' => 'Fan-Fiction - Harry Potter',
        '#extra#' => null,
        'is_multiple2' => [
        ],
    ];

    /**
     * Calibre user metadata definition of the "#genre" custom column (multiple values).
     * JSON-encoded as is into the generated OPF file (see calibreAddMetaColumn).
     */
    private const array CALIBRE_COLUMN_GENRE = [
        'table' => 'custom_column_2',
        'column' => 'value',
        'datatype' => 'text',
        'is_multiple' => '|',
        'kind' => 'field',
        'name' => 'Genre',
        'search_terms' => [
            '#genre',
        ],
        'label' => 'genre',
        'colnum' => 2,
        'display' => [
            'is_names' => false,
            'description' => '',
        ],
        'is_custom' => true,
        'is_category' => true,
        'link_column' => 'value',
        'category_sort' => 'value',
        'is_csp' => false,
        'is_editable' => true,
        'rec_index' => 25,
        '#value#' => [
            'Fan-Fiction',
            'Fantastique',
        ],
        '#extra#' => null,
        'is_multiple2' => [
            'cache_to_list' => '|',
            'ui_to_list' => ',',
            'list_to_ui' => ', ',
        ],
    ];

    /**
     * Calibre user metadata definition of the "#groups_series" custom column.
     * JSON-encoded as is into the generated OPF file (see calibreAddMetaColumn).
     */
    private const array CALIBRE_COLUMN_GROUPS_SERIES = [
        'table' => 'custom_column_6',
        'column' => 'value',
        'datatype' => 'series',
        'is_multiple' => null,
        'kind' => 'field',
        'name' => 'Groupe de séries',
        'search_terms' => [
            '#groups_series',
        ],
        'label' => 'groups_series',
        'colnum' => 6,
        'display' => [
            'description' => '',
        ],
        'is_custom' => true,
        'is_category' => true,
        'link_column' => 'value',
        'category_sort' => 'value',
        'is_csp' => false,
        'is_editable' => true,
        'rec_index' => 26,
        '#value#' => null,
        '#extra#' => null,
        'is_multiple2' => [
        ],
    ];

    /**
     * @var SingleCommandApplication The command
     */
    private readonly SingleCommandApplication $command;
    /**
     * @var Environment The twig environment
     */
    private Environment $twig;

    /**
     * Initialization: builds the console command and the Twig environment
     */
    public function __construct () {
        // NOTE(review): the line breaks inside this help text were lost in the collapsed source and are
        // reconstructed here — confirm against the original layout.
        $this->command = (new SingleCommandApplication())
            ->setName('calibre_metadata_parser_ff')
            ->setDescription(<<<'EOF'
Calibre metadata parser for FanFiction files
Each input file, if valid (EPUB file), is transformed to a Calibre input directory, containing the EPUB file and the metadata file (metadata.opf).
The metadata are extracted from the "title" page of the EPUB, based on FicHub.net or FF2EBOOK.com.
EOF
            )
            ->addArgument(
                Main::ARGUMENT_FILES,
                InputArgument::REQUIRED | InputArgument::IS_ARRAY,
                'The files or directories to process'
            )
            ->setCode($this->execute(...));

        $this->twig = new Environment(
            new ArrayLoader(self::TWIG_TEMPLATES),
        );
    }

    /**
     * Run the command
     *
     * @return void
     *
     * @throws Throwable If an error occurs
     */
    public function run (): void {
        $this->command->run();
    }

    /**
     * Execute the command: process every file or directory given on the command line
     *
     * @param InputInterface  $input  The command line input
     * @param OutputInterface $output The command line output
     *
     * @return int The command exit status code
     *
     * @throws Throwable If an error occurs
     */
    private function execute (InputInterface $input, OutputInterface $output): int {
        foreach ($input->getArgument(self::ARGUMENT_FILES) as $file) {
            $fileInfo = new SplFileInfo($file);

            //region Check if file or directory exists
            if (!file_exists($fileInfo->getPathname())) {
                $output->writeln('<error>Unable to find file or directory: ' . $fileInfo->getPathname() . '</error>');

                // A missing path cannot be processed: go on with the next argument
                continue;
            }
            //endregion

            if ($fileInfo->isDir()) {
                $this->processDirectory($fileInfo, $output);
            }
            else {
                $this->processFile($fileInfo, $output);
            }
        }

        return Command::SUCCESS;
    }

    /**
     * Process a directory: every EPUB file found (recursively) is processed
     *
     * @param SplFileInfo     $directory The directory
     * @param OutputInterface $output    The command line output
     *
     * @return void
     *
     * @throws Throwable If an error occurs
     */
    private function processDirectory (SplFileInfo $directory, OutputInterface $output): void {
        //region Check directory is readable
        $output->writeln('Processing directory: ' . $directory->getPathname());
        if (!$directory->isReadable()) {
            $output->writeln('<error>The directory is not readable</error>');

            return;
        }
        //endregion

        //region Treat each EPUB files (check recursively)
        $directoryIterator = new RecursiveDirectoryIterator($directory->getPathname());
        $directoryIterator = new RecursiveIteratorIterator($directoryIterator);
        $directoryIterator = new RegexIterator($directoryIterator, /** @lang PhpRegExp */ '/\.epub$/i', RegexIterator::MATCH);

        // processFile() may move files into new subdirectories: collect the list first so the tree is not
        // modified while it is being traversed (and moved files are not visited twice)
        /** @var SplFileInfo $file */
        foreach (iterator_to_array($directoryIterator, false) as $file) {
            $this->processFile($file, $output);
        }
        //endregion
    }

    /**
     * Process a file: isolate the EPUB in its own directory (if necessary), extract the metadata from its first
     * page and generate the Calibre metadata file (metadata.opf) next to it
     *
     * @param SplFileInfo     $file   The file
     * @param OutputInterface $output The command line output
     *
     * @return void
     *
     * @throws Throwable If an error occurs
     */
    private function processFile (SplFileInfo $file, OutputInterface $output): void {
        //region EPUB check and preparation
        //region Check file is OK for processing
        $output->writeln('Processing file: ' . $file->getPathname());
        if (mb_strtolower($file->getExtension()) !== 'epub') {
            $output->writeln('<error>Invalid file extension, "epub" expected</error>');

            return;
        }
        if (!$file->isReadable()) {
            $output->writeln('<error>The file is not readable</error>');

            return;
        }
        $fileRealPath = $file->getRealPath();
        if ($fileRealPath === false) {
            $output->writeln('<error>Unable to resolve the file real path</error>');

            return;
        }
        $fileParentDirectory = new SplFileInfo(dirname($fileRealPath));
        if (!$fileParentDirectory->isReadable()) {
            $output->writeln('<error>The file parent directory is not readable</error>');

            return;
        }
        if (!$fileParentDirectory->isWritable()) {
            $output->writeln('<error>The file parent directory is not writable</error>');

            return;
        }
        //endregion

        //region Create subdirectory for the file (if necessary)
        $parentDirectoryFiles = scandir($fileParentDirectory->getPathname());
        if ($parentDirectoryFiles === false) {
            $output->writeln('<error>Failed to list the file parent directory: ' . (new LastErrorException())->getMessage() . '</error>');

            return;
        }
        // The EPUB must be alone in its directory (an existing "metadata.opf" is tolerated)
        $nbFiles = count(array_diff($parentDirectoryFiles, ['.', '..', 'metadata.opf']));
        if ($nbFiles > 1) {
            $fileParentDirectory = new SplFileInfo(
                $fileParentDirectory->getPathname() . DIRECTORY_SEPARATOR . $file->getBasename('.' . $file->getExtension())
            );
            if (!mkdir($fileParentDirectory->getPathname())) {
                $output->writeln(
                    '<error>Failed to create subdirectory "' . $fileParentDirectory->getPathname() . '": '
                    . (new LastErrorException())->getMessage() . '</error>'
                );

                return;
            }

            $oldFile = $file;
            $file = new SplFileInfo($fileParentDirectory->getPathname() . DIRECTORY_SEPARATOR . $file->getFilename());
            if (!rename($oldFile->getPathname(), $file->getPathname())) {
                $output->writeln(
                    '<error>Failed to move file to subdirectory "' . $oldFile->getPathname() . '" => "' . $file->getPathname() . '": '
                    . (new LastErrorException())->getMessage() . '</error>'
                );

                return;
            }
        }
        //endregion
        //endregion

        //region Read EPUB first page
        $firstPageContent = $this->readFirstPage($file, $output);
        if ($firstPageContent === null) {
            return;
        }
        //endregion

        //region Extract metadata information from first page
        $metadata = $this->parseMetadata($firstPageContent, $output);
        if ($metadata === null) {
            return;
        }
        //endregion

        //region Generate Calibre metadata (OPF file)
        $this->writeCalibreMetadata($metadata, $fileParentDirectory, $output);
        //endregion
    }

    /**
     * Open the EPUB (ZIP archive), remove the description of its root file and read its first page
     *
     * The archive is always closed before returning, whatever the outcome.
     *
     * @param SplFileInfo     $file   The EPUB file
     * @param OutputInterface $output The command line output
     *
     * @return string|null The first page content or Null if failed (the error is already reported)
     */
    private function readFirstPage (SplFileInfo $file, OutputInterface $output): ?string {
        //region Open the EPUB (ZIP archive)
        $fileArchive = new ZipArchive();
        if (($rep = $fileArchive->open($file->getPathname())) !== true) {
            $output->writeln('<error>Failed to open file as ZIP archive (error #' . $rep . '): ' . $fileArchive->getStatusString() . '</error>');

            return null;
        }
        //endregion

        try {
            $rootFilePath = $this->findRootFilePath($fileArchive, $output);
            if ($rootFilePath === null) {
                return null;
            }

            $tocFilePath = $this->processRootFile($fileArchive, $rootFilePath, $output);
            if ($tocFilePath === null) {
                return null;
            }

            $firstPagePath = $this->findFirstPagePath($fileArchive, $tocFilePath, $output);
            if ($firstPagePath === null) {
                return null;
            }

            $output->writeln('Read first page: ' . $firstPagePath, OutputInterface::VERBOSITY_VERBOSE);

            return $this->readArchiveEntry($fileArchive, $firstPagePath, 'first page (' . $firstPagePath . ')', $output);
        }
        finally {
            //region Close the EPUB
            // Single owner of the archive: closing it twice raises an error on PHP 8.
            // Pending modifications (root file replacement) are written at this point.
            if (!$fileArchive->close()) {
                $output->writeln('<error>Failed to save and close the EPUB archive</error>');
            }
            //endregion
        }
    }

    /**
     * Read the meta "container" file of the EPUB and extract the root file (OPF package) path
     *
     * @param ZipArchive      $fileArchive The EPUB archive
     * @param OutputInterface $output      The command line output
     *
     * @return string|null The root file path in the archive or Null if failed (the error is already reported)
     */
    private function findRootFilePath (ZipArchive $fileArchive, OutputInterface $output): ?string {
        $metaContainerPath = 'META-INF/container.xml';
        $output->writeln('Processing meta "container": ' . $metaContainerPath, OutputInterface::VERBOSITY_VERBOSE);

        //region Read and parse
        $metaContainerXPath = $this->readAndParseXmlDocument(
            $fileArchive,
            $metaContainerPath,
            'meta "container" file',
            'urn:oasis:names:tc:opendocument:xmlns:container',
            $output
        );
        if ($metaContainerXPath === null) {
            return null;
        }
        //endregion

        //region Extract root file path
        $rootFileNodeList = $metaContainerXPath->query('/r:container/r:rootfiles/r:rootfile[@media-type="application/oebps-package+xml"]');
        if ($rootFileNodeList === false) {
            $output->writeln('<error>Unable to query root file path</error>');

            return null;
        }
        $rootFilePath = $this->findFirstAttribute($rootFileNodeList, 'full-path');
        if ($rootFilePath === null) {
            $output->writeln('<error>Unable to find root file path</error>');

            return null;
        }
        //endregion

        return $rootFilePath;
    }

    /**
     * Read the root file (OPF package): extract the TOC file (ncx) path and remove the descriptions, if any,
     * from the root file stored in the archive
     *
     * @param ZipArchive      $fileArchive  The EPUB archive
     * @param string          $rootFilePath The root file path in the archive
     * @param OutputInterface $output       The command line output
     *
     * @return string|null The TOC file path in the archive or Null if failed (the error is already reported)
     */
    private function processRootFile (ZipArchive $fileArchive, string $rootFilePath, OutputInterface $output): ?string {
        $output->writeln('Processing root file: ' . $rootFilePath, OutputInterface::VERBOSITY_VERBOSE);

        //region Read and parse
        /** @noinspection HttpUrlsUsage */
        $rootFileXPath = $this->readAndParseXmlDocument(
            $fileArchive,
            $rootFilePath,
            'root file',
            'http://www.idpf.org/2007/opf',
            $output
        );
        if ($rootFileXPath === null) {
            return null;
        }
        //endregion

        //region Extract TOC file path (ncx)
        $tocItemNodeList = $rootFileXPath->query('/r:package/r:manifest/r:item[@id="ncx"][@media-type="application/x-dtbncx+xml"]');
        if ($tocItemNodeList === false) {
            $output->writeln('<error>Unable to query TOC file path</error>');

            return null;
        }
        $tocFilePath = $this->findFirstAttribute($tocItemNodeList, 'href');
        if ($tocFilePath === null) {
            $output->writeln('<error>Unable to find TOC file path</error>');

            return null;
        }
        //endregion

        //region Delete description if present
        //region Deletion from DOM
        $rootFileDirty = false;
        /** @noinspection HttpUrlsUsage */
        $rootFileXPath->registerNamespace('dc', 'http://purl.org/dc/elements/1.1/');
        $descriptionNodeList = $rootFileXPath->query('/r:package/r:metadata/dc:description');
        if ($descriptionNodeList !== false && $descriptionNodeList->count() > 0) {
            $output->writeln($descriptionNodeList->count() . ' descriptions found → removing', OutputInterface::VERBOSITY_VERBOSE);
            /** @var DOMNode $descriptionNode */
            foreach ($descriptionNodeList as $descriptionNode) {
                $descriptionNode->parentNode->removeChild($descriptionNode);
                $rootFileDirty = true;
            }
        }
        else {
            $output->writeln('no descriptions found', OutputInterface::VERBOSITY_VERBOSE);
        }
        //endregion

        //region Overwrite root file in ZIP
        if ($rootFileDirty) {
            $rootFileDocument = $rootFileXPath->document;
            $rootFileDocument->formatOutput = true;
            $rootFileContent = $rootFileDocument->saveXML();
            if ($rootFileContent === false) {
                $output->writeln('<error>Failed to serialize root file: ' . $rootFilePath . '</error>');

                return null;
            }

            // The entry is replaced from memory: no temporary file is left behind in the system temp directory
            if (!$fileArchive->addFromString($rootFilePath, $rootFileContent, ZipArchive::FL_OVERWRITE | ZipArchive::FL_ENC_UTF_8)) {
                $output->writeln('<error>Unable to replace root file: ' . $fileArchive->getStatusString() . '</error>');

                return null;
            }
        }
        //endregion
        //endregion

        // The TOC file path is relative to the root file
        return $this->resolveArchivePath($rootFilePath, $tocFilePath);
    }

    /**
     * Read the TOC file (ncx) and extract the path of the first page (lowest play order)
     *
     * @param ZipArchive      $fileArchive The EPUB archive
     * @param string          $tocFilePath The TOC file path in the archive
     * @param OutputInterface $output      The command line output
     *
     * @return string|null The first page path in the archive or Null if failed (the error is already reported)
     */
    private function findFirstPagePath (ZipArchive $fileArchive, string $tocFilePath, OutputInterface $output): ?string {
        $output->writeln('Processing TOC file: ' . $tocFilePath, OutputInterface::VERBOSITY_VERBOSE);

        //region Read and parse
        /** @noinspection HttpUrlsUsage */
        $tocFileXPath = $this->readAndParseXmlDocument(
            $fileArchive,
            $tocFilePath,
            'TOC file',
            'http://www.daisy.org/z3986/2005/ncx/',
            $output
        );
        if ($tocFileXPath === null) {
            return null;
        }
        //endregion

        //region Extract page list with play order
        $tocPagesNodeList = $tocFileXPath->query('/r:ncx/r:navMap/r:navPoint');
        if ($tocPagesNodeList === false) {
            $output->writeln('<error>Unable to query page list</error>');

            return null;
        }
        if ($tocPagesNodeList->count() === 0) {
            $output->writeln('<error>Unable to find page list</error>');

            return null;
        }

        $pages = [];
        /** @var DOMNode $tocPageNode */
        foreach ($tocPagesNodeList as $tocPageNode) {
            if (!$tocPageNode instanceof DOMElement) {
                continue;
            }

            $tocPagePlayOrder = (int)$tocPageNode->getAttribute('playOrder');

            $tocPageContentNodeList = $tocFileXPath->query('./r:content', $tocPageNode);
            if ($tocPageContentNodeList === false) {
                $output->writeln('<error>Unable to query page (play order: ' . $tocPagePlayOrder . ') content</error>');

                return null;
            }
            if ($tocPageContentNodeList->count() === 0) {
                $output->writeln('<error>Unable to find page (play order: ' . $tocPagePlayOrder . ') content</error>');

                return null;
            }

            $pageContentPath = $this->findFirstAttribute($tocPageContentNodeList, 'src');
            if ($pageContentPath === null) {
                // Page without source: ignored, another page may still be usable
                $output->writeln('<error>Unable to find page (play order: ' . $tocPagePlayOrder . ') content path</error>');
                continue;
            }

            $pages[$tocPagePlayOrder] = $pageContentPath;
        }
        //endregion

        //region Extract first page
        if ($pages === []) {
            $output->writeln('<error>Unable to find first page</error>');

            return null;
        }
        ksort($pages, SORT_NUMERIC);
        $firstPagePath = reset($pages);
        //endregion

        // The page path is relative to the TOC file
        return $this->resolveArchivePath($tocFilePath, $firstPagePath);
    }

    /**
     * Get the value of an attribute from the first element of a node list having it
     *
     * @param iterable $nodes     The nodes to search in
     * @param string   $attribute The attribute name
     *
     * @return string|null The attribute value or Null if no element has the attribute
     */
    private function findFirstAttribute (iterable $nodes, string $attribute): ?string {
        foreach ($nodes as $node) {
            if ($node instanceof DOMElement && $node->hasAttribute($attribute)) {
                return $node->getAttribute($attribute);
            }
        }

        return null;
    }

    /**
     * Resolve a path relative to the directory of another archive entry
     *
     * @param string $referencePath The path (in the archive) of the entry declaring the relative path
     * @param string $relativePath  The relative path
     *
     * @return string The path in the archive
     */
    private function resolveArchivePath (string $referencePath, string $relativePath): string {
        $directory = dirname($referencePath);

        // dirname() returns "." for an entry at the archive root, but ZIP entry names have no "./" prefix
        if ($directory === '.' || $directory === '') {
            return $relativePath;
        }

        return $directory . '/' . $relativePath;
    }

    /**
     * Read the whole content of an entry of a ZIP archive
     *
     * @param ZipArchive      $fileArchive The ZIP archive
     * @param string          $entryPath   The entry path in the ZIP archive
     * @param string          $label       The label for the command line output
     * @param OutputInterface $output      The command line output
     *
     * @return string|null The entry content or Null if failed (the error is already reported)
     */
    private function readArchiveEntry (ZipArchive $fileArchive, string $entryPath, string $label, OutputInterface $output): ?string {
        if (($stream = $fileArchive->getStream($entryPath)) === false) {
            $output->writeln('<error>Failed to open ' . $label . ': ' . $fileArchive->getStatusString() . '</error>');

            return null;
        }

        try {
            if (($content = stream_get_contents($stream)) === false) {
                $output->writeln('<error>Failed to read ' . $label . ': ' . (new LastErrorException())->getMessage() . '</error>');

                return null;
            }

            return $content;
        }
        finally {
            // Release the stream on success as well as on failure
            fclose($stream);
        }
    }

    /**
     * Extract the metadata from the first page content, according to the detected exporter (FicHub or FF2EBOOK)
     *
     * @param string          $firstPageContent The first page content (HTML)
     * @param OutputInterface $output           The command line output
     *
     * @return stdClass|null The metadata or Null if the format is not recognized (the error is already reported)
     */
    private function parseMetadata (string $firstPageContent, OutputInterface $output): ?stdClass {
        $output->writeln('Parsing metadata', OutputInterface::VERBOSITY_VERBOSE);

        //region FicHub
        // NOTE(review): the original pattern was damaged (markup stripped); this tolerant form accepts the
        // sentence with or without a link around "FicHub.net" — confirm against a real FicHub title page.
        if (preg_match('`Exported\s+with\s+the\s+assistance\s+of\s+(?:<a\b[^>]*>)?\s*FicHub\.net`i', $firstPageContent) === 1) {
            return $this->parseFicHubMetadata($firstPageContent);
        }
        //endregion

        //region FF2EBOOK
        if (
            preg_match(
                '`<div\s+class="footer"\s*>\s*Converted\s+using\s+<a\s+href="http://www\.ff2ebook\.com"\s*>\s*www\.FF2EBOOK\.com\s*</a>\s*<br ?/?>\s*Date:\s*(?<date>\d{4}-\d{2}-\d{2})\s*</div>`i',
                $firstPageContent,
                $match
            ) === 1
        ) {
            return $this->parseFf2EbookMetadata($firstPageContent, $match['date']);
        }
        //endregion

        //region Unknown (error)
        $output->writeln('<error>Unrecognized format</error>');
        $output->writeln($firstPageContent, OutputInterface::VERBOSITY_VERY_VERBOSE);

        return null;
        //endregion
    }

    /**
     * Extract the metadata from a FicHub.net title page
     *
     * @param string $firstPageContent The first page content (HTML)
     *
     * @return stdClass The metadata (only the properties found are set)
     */
    private function parseFicHubMetadata (string $firstPageContent): stdClass {
        $metadata = new stdClass();
        $metadata->publisher = 'FanFiction.net (FicHub.net)';
        $metadata->exportedBy = 'FicHub.net';

        // The "title" named group is required below: it had been lost in the damaged pattern
        if (preg_match('`<h1\b[^>]*>(?<title>.+?)\s*</h1>`i', $firstPageContent, $match) === 1) {
            $metadata->title = $match['title'];
        }
        if (preg_match('`<p>\s*(?:<b>)?\s*By\s*:\s*(?<author>.+?)\s*(?:</b>)?</p>`i', $firstPageContent, $match) === 1) {
            $metadata->author = $match['author'];
        }

        // Each paragraph is either "<type>: <value>" or a free text (summary, source link, export notice)
        preg_match_all('`<p>(?:(?<type>\w+):\s*)?(?<value>.+?)</p>`is', $firstPageContent, $matches, PREG_SET_ORDER);
        foreach ($matches as $match) {
            switch (mb_strtolower($match['type'] ?? '')) {
                case 'status':
                    $metadata->status = $match['value'];
                    break;
                case 'published':
                    $metadata->publishDate = $match['value'];
                    break;
                case 'updated':
                    $metadata->lastUpdateDate = $match['value'];
                    break;
                case 'words':
                    $metadata->words = $this->getMetadataNumber($match['value']);
                    break;
                case 'chapters':
                    $metadata->chapters = $this->getMetadataNumber($match['value']);
                    break;
                case 'rated':
                    // Format: "Fiction <rating> - <type>: <value> - <type>: <value>…"
                    $parts = explode('-', $match['value']);
                    if (preg_match('`^\s*Fiction\s+(?<rated>.+?)\s*$`i', trim(array_shift($parts)), $ratedMatch) === 1) {
                        $metadata->rated = $ratedMatch['rated'];
                    }
                    foreach ($parts as $part) {
                        if (preg_match('`^\s*(?<type>\w+)\s*:\s*(?<value>.+)\s*$`i', $part, $partMatch) !== 1) {
                            continue;
                        }
                        switch (mb_strtolower($partMatch['type'] ?? '')) {
                            case 'language':
                                $metadata->language = $partMatch['value'];
                                break;
                            case 'genre':
                                $metadata->genre = $partMatch['value'];
                                break;
                            case 'characters':
                                $metadata->characters = $partMatch['value'];
                                break;
                        }
                    }
                    break;
                case '':
                    if (preg_match('`^Original\s+source\s*:\s*.+href\s*=\s*"(?<url>[^"]+)"`i', $match['value'], $matchUrl) === 1) {
                        $metadata->url = $matchUrl['url'];
                    }
                    elseif (preg_match('`^Exported\s+with`i', $match['value']) !== 1) {
                        $metadata->summary = $match['value'];
                    }
                    break;
            }
        }

        return $metadata;
    }

    /**
     * Extract the metadata from a FF2EBOOK.com title page
     *
     * @param string $firstPageContent The first page content (HTML)
     * @param string $exportDate       The export date found in the page footer
     *
     * @return stdClass The metadata (only the properties found are set)
     */
    private function parseFf2EbookMetadata (string $firstPageContent, string $exportDate): stdClass {
        $metadata = new stdClass();
        $metadata->publisher = 'FanFiction.net (www.FF2EBOOK.com)';
        $metadata->exportedBy = 'www.FF2EBOOK.com (' . $exportDate . ')';

        if (
            preg_match(
                '`<div\s+class="fic-title">\s*(?:<a\s+href\s*=\s*"(?<url>[^"]+)"\s*>)?\s*(?<title>.+?)\s*(?:</a>)?\s*</div>`i',
                $firstPageContent,
                $match
            ) === 1
        ) {
            $metadata->title = $match['title'];
            if (($match['url'] ?? '') !== '') {
                $metadata->url = $match['url'];
            }
        }
        if (
            preg_match(
                '`<div\s+class="fic-author">\s*By:\s*(?:<a\s+href\s*=\s*"[^"]+"\s*>)?\s*(?<author>.+?)\s*(?:</a>)?\s*</div>`i',
                $firstPageContent,
                $match
            ) === 1
        ) {
            $metadata->author = $match['author'];
        }

        preg_match_all('`<span\s+class="bold">(?<type>.+?)\s*:\s*</span>\s*(?<value>.+?)\s*<br ?/?>`is', $firstPageContent, $matches, PREG_SET_ORDER);
        foreach ($matches as $match) {
            switch (mb_strtolower($match['type'] ?? '')) {
                case 'status':
                    $metadata->status = match (mb_strtolower($match['value'])) {
                        'completed' => 'complete',
                        default => $match['value'],
                    };
                    break;
                case 'published':
                    $metadata->publishDate = $match['value'];
                    break;
                case 'last updated':
                    $metadata->lastUpdateDate = $match['value'];
                    break;
                case 'words count':
                    $metadata->words = $match['value'];
                    break;
                case 'chapters count':
                    $metadata->chapters = $match['value'];
                    break;
                case 'pairings/main char.':
                    // Appended after the pairings of "additional infos", whatever the order of the two fields
                    $metadata->characters = (isset($metadata->characters) ? $metadata->characters . ' ' : '') . $match['value'];
                    break;
                case 'additional infos':
                    if (preg_match('`^\s*(?:(?<genre>[^>]+)\s*-\s*)?(?<pairings>[^>]+)\s*`i', $match['value'], $infoMatch) === 1) {
                        if (($infoMatch['genre'] ?? '') !== '') {
                            $metadata->genre = $infoMatch['genre'];
                        }
                        if (($infoMatch['pairings'] ?? '') !== '') {
                            $metadata->characters = $infoMatch['pairings'] . (isset($metadata->characters) ? ' ' . $metadata->characters : '');
                        }
                    }
                    break;
                case 'summary':
                    $metadata->summary = $match['value'];
                    break;
            }
        }

        return $metadata;
    }

    /**
     * Generate the Calibre metadata file (metadata.opf) in a directory
     *
     * @param stdClass        $metadata  The metadata extracted from the EPUB first page
     * @param SplFileInfo     $directory The directory in which the "metadata.opf" file is written
     * @param OutputInterface $output    The command line output
     *
     * @return void
     *
     * @throws Throwable If an error occurs
     */
    private function writeCalibreMetadata (stdClass $metadata, SplFileInfo $directory, OutputInterface $output): void {
        $output->writeln('Generate Calibre metadata (OPF file)', OutputInterface::VERBOSITY_VERBOSE);

        $opf = new DOMDocument('1.0', 'utf-8');
        $opf->formatOutput = true;

        /** @noinspection HttpUrlsUsage */
        $root = $opf->createElementNS('http://www.idpf.org/2007/opf', 'package');
        $opf->appendChild($root);
        $root->setAttribute('unique-identifier', 'uuid_id');
        $root->setAttribute('version', '2.0');

        $opfMetadata = $opf->createElement('metadata');
        $root->appendChild($opfMetadata);
        $opfMetadata->setAttributeNS(self::DOM_NAMESPACE_ATTRIBUTE, 'xmlns:dc', self::OPF_NAMESPACE_DC);
        $opfMetadata->setAttributeNS(self::DOM_NAMESPACE_ATTRIBUTE, 'xmlns:opf', self::OPF_NAMESPACE_OPF);

        $publisher = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:publisher', $metadata->publisher);
        $opfMetadata->appendChild($publisher);

        if (isset($metadata->title)) {
            $title = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:title', $metadata->title);
            $opfMetadata->appendChild($title);
        }
        if (isset($metadata->author)) {
            $author = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:creator', $metadata->author);
            $opfMetadata->appendChild($author);
            $author->setAttribute('opf:role', 'aut');
        }
        if (isset($metadata->publishDate)) {
            $date = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:date', $metadata->publishDate . 'T00:00:00+00:00');
            $opfMetadata->appendChild($date);
        }
        if (isset($metadata->language)) {
            $language = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:language', mb_substr(mb_strtolower($metadata->language), 0, 3));
            $opfMetadata->appendChild($language);
        }

        // The template reads "description" while the parsers store the text as "summary": expose it under
        // the name the template expects, otherwise the summary never reaches the generated description
        $templateContext = ['description' => $metadata->summary ?? ''] + (array)$metadata;
        $description = $opf->createElementNS(
            self::OPF_NAMESPACE_DC,
            'dc:description',
            $this->twig->render(self::TWIG_TEMPLATE_METADATA_DESCRIPTION, $templateContext)
        );
        $opfMetadata->appendChild($description);

        $this->calibreAddMetaColumn($opfMetadata, 'buy', self::CALIBRE_COLUMN_BUY);
        $this->calibreAddMetaColumn($opfMetadata, 'collection', self::CALIBRE_COLUMN_COLLECTION);
        $this->calibreAddMetaColumn($opfMetadata, 'collection_manual', self::CALIBRE_COLUMN_COLLECTION_MANUAL);
        $this->calibreAddMetaColumn($opfMetadata, 'genre', self::CALIBRE_COLUMN_GENRE);
        $this->calibreAddMetaColumn($opfMetadata, 'groups_series', self::CALIBRE_COLUMN_GROUPS_SERIES);

        if (file_put_contents($directory->getPathname() . DIRECTORY_SEPARATOR . 'metadata.opf', $opf->saveXML()) === false) {
            $output->writeln('<error>Failed to write the Calibre metadata: ' . (new LastErrorException())->getMessage() . '</error>');

            return;
        }

        $output->writeln('Calibre metadata generated !', OutputInterface::VERBOSITY_VERBOSE);
    }

    /**
     * Read and parse an XML document of a ZIP archive
     *
     * The archive is never closed here: it stays owned by the caller.
     *
     * @param ZipArchive      $fileArchive  The ZIP archive
     * @param string          $documentPath The XML document path in ZIP archive
     * @param string          $label        The label for the command line output
     * @param string          $namespace    The XPath namespace to register (with the "r" prefix)
     * @param OutputInterface $output       The command line output
     *
     * @return DOMXPath|null The XPath or Null if failed (the error is already reported)
     */
    private function readAndParseXmlDocument (ZipArchive $fileArchive, string $documentPath, string $label, string $namespace, OutputInterface $output): ?DOMXPath {
        //region Read the file
        $content = $this->readArchiveEntry($fileArchive, $documentPath, $label, $output);
        if ($content === null) {
            return null;
        }
        if ($content === '') {
            // DOMDocument::loadXML() refuses an empty source
            $output->writeln('<error>Failed to parse ' . $label . ': empty document</error>');

            return null;
        }
        //endregion

        //region Parse it as XML
        // Errors are only collected by libxml_get_errors() when internal error handling is enabled
        $previousUseInternalErrors = libxml_use_internal_errors(true);
        libxml_clear_errors();
        try {
            $dom = new DOMDocument();
            if (!$dom->loadXML($content, LIBXML_COMPACT)) {
                $errors = libxml_get_errors();
                $output->writeln('<error>Failed to parse ' . $label . ': ' . count($errors) . ' errors</error>');
                foreach ($errors as $error) {
                    $levelName = match ($error->level) {
                        LIBXML_ERR_WARNING => 'Warning',
                        LIBXML_ERR_ERROR => 'Error',
                        LIBXML_ERR_FATAL => 'Fatal',
                        LIBXML_ERR_NONE => 'None',
                        default => 'Unknown (' . $error->level . ')'
                    };
                    $output->writeln(
                        '<error>' . $levelName . ' #' . $error->code . ': ' . $error->message . ' (line: ' . $error->line . ', column: ' . $error->column . ')</error>',
                        OutputInterface::VERBOSITY_VERBOSE
                    );
                }

                return null;
            }
        }
        finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousUseInternalErrors);
        }
        //endregion

        //region Register XPath namespaces
        $xpath = new DOMXPath($dom);
        if (!$xpath->registerNamespace('r', $namespace)) {
            $output->writeln('<error>Failed to register ' . $label . ' XPath namespace</error>');

            return null;
        }
        //endregion

        return $xpath;
    }

    /**
     * Convert a number to the metadata form: digits grouped by three, separated by commas ("12345" → "12,345")
     *
     * A value which is not made of digits only (already formatted, empty…) is returned unchanged.
     *
     * @param string $number The number to convert
     *
     * @return string The number in metadata form
     */
    private function getMetadataNumber (string $number): string {
        if (preg_match('`^\d+$`', $number) !== 1) {
            return $number;
        }

        // Length of the leading group: 0 when the length is a multiple of 3 (no empty leading group, hence no
        // leading comma)
        $headLength = mb_strlen($number) % 3;

        $numberParts = mb_str_split(mb_substr($number, $headLength), 3);
        if ($headLength > 0) {
            array_unshift($numberParts, mb_substr($number, 0, $headLength));
        }

        return implode(',', $numberParts);
    }

    /**
     * Add a Calibre meta column: a "meta" element named "calibre:user_metadata:#<name>" whose content is the
     * JSON-encoded column definition
     *
     * @param DOMElement $metadata The "metadata" node
     * @param string     $name     The column name
     * @param array      $content  The column content and definition
     *
     * @return void
     *
     * @throws DOMException If an error occurs
     */
    private function calibreAddMetaColumn (DOMElement $metadata, string $name, array $content): void {
        $metaColumn = $metadata->ownerDocument->createElement('meta');
        $metadata->appendChild($metaColumn);
        $metaColumn->setAttribute('name', 'calibre:user_metadata:#' . $name);
        $metaColumn->setAttribute('content', json_encode($content));
    }
}