You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

958 lines
38 KiB
PHP

<?php
/** @noinspection HtmlUnknownTarget */
namespace jrosset;
use DOMDocument;
use DOMElement;
use DOMException;
use DOMNode;
use DOMXPath;
use jrosset\LastErrorException\LastErrorException;
use RecursiveDirectoryIterator;
use RecursiveIteratorIterator;
use RegexIterator;
use SplFileInfo;
use stdClass;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\SingleCommandApplication;
use Throwable;
use Twig\Environment;
use Twig\Loader\ArrayLoader;
use ZipArchive;
/**
* The "main" command
*/
class Main {
/**
* Argument name for input files
*/
private const string ARGUMENT_FILES = 'files';
/**
* Namespace URI passed to DOMElement::setAttributeNS() when declaring the "xmlns:dc" and "xmlns:opf" prefixes
*/
private const string DOM_NAMESPACE_ATTRIBUTE = 'http://www.w3.org/2000/xmlns/';
/**
* Namespace URI of the "dc:*" elements (publisher, title, creator, date, language, description) of the generated OPF
*/
private const string OPF_NAMESPACE_DC = 'http://purl.org/dc/elements/1.1/';
/**
* Namespace URI bound to the "opf" prefix in the generated OPF
*/
private const string OPF_NAMESPACE_OPF = 'http://www.idpf.org/2007/opf';
/**
* Name of the Twig template used to render the "dc:description" content (key of {@see Main::TWIG_TEMPLATES})
*/
private const string TWIG_TEMPLATE_METADATA_DESCRIPTION = 'metadata_description.html';
/**
* Twig templates given to the ArrayLoader, indexed by template name
*
* The description template reads the variables "description", "chapters" (falls back to 1), "words", "publishDate",
* "lastUpdateDate" (falls back to "publishDate"), "status", "rated", "genre", "characters", "url" and "exportedBy";
* all of them must be supplied by the render context.
*/
private const array TWIG_TEMPLATES = [
self::TWIG_TEMPLATE_METADATA_DESCRIPTION => <<<'TWIG'
<div>
<p>{{ description }}</p>
<p><strong>Chapters:</strong> {{ chapters ?? 1 }}</p>
<p><strong>Word count:</strong> {{ words }}</p>
<p><strong>Published:</strong> {{ publishDate }}</p>
<p><strong>Last update:</strong> {{ lastUpdateDate ?? publishDate }}</p>
<p><strong>Status:</strong> {{ status }}</p>
<p><strong>Rated:</strong> {{ rated }}</p>
<p><strong>Genre:</strong> {{ genre }}</p>
<p><strong>Pairings:</strong> {{ characters }}</p>
<p><strong>Source link:</strong><a href="{{ url|default('#') }}"><span style="color: #6cb4ee">{{ url|default(' ') }}</span></a></p>
<p><strong>Exported by:</strong> {{ exportedBy }}</p>
</div>
TWIG
,
];
/**
* Definition and preset value of the Calibre custom column "#buy" ("Acheté")
*
* Enumeration column (allowed values listed in "display.enum_values") preset to "Libre" through the "#value#" key.
* The whole array is JSON encoded into the "calibre:user_metadata:#buy" meta element of the generated OPF.
*/
private const array CALIBRE_COLUMN_BUY = [
'table' => 'custom_column_9',
'column' => 'value',
'datatype' => 'enumeration',
'is_multiple' => null,
'kind' => 'field',
'name' => 'Acheté',
'search_terms' => [
'#buy',
],
'label' => 'buy',
'colnum' => 9,
'display' => [
'description' => '',
'enum_values' => [
'Non',
'En ligne',
'Papier',
'Libre',
],
'use_decorations' => 0,
'enum_colors' => [
],
],
'is_custom' => true,
'is_category' => true,
'link_column' => 'value',
'category_sort' => 'value',
'is_csp' => false,
'is_editable' => true,
'rec_index' => 22,
'#value#' => 'Libre',
'#extra#' => null,
'is_multiple2' => [
],
];
/**
* Definition and preset value of the Calibre custom column "#collection" ("Collection")
*
* Composite column preset to "Fan-Fiction - Harry Potter" through the "#value#" key.
* The whole array is JSON encoded into the "calibre:user_metadata:#collection" meta element of the generated OPF.
*
* NOTE(review): "display.composite_template" contains the HTML entities "&gt;" and "&lt;"; presumably the template
* was copied from an already escaped OPF attribute, in which case the literal characters were intended — confirm
* against the Calibre library before changing it.
*/
private const array CALIBRE_COLUMN_COLLECTION = [
'table' => 'custom_column_5',
'column' => 'value',
'datatype' => 'composite',
'is_multiple' => null,
'kind' => 'field',
'name' => 'Collection',
'search_terms' => [
'#collection',
],
'label' => 'collection',
'colnum' => 5,
'display' => [
'make_category' => true,
'composite_sort' => 'text',
'contains_html' => false,
'use_decorations' => 0,
'composite_template' => '{#collection_manual:ifempty({#groups_series}{#groups_series_index:0&gt;2s| [|]}{series:\'contains($,\'&lt;ordre&gt;\',\'\',strcat(test(field(\'#groups_series\'),\' - \', \'\'),$))\'})}',
'description' => '',
],
'is_custom' => true,
'is_category' => false,
'link_column' => 'value',
'category_sort' => 'value',
'is_csp' => false,
'is_editable' => true,
'rec_index' => 23,
'#value#' => 'Fan-Fiction - Harry Potter',
'is_multiple2' => [
],
];
/**
* Definition and preset value of the Calibre custom column "#collection_manual" ("Nom manuel de collection")
*
* Text column preset to "Fan-Fiction - Harry Potter" through the "#value#" key.
* The whole array is JSON encoded into the "calibre:user_metadata:#collection_manual" meta element of the generated OPF.
*/
private const array CALIBRE_COLUMN_COLLECTION_MANUAL = [
'table' => 'custom_column_7',
'column' => 'value',
'datatype' => 'text',
'is_multiple' => null,
'kind' => 'field',
'name' => 'Nom manuel de collection',
'search_terms' => [
'#collection_manual',
],
'label' => 'collection_manual',
'colnum' => 7,
'display' => [
'use_decorations' => 0,
'description' => '',
],
'is_custom' => true,
'is_category' => true,
'link_column' => 'value',
'category_sort' => 'value',
'is_csp' => false,
'is_editable' => true,
'rec_index' => 24,
'#value#' => 'Fan-Fiction - Harry Potter',
'#extra#' => null,
'is_multiple2' => [
],
];
/**
* Definition and preset value of the Calibre custom column "#genre" ("Genre")
*
* Multi-valued text column (separators described by "is_multiple" / "is_multiple2") preset to the two values
* "Fan-Fiction" and "Fantastique" through the "#value#" key.
* The whole array is JSON encoded into the "calibre:user_metadata:#genre" meta element of the generated OPF.
*/
private const array CALIBRE_COLUMN_GENRE = [
'table' => 'custom_column_2',
'column' => 'value',
'datatype' => 'text',
'is_multiple' => '|',
'kind' => 'field',
'name' => 'Genre',
'search_terms' => [
'#genre',
],
'label' => 'genre',
'colnum' => 2,
'display' => [
'is_names' => false,
'description' => '',
],
'is_custom' => true,
'is_category' => true,
'link_column' => 'value',
'category_sort' => 'value',
'is_csp' => false,
'is_editable' => true,
'rec_index' => 25,
'#value#' => [
'Fan-Fiction',
'Fantastique',
],
'#extra#' => null,
'is_multiple2' => [
'cache_to_list' => '|',
'ui_to_list' => ',',
'list_to_ui' => ', ',
],
];
/**
* Definition of the Calibre custom column "#groups_series" ("Groupe de séries")
*
* Series column left without value ("#value#" is null).
* The whole array is JSON encoded into the "calibre:user_metadata:#groups_series" meta element of the generated OPF.
*/
private const array CALIBRE_COLUMN_GROUPS_SERIES = [
'table' => 'custom_column_6',
'column' => 'value',
'datatype' => 'series',
'is_multiple' => null,
'kind' => 'field',
'name' => 'Groupe de séries',
'search_terms' => [
'#groups_series',
],
'label' => 'groups_series',
'colnum' => 6,
'display' => [
'description' => '',
],
'is_custom' => true,
'is_category' => true,
'link_column' => 'value',
'category_sort' => 'value',
'is_csp' => false,
'is_editable' => true,
'rec_index' => 26,
'#value#' => null,
'#extra#' => null,
'is_multiple2' => [
],
];
/**
* @var SingleCommandApplication The console application, configured in the constructor and started by {@see Main::run()}
*/
private readonly SingleCommandApplication $command;
/**
* @var Environment The twig environment, loaded with {@see Main::TWIG_TEMPLATES}
*/
private Environment $twig;
/**
 * Build the console application and the Twig environment
 *
 * The application takes one required, repeatable argument (the files or directories to process) and delegates
 * its execution to {@see Main::execute()}.
 */
public function __construct () {
    $this->twig = new Environment(new ArrayLoader(self::TWIG_TEMPLATES));

    $description = <<<'EOF'
Calibre metadata parser for FanFiction files
Each input file, if valid (EPUB file), is transformed to a Calibre input directory, containing the EPUB file and the metadata file (metadata.opf).
The metadata are extracted from the "title" page of the EPUB, based on FicHub.net or FF2EBOOK.com.
EOF;

    $application = new SingleCommandApplication();
    $application->setName('calibre_metadata_parser_ff');
    $application->setDescription($description);
    $application->addArgument(
        self::ARGUMENT_FILES,
        InputArgument::REQUIRED | InputArgument::IS_ARRAY,
        'The files or directories to process'
    );
    $application->setCode($this->execute(...));

    $this->command = $application;
}
/**
 * Start the console application built by the constructor
 *
 * @return void
 *
 * @throws Throwable If an error occurs
 */
public function run (): void {
    $application = $this->command;
    $application->run();
}
/**
 * Execute the command
 *
 * Each path given on the command line is processed in turn: directories are scanned recursively, anything else
 * is treated as a single file. A path that does not exist is reported and skipped.
 *
 * @param InputInterface  $input  The command line input
 * @param OutputInterface $output The command line output
 *
 * @return int The command exit status code (always {@see Command::SUCCESS}, errors are only reported on the output)
 *
 * @throws Throwable If an error occurs
 */
private function execute (InputInterface $input, OutputInterface $output): int {
    foreach ($input->getArgument(self::ARGUMENT_FILES) as $file) {
        $fileInfo = new SplFileInfo($file);

        //region Check if file or directory exists
        if (!file_exists($file)) {
            // A missing path is neither a file nor a directory, so its kind cannot be told apart
            $output->writeln('<error>Unable to find file or directory: ' . $fileInfo->getPathname() . '</error>');
            // Nothing to process: without this, the missing path would be handed to processFile() and reported twice
            continue;
        }
        //endregion

        if ($fileInfo->isDir()) {
            $this->processDirectory($fileInfo, $output);
        }
        else {
            $this->processFile($fileInfo, $output);
        }
    }
    return Command::SUCCESS;
}
/**
 * Process a directory
 *
 * Every entry whose path ends with ".epub" (case-insensitive), at any depth, is handed to {@see Main::processFile()}.
 *
 * @param SplFileInfo     $directory The directory
 * @param OutputInterface $output    The command line output
 *
 * @return void
 *
 * @throws Throwable If an error occurs
 */
private function processDirectory (SplFileInfo $directory, OutputInterface $output): void {
    //region Check directory is readable
    $output->writeln('<info>Processing directory: ' . $directory->getPathname() . '</info>');
    if (!$directory->isReadable()) {
        $output->writeln('<error>The directory is not readable</error>');
        return;
    }
    //endregion

    //region Treat each EPUB files (check recursively)
    $epubIterator = new RegexIterator(
        new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($directory->getPathname())
        ),
        /** @lang PhpRegExp */ '/\.epub$/i',
        RegexIterator::MATCH
    );

    // processFile() creates subdirectories and moves EPUB files into them: the list is snapshotted first so the
    // directory tree is not modified while it is being walked (a moved file could otherwise be visited twice).
    // Keys are path names, they are dropped to get a plain list.
    /** @var list<SplFileInfo> $epubFiles */
    $epubFiles = iterator_to_array($epubIterator, false);

    foreach ($epubFiles as $file) {
        $this->processFile($file, $output);
    }
    //endregion
}
/**
 * Process a file
 *
 * Steps:
 *  1. check the file is a readable EPUB in a readable and writable directory;
 *  2. move it to its own subdirectory when its directory contains more than one entry ("metadata.opf" ignored);
 *  3. read its first page (the embedded "dc:description" of the EPUB is removed on the way);
 *  4. parse the metadata printed on that page (FicHub.net or FF2EBOOK.com layout);
 *  5. write the Calibre "metadata.opf" file next to the EPUB.
 *
 * Every failure is reported on the output and stops the processing of this file only.
 *
 * @param SplFileInfo     $file   The file
 * @param OutputInterface $output The command line output
 *
 * @return void
 *
 * @throws Throwable If an error occurs
 */
private function processFile (SplFileInfo $file, OutputInterface $output): void {
    //region EPUB check and preparation
    //region Check file is OK for processing
    $output->writeln('<info>Processing file: ' . $file->getPathname() . '</info>');
    if (mb_strtolower($file->getExtension()) !== 'epub') {
        $output->writeln('<error>Invalid file extension, "epub" expected</error>');
        return;
    }
    if (!$file->isReadable()) {
        $output->writeln('<error>The file is not readable</error>');
        return;
    }
    $fileParentDirectory = new SplFileInfo(dirname($file->getRealPath()));
    if (!$fileParentDirectory->isReadable()) {
        $output->writeln('<error>The file parent directory is not readable</error>');
        return;
    }
    if (!$fileParentDirectory->isWritable()) {
        $output->writeln('<error>The file parent directory is not writable</error>');
        return;
    }
    //endregion

    //region Create subdirectory for the file (if necessary)
    $parentDirectoryFiles = scandir($fileParentDirectory->getPathname());
    if ($parentDirectoryFiles === false) {
        $output->writeln('<error>Unable to list the file parent directory: ' . (new LastErrorException())->getMessage() . '</error>');
        return;
    }
    // The EPUB itself counts for one: more than one entry means it shares its directory with something else
    $nbFiles = count(array_diff($parentDirectoryFiles, ['.', '..', 'metadata.opf']));
    if ($nbFiles > 1) {
        $fileParentDirectory = new SplFileInfo(
            $fileParentDirectory->getPathname() . DIRECTORY_SEPARATOR . $file->getBasename('.' . $file->getExtension())
        );
        if (!mkdir($fileParentDirectory->getPathname())) {
            $output->writeln('<error>Failed to create subdirectory "' . $fileParentDirectory->getPathname() . '": ' . (new LastErrorException())->getMessage() . '</error>');
            return;
        }

        $oldFile = $file;
        $file = new SplFileInfo($fileParentDirectory->getPathname() . DIRECTORY_SEPARATOR . $oldFile->getFilename());
        if (!rename($oldFile->getPathname(), $file->getPathname())) {
            $output->writeln(
                '<error>Failed to move file to subdirectory "' . $oldFile->getPathname() . '" => "' . $file->getPathname() . '": ' . (new LastErrorException())->getMessage()
                . '</error>'
            );
            return;
        }
    }
    //endregion
    //endregion

    //region Read EPUB first page
    $firstPageContent = $this->readFirstPage($file, $output);
    if ($firstPageContent === null) {
        return;
    }
    //endregion

    //region Extract metadata information from first page
    $output->writeln('Parsing metadata', OutputInterface::VERBOSITY_VERBOSE);
    $metadata = $this->extractMetadata($firstPageContent);
    if ($metadata === null) {
        $output->writeln('<error>Unrecognized format</error>');
        $output->writeln($firstPageContent, OutputInterface::VERBOSITY_VERY_VERBOSE);
        return;
    }
    //endregion

    //region Generate Calibre metadata (OPF file)
    $this->writeCalibreMetadata($metadata, $fileParentDirectory, $output);
    //endregion
}
/**
 * Open an EPUB, remove its embedded description and read the raw content of its first page
 *
 * The first page is the NCX navigation point with the lowest play order. The archive is always closed before
 * returning, which is also when the root file replacement (description removal) is written to disk.
 *
 * @param SplFileInfo     $file   The EPUB file
 * @param OutputInterface $output The command line output
 *
 * @return string|null The first page content or Null if failed (the error is reported on the output)
 *
 * @throws Throwable If an error occurs
 */
private function readFirstPage (SplFileInfo $file, OutputInterface $output): ?string {
    //region Open the EPUB (ZIP archive)
    $fileArchive = new ZipArchive();
    if (($rep = $fileArchive->open($file->getPathname())) !== true) {
        $output->writeln('<error>Failed to open file as ZIP archive (error #' . $rep . '): ' . $fileArchive->getStatusString() . '</error>');
        return null;
    }
    //endregion

    // Path of the temporary copy of the root file; it must survive until the archive is closed
    $rootFilePathTemp = null;
    try {
        //region Meta "container" file: root file path
        $metaContainerPath = 'META-INF/container.xml';
        $output->writeln('Processing meta "container": ' . $metaContainerPath, OutputInterface::VERBOSITY_VERBOSE);
        $metaContainerXPath = $this->readAndParseXmlDocument(
            $fileArchive,
            $metaContainerPath,
            'meta "container" file',
            'urn:oasis:names:tc:opendocument:xmlns:container',
            $output
        );
        if ($metaContainerXPath === null) {
            return null;
        }

        $rootFileNodeList = $metaContainerXPath->query('/r:container/r:rootfiles/r:rootfile[@media-type="application/oebps-package+xml"]');
        if ($rootFileNodeList === false) {
            $output->writeln('<error>Unable to query root file path</error>');
            return null;
        }
        $rootFilePath = $this->findFirstAttribute($rootFileNodeList, 'full-path');
        if ($rootFilePath === null) {
            $output->writeln('<error>Unable to find root file path</error>');
            return null;
        }
        unset($metaContainerXPath, $rootFileNodeList);
        //endregion

        //region Root file: TOC file path (ncx)
        $output->writeln('Processing root file: ' . $rootFilePath, OutputInterface::VERBOSITY_VERBOSE);
        $rootFileXPath = $this->readAndParseXmlDocument(
            $fileArchive,
            $rootFilePath,
            'root file',
            self::OPF_NAMESPACE_OPF,
            $output
        );
        if ($rootFileXPath === null) {
            return null;
        }

        $tocItemNodeList = $rootFileXPath->query('/r:package/r:manifest/r:item[@id="ncx"][@media-type="application/x-dtbncx+xml"]');
        if ($tocItemNodeList === false) {
            $output->writeln('<error>Unable to query TOC file path</error>');
            return null;
        }
        $tocFilePath = $this->findFirstAttribute($tocItemNodeList, 'href');
        if ($tocFilePath === null) {
            $output->writeln('<error>Unable to find TOC file path</error>');
            return null;
        }
        //endregion

        //region Delete description if present
        $rootFileXPath->registerNamespace('dc', self::OPF_NAMESPACE_DC);
        $descriptionNodeList = $rootFileXPath->query('/r:package/r:metadata/dc:description');
        if ($descriptionNodeList !== false && $descriptionNodeList->count() > 0) {
            $output->writeln($descriptionNodeList->count() . ' descriptions found → removing', OutputInterface::VERBOSITY_VERBOSE);
            /** @var DOMNode $descriptionNode */
            foreach ($descriptionNodeList as $descriptionNode) {
                $descriptionNode->parentNode?->removeChild($descriptionNode);
            }

            //region Write the modified root file to a temporary file
            // A unique name avoids clashing with another run or with an unrelated file of the temporary directory
            $temporaryPath = tempnam(sys_get_temp_dir(), 'opf');
            if ($temporaryPath === false) {
                $output->writeln('<error>Unable to create temporary root file: ' . (new LastErrorException())->getMessage() . '</error>');
                return null;
            }
            $rootFilePathTemp = $temporaryPath;
            $output->writeln('Temporary root file path: ' . $rootFilePathTemp, OutputInterface::VERBOSITY_VERBOSE);

            $rootFileDocument = $rootFileXPath->document;
            $rootFileDocument->formatOutput = true;
            $rootFileXml = $rootFileDocument->saveXML();
            if ($rootFileXml === false || file_put_contents($rootFilePathTemp, $rootFileXml) === false) {
                $output->writeln('<error>Failed to write temporary root file (' . $rootFilePathTemp . '): ' . (new LastErrorException())->getMessage() . '</error>');
                return null;
            }
            //endregion

            //region Replace root file in ZIP
            if (($rootFileIndex = $fileArchive->locateName($rootFilePath)) === false) {
                $output->writeln('<error>Unable to locate root file index: ' . $rootFilePath . '</error>');
                return null;
            }
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            if (!$fileArchive->replaceFile(
                $rootFilePathTemp,
                $rootFileIndex,
                flags: ZipArchive::FL_ENC_UTF_8
            )) {
                $output->writeln('<error>Unable to replace root file: ' . $fileArchive->getStatusString() . '</error>');
                return null;
            }
            //endregion
        }
        else {
            $output->writeln('no descriptions found', OutputInterface::VERBOSITY_VERBOSE);
        }
        unset($rootFileXPath, $tocItemNodeList, $descriptionNodeList);
        //endregion

        //region TOC file: first page path
        $tocFilePath = $this->resolveArchivePath($rootFilePath, $tocFilePath);
        $output->writeln('Processing TOC file: ' . $tocFilePath, OutputInterface::VERBOSITY_VERBOSE);
        /** @noinspection HttpUrlsUsage */
        $tocFileXPath = $this->readAndParseXmlDocument(
            $fileArchive,
            $tocFilePath,
            'TOC file',
            'http://www.daisy.org/z3986/2005/ncx/',
            $output
        );
        if ($tocFileXPath === null) {
            return null;
        }

        $tocPagesNodeList = $tocFileXPath->query('/r:ncx/r:navMap/r:navPoint');
        if ($tocPagesNodeList === false) {
            $output->writeln('<error>Unable to query page list</error>');
            return null;
        }
        if ($tocPagesNodeList->count() === 0) {
            $output->writeln('<error>Unable to find page list</error>');
            return null;
        }

        // Content path of each page, indexed by play order
        $pages = [];
        /** @var DOMNode $tocPageNode */
        foreach ($tocPagesNodeList as $tocPageNode) {
            if (!$tocPageNode instanceof DOMElement) {
                continue;
            }
            $tocPagePlayOrder = (int)$tocPageNode->getAttribute('playOrder');

            $tocPageContentNodeList = $tocFileXPath->query('./r:content', $tocPageNode);
            if ($tocPageContentNodeList === false) {
                $output->writeln('<error>Unable to query page (play order: ' . $tocPagePlayOrder . ') content</error>');
                return null;
            }
            if ($tocPageContentNodeList->count() === 0) {
                $output->writeln('<error>Unable to find page (play order: ' . $tocPagePlayOrder . ') content</error>');
                return null;
            }

            $pageContentPath = $this->findFirstAttribute($tocPageContentNodeList, 'src');
            if ($pageContentPath === null) {
                $output->writeln('<error>Unable to find page (play order: ' . $tocPagePlayOrder . ') content path</error>');
                continue;
            }
            $pages[$tocPagePlayOrder] = $pageContentPath;
        }
        if ($pages === []) {
            // Every navigation point was skipped: there is no first page to read
            $output->writeln('<error>Unable to find page list</error>');
            return null;
        }

        ksort($pages, SORT_NUMERIC);
        // A fragment identifier ("page.xhtml#anchor") is not part of the archive entry name
        $firstPagePath = explode('#', $pages[array_key_first($pages)], 2)[0];
        $firstPagePath = $this->resolveArchivePath($tocFilePath, $firstPagePath);
        unset($tocFileXPath, $tocPagesNodeList);
        //endregion

        //region Read the first page
        $output->writeln('Read first page: ' . $firstPagePath, OutputInterface::VERBOSITY_VERBOSE);
        if (($firstPageStream = $fileArchive->getStream($firstPagePath)) === false) {
            $output->writeln('<error>Failed to open first page (' . $firstPagePath . '): ' . $fileArchive->getStatusString() . '</error>');
            return null;
        }
        $firstPageContent = stream_get_contents($firstPageStream);
        fclose($firstPageStream);
        if ($firstPageContent === false) {
            $output->writeln('<error>Failed to read first page: ' . (new LastErrorException())->getMessage() . '</error>');
            return null;
        }
        return $firstPageContent;
        //endregion
    }
    finally {
        //region Close the EPUB
        // The pending root file replacement reads the temporary file while closing: delete it only afterwards
        $fileArchive->close();
        if ($rootFilePathTemp !== null && file_exists($rootFilePathTemp)) {
            unlink($rootFilePathTemp);
        }
        //endregion
    }
}
/**
 * Get the value of an attribute from the first element of a node list that has it
 *
 * @param iterable $nodes     The nodes to look into (non-element nodes are ignored)
 * @param string   $attribute The attribute name
 *
 * @return string|null The attribute value or Null if no element has the attribute
 */
private function findFirstAttribute (iterable $nodes, string $attribute): ?string {
    foreach ($nodes as $node) {
        if ($node instanceof DOMElement && $node->hasAttribute($attribute)) {
            return $node->getAttribute($attribute);
        }
    }
    return null;
}
/**
 * Resolve a path relative to the directory of another archive entry
 *
 * @param string $referencePath The archive path of the entry the relative path comes from
 * @param string $relativePath  The path, relative to the directory of the reference entry
 *
 * @return string The path from the archive root
 */
private function resolveArchivePath (string $referencePath, string $relativePath): string {
    $directory = dirname($referencePath);
    // dirname() returns "." for an entry stored at the archive root, and "./xxx" is not a valid entry name
    if ($directory === '.' || $directory === '') {
        return $relativePath;
    }
    return $directory . '/' . $relativePath;
}
/**
 * Extract the metadata printed on the first page of an EPUB
 *
 * The layout is recognized from the exporter signature found in the page (FicHub.net or FF2EBOOK.com).
 *
 * @param string $firstPageContent The raw content of the first page
 *
 * @return stdClass|null The metadata or Null if the page layout is not recognized
 */
private function extractMetadata (string $firstPageContent): ?stdClass {
    if (preg_match('`<p>\s*Exported\s+with\s+the\s+assistance\s+of\s+<a\s+href="https://fichub\.net"\s*>\s*FicHub\.net\s*</a>\s*</p>`i', $firstPageContent) === 1) {
        return $this->extractFicHubMetadata($firstPageContent);
    }

    $isFf2Ebook = preg_match(
        '`<div\s+class="footer"\s*>\s*Converted\s+using\s+<a\s+href="http://www\.ff2ebook\.com"\s*>\s*www\.FF2EBOOK\.com\s*</a>\s*<br ?/?>\s*Date:\s*(?<date>\d{4}-\d{2}-\d{2})\s*</div>`i',
        $firstPageContent,
        $match
    );
    if ($isFf2Ebook === 1) {
        return $this->extractFf2EbookMetadata($firstPageContent, $match['date']);
    }

    return null;
}
/**
 * Extract the metadata of a first page generated by FicHub.net
 *
 * @param string $firstPageContent The raw content of the first page
 *
 * @return stdClass The metadata (only the properties found in the page are set)
 */
private function extractFicHubMetadata (string $firstPageContent): stdClass {
    $metadata = new stdClass();
    $metadata->publisher = 'FanFiction.net (FicHub.net)';
    $metadata->exportedBy = 'FicHub.net';

    if (preg_match('`<h1>(?<title>.+?)\s*</h1>`i', $firstPageContent, $match) === 1) {
        $metadata->title = $match['title'];
    }
    if (preg_match('`<p>\s*(?:<b>)?\s*By\s*:\s*(?<author>.+?)\s*(?:</b>)?</p>`i', $firstPageContent, $match) === 1) {
        $metadata->author = $match['author'];
    }

    // Each paragraph is either "<type>: <value>" or free text (source link, exporter signature, summary)
    preg_match_all('`<p>(?:(?<type>\w+):\s*)?(?<value>.+?)</p>`is', $firstPageContent, $matches, PREG_SET_ORDER);
    foreach ($matches as $match) {
        switch (mb_strtolower($match['type'] ?? '')) {
            case 'status':
                $metadata->status = $match['value'];
                break;
            case 'published':
                $metadata->publishDate = $match['value'];
                break;
            case 'updated':
                $metadata->lastUpdateDate = $match['value'];
                break;
            case 'words':
                $metadata->words = $this->getMetadataNumber($match['value']);
                break;
            case 'chapters':
                $metadata->chapters = $this->getMetadataNumber($match['value']);
                break;
            case 'rated':
                // "Fiction <rating> - <type>: <value> - <type>: <value> …"
                $parts = explode('-', $match['value']);
                if (preg_match('`^\s*Fiction\s+(?<rated>.+?)\s*$`i', trim(array_shift($parts)), $ratedMatch) === 1) {
                    $metadata->rated = $ratedMatch['rated'];
                }
                foreach ($parts as $part) {
                    if (preg_match('`^\s*(?<type>\w+)\s*:\s*(?<value>.+)\s*$`i', $part, $partMatch) !== 1) {
                        continue;
                    }
                    switch (mb_strtolower($partMatch['type'] ?? '')) {
                        case 'language':
                            $metadata->language = $partMatch['value'];
                            break;
                        case 'genre':
                            $metadata->genre = $partMatch['value'];
                            break;
                        case 'characters':
                            $metadata->characters = $partMatch['value'];
                            break;
                    }
                }
                break;
            case '':
                if (preg_match('`^Original\s+source\s*:\s*.+href\s*=\s*"(?<url>[^"]+)"`i', $match['value'], $matchUrl) === 1) {
                    $metadata->url = $matchUrl['url'];
                }
                elseif (preg_match('`^Exported\s+with`i', $match['value']) !== 1) {
                    $metadata->summary = $match['value'];
                }
                break;
        }
    }

    return $metadata;
}
/**
 * Extract the metadata of a first page generated by FF2EBOOK.com
 *
 * @param string $firstPageContent The raw content of the first page
 * @param string $exportDate       The conversion date found in the page footer
 *
 * @return stdClass The metadata (only the properties found in the page are set)
 */
private function extractFf2EbookMetadata (string $firstPageContent, string $exportDate): stdClass {
    $metadata = new stdClass();
    $metadata->publisher = 'FanFiction.net (www.FF2EBOOK.com)';
    $metadata->exportedBy = 'www.FF2EBOOK.com (' . $exportDate . ')';

    if (
        preg_match(
            '`<div\s+class="fic-title">\s*(?:<a\s+href\s*=\s*"(?<url>[^"]+)"\s*>)?\s*(?<title>.+?)\s*(?:</a>)?\s*</div>`i', $firstPageContent, $match
        ) === 1
    ) {
        $metadata->title = $match['title'];
        if (($match['url'] ?? '') !== '') {
            $metadata->url = $match['url'];
        }
    }
    if (
        preg_match(
            '`<div\s+class="fic-author">\s*By:\s*(?:<a\s+href\s*=\s*"[^"]+"\s*>)?\s*(?<author>.+?)\s*(?:</a>)?\s*</div>`i', $firstPageContent, $match
        ) === 1
    ) {
        $metadata->author = $match['author'];
    }

    preg_match_all('`<span\s+class="bold">(?<type>.+?)\s*:\s*</span>\s*(?<value>.+?)\s*<br ?/?>`is', $firstPageContent, $matches, PREG_SET_ORDER);
    foreach ($matches as $match) {
        switch (mb_strtolower($match['type'] ?? '')) {
            case 'status':
                $metadata->status = match (mb_strtolower($match['value'])) {
                    'completed' => 'complete',
                    default     => $match['value'],
                };
                break;
            case 'published':
                $metadata->publishDate = $match['value'];
                break;
            case 'last updated':
                $metadata->lastUpdateDate = $match['value'];
                break;
            case 'words count':
                $metadata->words = $match['value'];
                break;
            case 'chapters count':
                $metadata->chapters = $match['value'];
                break;
            case 'pairings/main char.':
                // Appended after the pairings coming from "additional infos", whatever the order in the page
                $metadata->characters = (isset($metadata->characters) ? $metadata->characters . ' ' : '') . $match['value'];
                break;
            case 'additional infos':
                if (preg_match('`^\s*(?:(?<genre>[^>]+)\s*-\s*)?(?<pairings>[^>]+)\s*`i', $match['value'], $infoMatch) === 1) {
                    if (($infoMatch['genre'] ?? '') !== '') {
                        $metadata->genre = $infoMatch['genre'];
                    }
                    if (($infoMatch['pairings'] ?? '') !== '') {
                        $metadata->characters = $infoMatch['pairings'] . (isset($metadata->characters) ? ' ' . $metadata->characters : '');
                    }
                }
                break;
            case 'summary':
                $metadata->summary = $match['value'];
                break;
        }
    }

    return $metadata;
}
/**
 * Generate the Calibre metadata file ("metadata.opf") of an EPUB
 *
 * @param stdClass        $metadata  The metadata extracted from the EPUB first page ("publisher" is required)
 * @param SplFileInfo     $directory The directory in which the file is written
 * @param OutputInterface $output    The command line output
 *
 * @return void
 *
 * @throws Throwable If an error occurs
 */
private function writeCalibreMetadata (stdClass $metadata, SplFileInfo $directory, OutputInterface $output): void {
    $output->writeln('Generate Calibre metadata (OPF file)', OutputInterface::VERBOSITY_VERBOSE);

    //region Document skeleton
    $opf = new DOMDocument('1.0', 'utf-8');
    $opf->formatOutput = true;
    $root = $opf->createElementNS(self::OPF_NAMESPACE_OPF, 'package');
    $opf->appendChild($root);
    $root->setAttribute('unique-identifier', 'uuid_id');
    $root->setAttribute('version', '2.0');

    $opfMetadata = $opf->createElement('metadata');
    $root->appendChild($opfMetadata);
    $opfMetadata->setAttributeNS(self::DOM_NAMESPACE_ATTRIBUTE, 'xmlns:dc', self::OPF_NAMESPACE_DC);
    $opfMetadata->setAttributeNS(self::DOM_NAMESPACE_ATTRIBUTE, 'xmlns:opf', self::OPF_NAMESPACE_OPF);
    //endregion

    //region Dublin Core elements
    $opfMetadata->appendChild($opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:publisher', $metadata->publisher));
    if (isset($metadata->title)) {
        $opfMetadata->appendChild($opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:title', $metadata->title));
    }
    if (isset($metadata->author)) {
        $author = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:creator', $metadata->author);
        $opfMetadata->appendChild($author);
        $author->setAttribute('opf:role', 'aut');
    }
    if (isset($metadata->publishDate)) {
        $opfMetadata->appendChild($opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:date', $metadata->publishDate . 'T00:00:00+00:00'));
    }
    if (isset($metadata->language)) {
        $opfMetadata->appendChild(
            $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:language', mb_substr(mb_strtolower($metadata->language), 0, 3))
        );
    }

    // The template reads "description" while the extraction stores the story summary under "summary"
    $descriptionContext = (array)$metadata;
    $descriptionContext['description'] ??= $metadata->summary ?? '';
    $opfMetadata->appendChild(
        $opf->createElementNS(
            self::OPF_NAMESPACE_DC,
            'dc:description',
            $this->twig->render(self::TWIG_TEMPLATE_METADATA_DESCRIPTION, $descriptionContext)
        )
    );
    //endregion

    //region Calibre custom columns
    $this->calibreAddMetaColumn($opfMetadata, 'buy', self::CALIBRE_COLUMN_BUY);
    $this->calibreAddMetaColumn($opfMetadata, 'collection', self::CALIBRE_COLUMN_COLLECTION);
    $this->calibreAddMetaColumn($opfMetadata, 'collection_manual', self::CALIBRE_COLUMN_COLLECTION_MANUAL);
    $this->calibreAddMetaColumn($opfMetadata, 'genre', self::CALIBRE_COLUMN_GENRE);
    $this->calibreAddMetaColumn($opfMetadata, 'groups_series', self::CALIBRE_COLUMN_GROUPS_SERIES);
    //endregion

    //region Write the file
    if (file_put_contents($directory->getPathname() . DIRECTORY_SEPARATOR . 'metadata.opf', $opf->saveXML()) === false) {
        $output->writeln('<error>Failed to write the Calibre metadata: ' . (new LastErrorException())->getMessage() . '</error>');
        return;
    }
    $output->writeln('Calibre metadata generated !', OutputInterface::VERBOSITY_VERBOSE);
    //endregion
}
/**
 * Read and parse an XML document of a ZIP archive
 *
 * The archive is never closed by this method, whatever the outcome: it stays under the responsibility of the caller.
 *
 * @param ZipArchive      $fileArchive  The ZIP archive
 * @param string          $documentPath The XML document path in ZIP archive
 * @param string          $label        The label for the command line output
 * @param string          $namespace    The XPath namespace to register (under the "r" prefix)
 * @param OutputInterface $output       The command line output
 *
 * @return DOMXPath|null The XPath or Null if failed (the error is reported on the output)
 */
private function readAndParseXmlDocument (ZipArchive $fileArchive, string $documentPath, string $label, string $namespace, OutputInterface $output): ?DOMXPath {
    //region Read the file
    if (($stream = $fileArchive->getStream($documentPath)) === false) {
        $output->writeln('<error>Failed to open ' . $label . ': ' . $fileArchive->getStatusString() . '</error>');
        return null;
    }
    $content = stream_get_contents($stream);
    // Closed before checking the result so the stream is released on failure too
    fclose($stream);
    unset($stream);
    if ($content === false) {
        $output->writeln('<error>Failed to read ' . $label . ': ' . (new LastErrorException())->getMessage() . '</error>');
        return null;
    }
    if ($content === '') {
        // DOMDocument::loadXML() throws on an empty source instead of returning false
        $output->writeln('<error>Failed to parse ' . $label . ': the document is empty</error>');
        return null;
    }
    //endregion

    //region Parse it as XML
    // Parsing errors are only collected by libxml_get_errors() while internal error handling is enabled
    $previousUseInternalErrors = libxml_use_internal_errors(true);
    libxml_clear_errors();
    try {
        $dom = new DOMDocument();
        if (!$dom->loadXML($content, LIBXML_COMPACT)) {
            $errors = libxml_get_errors();
            $output->writeln('<error>Failed to parse ' . $label . ': ' . count($errors) . ' errors</error>');
            foreach ($errors as $error) {
                $levelName = match ($error->level) {
                    LIBXML_ERR_WARNING => 'Warning',
                    LIBXML_ERR_ERROR   => 'Error',
                    LIBXML_ERR_FATAL   => 'Fatal',
                    LIBXML_ERR_NONE    => 'None',
                    default            => 'Unknown (' . $error->level . ')'
                };
                $output->writeln(
                    // libxml messages end with a line break
                    '<error>' . $levelName . ' #' . $error->code . ': ' . trim($error->message) . ' (line: ' . $error->line . ', column: ' . $error->column . ')</error>',
                    OutputInterface::VERBOSITY_VERBOSE
                );
            }
            return null;
        }
    }
    finally {
        // Restore the libxml state found on entry
        libxml_clear_errors();
        libxml_use_internal_errors($previousUseInternalErrors);
    }
    //endregion

    //region Register XPath namespaces
    $xpath = new DOMXPath($dom);
    if (!$xpath->registerNamespace('r', $namespace)) {
        // The archive is left open: callers close it themselves when Null is returned, and closing it twice fails
        $output->writeln('<error>Failed to register ' . $label . ' XPath namespace</error>');
        return null;
    }
    //endregion

    return $xpath;
}
/**
 * Convert a number to the metadata form
 *
 * The digits are grouped by three from the right, with a comma between groups ("1234567" → "1,234,567").
 * Surrounding white spaces and leading zeros are removed. A value that is not made of digits only (already
 * formatted, empty…) is returned as is, only trimmed.
 *
 * @param string $number The number to convert
 *
 * @return string The number in metadata form
 */
private function getMetadataNumber (string $number): string {
    $number = trim($number);
    if (preg_match('/^[0-9]+$/D', $number) !== 1) {
        // Not a plain integer: grouping its characters would only garble it
        return $number;
    }

    $digits = ltrim($number, '0');
    if ($digits === '') {
        // The number was made of zeros only
        $digits = '0';
    }

    // Grouping is done on the reversed string so that the incomplete group, if any, ends up on the left
    return strrev(implode(',', str_split(strrev($digits), 3)));
}
/**
 * Append the definition of a Calibre custom column to the metadata
 *
 * The column is stored as a "meta" element whose name is "calibre:user_metadata:#" followed by the column name
 * and whose content is the JSON encoded column definition.
 *
 * @param DOMElement $metadata The "metadata" node
 * @param string     $name     The column name
 * @param array      $content  The column content and definition
 *
 * @return void
 *
 * @throws DOMException If an error occurs
 */
private function calibreAddMetaColumn (DOMElement $metadata, string $name, array $content): void {
    $document = $metadata->ownerDocument;

    $column = $document->createElement('meta');
    $column->setAttribute('name', 'calibre:user_metadata:#' . $name);
    $column->setAttribute('content', json_encode($content));

    $metadata->appendChild($column);
}
}