diff --git a/.run/Run.run.xml b/.run/Run.run.xml
new file mode 100644
index 0000000..ffc2ac2
--- /dev/null
+++ b/.run/Run.run.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/composer.json b/composer.json
index 507bbc3..15d2a13 100644
--- a/composer.json
+++ b/composer.json
@@ -1,36 +1,41 @@
{
- "name": "jrosset/calibre_metadata_parser_ff",
- "description": "Calibre metadata parser for FanFiction files",
- "keywords": [],
- "type": "library",
- "config": {
- "sort-packages": true
- },
- "minimum-stability": "stable",
- "require": {
- "php": "^8.3",
- "jrosset/lasterrorexception": "^1.1",
- "symfony/console": "^7.0"
- },
- "autoload": {
- "psr-4": {
- "jrosset\\": "src/"
+ "name": "jrosset/calibre_metadata_parser_ff",
+ "description": "Calibre metadata parser for FanFiction files",
+ "keywords": [ ],
+ "type": "library",
+ "config": {
+ "sort-packages": true
+ },
+ "minimum-stability": "stable",
+ "require": {
+ "php": "^8.3",
+ "ext-dom": "*",
+ "ext-libxml": "*",
+ "ext-zip": "*",
+ "jrosset/lasterrorexception": "^1.1",
+ "jrosset/mbstring-extended": "^1.3",
+ "symfony/console": "^7.0",
+ "twig/twig": "^3.10"
+ },
+ "autoload": {
+ "psr-4": {
+ "jrosset\\": "src/"
+ }
+ },
+ "readme": "README.md",
+ "homepage": "https://git.jrosset.ovh/jrosset/calibre_metadata_parser_ff",
+ "license": "CC-BY-4.0",
+ "authors": [
+ {
+ "name": "Julien Rosset",
+ "email": "jul.rosset@gmail.com"
+ }
+ ],
+ "support": {
+ "email": "jul.rosset@gmail.com",
+ "issues": "https://git.jrosset.ovh/jrosset/calibre_metadata_parser_ff/issues",
+ "wiki": "https://git.jrosset.ovh/jrosset/calibre_metadata_parser_ff/wiki",
+ "docs": "https://git.jrosset.ovh/jrosset/calibre_metadata_parser_ff/wiki",
+ "source": "https://git.jrosset.ovh/jrosset/calibre_metadata_parser_ff"
}
- },
- "readme": "README.md",
- "homepage": "https://git.jrosset.ovh/jrosset/calibre_metadata_parser_ff",
- "license": "CC-BY-4.0",
- "authors": [
- {
- "name": "Julien Rosset",
- "email": "jul.rosset@gmail.com"
- }
- ],
- "support": {
- "email": "jul.rosset@gmail.com",
- "issues": "https://git.jrosset.ovh/jrosset/calibre_metadata_parser_ff/issues",
- "wiki": "https://git.jrosset.ovh/jrosset/calibre_metadata_parser_ff/wiki",
- "docs": "https://git.jrosset.ovh/jrosset/calibre_metadata_parser_ff/wiki",
- "source": "https://git.jrosset.ovh/jrosset/calibre_metadata_parser_ff"
- }
}
diff --git a/run.php b/run.php
index bc81103..6e0d046 100644
--- a/run.php
+++ b/run.php
@@ -5,5 +5,7 @@ require_once __DIR__ . '/vendor/autoload.php';
use jrosset\Main;
+mb_internal_encoding('UTF-8');
+
(new Main())
->run();
diff --git a/src/Main.php b/src/Main.php
index 20f9da6..216fbb2 100644
--- a/src/Main.php
+++ b/src/Main.php
@@ -1,39 +1,231 @@
- <<<'TWIG'
+
+
{{ description }}
+
Chapters: {{ chapters ?? 1 }}
+
Word count: {{ words }}
+
Published: {{ publishDate }}
+
Last update: {{ lastUpdateDate ?? publishDate }}
+
Status: {{ status }}
+
Rated: {{ rated }}
+
Genre: {{ genre }}
+
Pairings: {{ characters }}
+
Source link:{{ url }}
+
Exported by: {{ exportedBy }})
+
+TWIG
+ ,
+ ];
+
+ private const array CALIBRE_COLUMN_BUY = [
+ 'table' => 'custom_column_9',
+ 'column' => 'value',
+ 'datatype' => 'enumeration',
+ 'is_multiple' => null,
+ 'kind' => 'field',
+ 'name' => 'Acheté',
+ 'search_terms' => [
+ '#buy',
+ ],
+ 'label' => 'buy',
+ 'colnum' => 9,
+ 'display' => [
+ 'description' => '',
+ 'enum_values' => [
+ 'Non',
+ 'En ligne',
+ 'Papier',
+ 'Libre',
+ ],
+ 'use_decorations' => 0,
+ 'enum_colors' => [
+ ],
+ ],
+ 'is_custom' => true,
+ 'is_category' => true,
+ 'link_column' => 'value',
+ 'category_sort' => 'value',
+ 'is_csp' => false,
+ 'is_editable' => true,
+ 'rec_index' => 22,
+ '#value#' => 'Libre',
+ '#extra#' => null,
+ 'is_multiple2' => [
+ ],
+ ];
+ private const array CALIBRE_COLUMN_COLLECTION = [
+ 'table' => 'custom_column_5',
+ 'column' => 'value',
+ 'datatype' => 'composite',
+ 'is_multiple' => null,
+ 'kind' => 'field',
+ 'name' => 'Collection',
+ 'search_terms' => [
+ '#collection',
+ ],
+ 'label' => 'collection',
+ 'colnum' => 5,
+ 'display' => [
+ 'make_category' => true,
+ 'composite_sort' => 'text',
+ 'contains_html' => false,
+ 'use_decorations' => 0,
+ 'composite_template' => '{#collection_manual:ifempty({#groups_series}{#groups_series_index:0>2s| [|]}{series:\'contains($,\'<ordre>\',\'\',strcat(test(field(\'#groups_series\'),\' - \', \'\'),$))\'})}',
+ 'description' => '',
+ ],
+ 'is_custom' => true,
+ 'is_category' => false,
+ 'link_column' => 'value',
+ 'category_sort' => 'value',
+ 'is_csp' => false,
+ 'is_editable' => true,
+ 'rec_index' => 23,
+ '#value#' => 'Fan-Fiction - Harry Potter',
+ 'is_multiple2' => [
+ ],
+ ];
+ private const array CALIBRE_COLUMN_COLLECTION_MANUAL = [
+ 'table' => 'custom_column_7',
+ 'column' => 'value',
+ 'datatype' => 'text',
+ 'is_multiple' => null,
+ 'kind' => 'field',
+ 'name' => 'Nom manuel de collection',
+ 'search_terms' => [
+ '#collection_manual',
+ ],
+ 'label' => 'collection_manual',
+ 'colnum' => 7,
+ 'display' => [
+ 'use_decorations' => 0,
+ 'description' => '',
+ ],
+ 'is_custom' => true,
+ 'is_category' => true,
+ 'link_column' => 'value',
+ 'category_sort' => 'value',
+ 'is_csp' => false,
+ 'is_editable' => true,
+ 'rec_index' => 24,
+ '#value#' => 'Fan-Fiction - Harry Potter',
+ '#extra#' => null,
+ 'is_multiple2' => [
+ ],
+ ];
+ private const array CALIBRE_COLUMN_GENRE = [
+ 'table' => 'custom_column_2',
+ 'column' => 'value',
+ 'datatype' => 'text',
+ 'is_multiple' => '|',
+ 'kind' => 'field',
+ 'name' => 'Genre',
+ 'search_terms' => [
+ '#genre',
+ ],
+ 'label' => 'genre',
+ 'colnum' => 2,
+ 'display' => [
+ 'is_names' => false,
+ 'description' => '',
+ ],
+ 'is_custom' => true,
+ 'is_category' => true,
+ 'link_column' => 'value',
+ 'category_sort' => 'value',
+ 'is_csp' => false,
+ 'is_editable' => true,
+ 'rec_index' => 25,
+ '#value#' => [
+ 'Fan-Fiction',
+ 'Fantastique',
+ ],
+ '#extra#' => null,
+ 'is_multiple2' => [
+ 'cache_to_list' => '|',
+ 'ui_to_list' => ',',
+ 'list_to_ui' => ', ',
+ ],
+ ];
+ private const array CALIBRE_COLUMN_GROUPS_SERIES = [
+ 'table' => 'custom_column_6',
+ 'column' => 'value',
+ 'datatype' => 'series',
+ 'is_multiple' => null,
+ 'kind' => 'field',
+ 'name' => 'Groupe de séries',
+ 'search_terms' => [
+ '#groups_series',
+ ],
+ 'label' => 'groups_series',
+ 'colnum' => 6,
+ 'display' => [
+ 'description' => '',
+ ],
+ 'is_custom' => true,
+ 'is_category' => true,
+ 'link_column' => 'value',
+ 'category_sort' => 'value',
+ 'is_csp' => false,
+ 'is_editable' => true,
+ 'rec_index' => 26,
+ '#value#' => null,
+ '#extra#' => null,
+ 'is_multiple2' => [
+ ],
+ ];
+
/**
* @var SingleCommandApplication The command
*/
private readonly SingleCommandApplication $command;
+ /**
+ * @var Environment The twig environment
+ */
+ private Environment $twig;
/**
* Initialization
*/
- public function __construct()
- {
+ public function __construct () {
$this->command = (new SingleCommandApplication())
->setName('calibre_metadata_parser_ff')
->setDescription(<<<'EOF'
@@ -49,6 +241,10 @@ EOF
'The files or directories to process'
)
->setCode($this->execute(...));
+
+ $this->twig = new Environment(
+ new ArrayLoader(self::TWIG_TEMPLATES),
+ );
}
/**
@@ -58,11 +254,9 @@ EOF
*
* @throws Throwable If an error occurs
*/
- public function run(): void
- {
+ public function run (): void {
$this->command->run();
}
-
/**
* Execute the command
*
@@ -73,8 +267,7 @@ EOF
*
* @throws Throwable If an error occurs
*/
- private function execute(InputInterface $input, OutputInterface $output): int
- {
+ private function execute (InputInterface $input, OutputInterface $output): int {
foreach ($input->getArgument(self::ARGUMENT_FILES) as $file) {
//region Check if file or directory exists
$fileInfo = new SplFileInfo($file);
@@ -84,9 +277,10 @@ EOF
//endregion
if ($fileInfo->isDir()) {
- $this->processDirectory($file, $output);
- } else {
- $this->processFile($file, $output);
+ $this->processDirectory($fileInfo, $output);
+ }
+ else {
+ $this->processFile($fileInfo, $output);
}
}
return Command::SUCCESS;
@@ -95,15 +289,14 @@ EOF
/**
* Process a directory
*
- * @param SplFileInfo $directory The directory
- * @param OutputInterface $output The command line output
+ * @param SplFileInfo $directory The directory
+ * @param OutputInterface $output The command line output
*
* @return void
*
* @throws Throwable If an error occurs
*/
- private function processDirectory(SplFileInfo $directory, OutputInterface $output): void
- {
+ private function processDirectory (SplFileInfo $directory, OutputInterface $output): void {
//region Check directory is readable
$output->writeln('Processing directory: ' . $directory->getPathname() . '');
if (!$directory->isReadable()) {
@@ -122,7 +315,6 @@ EOF
}
//endregion
}
-
/**
* Process a file
*
@@ -133,8 +325,8 @@ EOF
*
* @throws Throwable If an error occurs
*/
- private function processFile(SplFileInfo $file, OutputInterface $output): void
- {
+ private function processFile (SplFileInfo $file, OutputInterface $output): void {
+ //region EPUB check and preparation
//region Check file is OK for processing
$output->writeln('Processing file: ' . $file->getPathname() . '');
@@ -158,7 +350,15 @@ EOF
}
//endregion
//region Create subdirectory for the file (if necessary)
- if (count(scandir($fileParentDirectory->getPathname())) > 1) {
+ $parentDirectoryFiles = scandir($fileParentDirectory->getPathname());
+ $nbFiles = 0;
+ foreach ($parentDirectoryFiles as $parentDirectoryFile) {
+ if (in_array($parentDirectoryFile, ['.', '..', 'metadata.opf'])) {
+ continue;
+ }
+ $nbFiles++;
+ }
+ if ($nbFiles > 1) {
$fileParentDirectory = new SplFileInfo($fileParentDirectory->getPathname() . DIRECTORY_SEPARATOR . $file->getBasename('.' . $file->getExtension()));
if (!mkdir($fileParentDirectory->getPathname())) {
$output->writeln('Failed to create subdirectory "' . $fileParentDirectory->getPathname() . '": ' . (new LastErrorException())->getMessage() . '');
@@ -167,11 +367,529 @@ EOF
$oldFile = $file;
$file = new SplFileInfo($fileParentDirectory . DIRECTORY_SEPARATOR . $file->getFilename());
- if (rename($oldFile->getPathname(), $file->getPathname())) {
- $output->writeln('Failed to move file to subdirectory "' . $oldFile->getPathname() . '" => "' . $file->getPathname() . '": ' . (new LastErrorException())->getMessage() . '');
+ if (!rename($oldFile->getPathname(), $file->getPathname())) {
+ $output->writeln('Failed to move file to subdirectory "' . $oldFile->getPathname() . '" => "' . $file->getPathname() . '": ' . (new LastErrorException())->getMessage()
+ . ''
+ );
+ return;
+ }
+ }
+ //endregion
+ //endregion
+ //region Read EPUB first page
+ //region Open the EPUB (ZIP archive)
+ $fileArchive = new ZipArchive();
+ if (($rep = $fileArchive->open($file->getPathname())) !== true) {
+ $output->writeln('Failed to open file as ZIP archive (error #' . $rep . '): ' . $fileArchive->getStatusString() . '');
+ return;
+ }
+ //endregion
+
+ //region Meta "container" file: root file path
+ $metaContainerPath = 'META-INF' . DIRECTORY_SEPARATOR . 'container.xml';
+ $output->writeln('Processing meta "container": ' . $metaContainerPath, OutputInterface::VERBOSITY_VERBOSE);
+
+ //region Read and parse
+ $metaContainerXPath = $this->readAndParseXmlDocument(
+ $fileArchive,
+ $metaContainerPath,
+ 'meta "container" file',
+ 'urn:oasis:names:tc:opendocument:xmlns:container',
+ $output
+ );
+ if ($metaContainerXPath === null) {
+ $fileArchive->close();
+ return;
+ }
+ //endregion
+ //region Extract root file path
+ if (($metaContainerRootFileNodeList = $metaContainerXPath->query('/r:container/r:rootfiles/r:rootfile[@media-type="application/oebps-package+xml"]')) === false) {
+ $output->writeln('Unable to query root file path');
+ $fileArchive->close();
+ return;
+ }
+ if ($metaContainerRootFileNodeList->count() === 0) {
+ $output->writeln('Unable to find root file path');
+ $fileArchive->close();
+ return;
+ }
+
+ $rootFilePath = null;
+ /** @var DOMNode $metaContainerRootFileNode */
+ foreach ($metaContainerRootFileNodeList as $metaContainerRootFileNode) {
+ if ($metaContainerRootFileNode instanceof DOMElement && $metaContainerRootFileNode->hasAttribute('full-path')) {
+ $rootFilePath = $metaContainerRootFileNode->getAttribute('full-path');
+ break;
+ }
+ }
+
+ if ($rootFilePath === null) {
+ $output->writeln('Unable to find root file path');
+ $fileArchive->close();
+ return;
+ }
+ //endregion
+
+ unset($metaContainerXPath);
+ //endregion
+ //region Root file: TOC file path (ncx)
+ $output->writeln('Processing root file: ' . $rootFilePath, OutputInterface::VERBOSITY_VERBOSE);
+
+ //region Read and parse
+ /** @noinspection HttpUrlsUsage */
+ $rootFileXPath = $this->readAndParseXmlDocument(
+ $fileArchive,
+ $rootFilePath,
+ 'root file',
+ 'http://www.idpf.org/2007/opf',
+ $output
+ );
+ if ($rootFileXPath === null) {
+ $fileArchive->close();
+ return;
+ }
+ //endregion
+ //region Extract TOC file path (ncx)
+ if (($tocPagesNodeList = $rootFileXPath->query('/r:package/r:manifest/r:item[@id="ncx"][@media-type="application/x-dtbncx+xml"]')) === false) {
+ $output->writeln('Unable to query TOC file path');
+ $fileArchive->close();
+ return;
+ }
+ if ($tocPagesNodeList->count() === 0) {
+ $output->writeln('Unable to find TOC file path');
+ $fileArchive->close();
+ return;
+ }
+
+ $tocFilePath = null;
+ /** @var DOMNode $rootFileRootFileNode */
+ foreach ($tocPagesNodeList as $rootFileRootFileNode) {
+ if ($rootFileRootFileNode instanceof DOMElement && $rootFileRootFileNode->hasAttribute('href')) {
+ $tocFilePath = $rootFileRootFileNode->getAttribute('href');
+ break;
+ }
+ }
+
+ if ($tocFilePath === null) {
+ $output->writeln('Unable to find TOC file path');
+ $fileArchive->close();
+ return;
+ }
+ //endregion
+
+ unset($rootFileXPath);
+ //endregion
+ //region TOC file: first page content
+ $tocFilePath = dirname($rootFilePath) . DIRECTORY_SEPARATOR . $tocFilePath;
+ $output->writeln('Processing TOC file: ' . $tocFilePath, OutputInterface::VERBOSITY_VERBOSE);
+
+ //region Read and parse
+ /** @noinspection HttpUrlsUsage */
+ $tocFileXPath = $this->readAndParseXmlDocument(
+ $fileArchive,
+ $tocFilePath,
+ 'TOC file',
+ 'http://www.daisy.org/z3986/2005/ncx/',
+ $output
+ );
+ if ($tocFileXPath === null) {
+ $fileArchive->close();
+ return;
+ }
+ //endregion
+ //region Extract page list with play order
+ if (($tocPagesNodeList = $tocFileXPath->query('/r:ncx/r:navMap/r:navPoint')) === false) {
+ $output->writeln('Unable to query page list');
+ $fileArchive->close();
+ return;
+ }
+ if ($tocPagesNodeList->count() === 0) {
+ $output->writeln('Unable to find page list');
+ $fileArchive->close();
+ return;
+ }
+
+ $pages = [];
+ /** @var DOMNode $rootFileRootFileNode */
+ foreach ($tocPagesNodeList as $tocPageNode) {
+ if (!$tocPageNode instanceof DOMElement) {
+ continue;
+ }
+
+ $tocPagePlayOrder = (int)$tocPageNode->getAttribute('playOrder');
+
+ if (($tocPageContentNodeList = $tocFileXPath->query('./r:content', $tocPageNode)) === false) {
+ $output->writeln('Unable to query page (play order: ' . $tocPagePlayOrder . ') content');
+ $fileArchive->close();
+ return;
+ }
+ if ($tocPageContentNodeList->count() === 0) {
+ $output->writeln('Unable to find page (play order: ' . $tocPagePlayOrder . ') content');
+ $fileArchive->close();
return;
}
+
+ $pageContentPath = null;
+ /** @var DOMNode $tocPageContentNode */
+ foreach ($tocPageContentNodeList as $tocPageContentNode) {
+ if ($tocPageContentNode instanceof DOMElement && $tocPageContentNode->hasAttribute('src')) {
+ $pageContentPath = $tocPageContentNode->getAttribute('src');
+ break;
+ }
+ }
+
+ if ($pageContentPath === null) {
+ $output->writeln('Unable to find TOC file path');
+ continue;
+ }
+
+ $pages[$tocPagePlayOrder] = $pageContentPath;
}
//endregion
+ //region Extract first page
+ ksort($pages, SORT_NUMERIC);
+ $firstPagePath = reset($pages);
+ //endregion
+
+ unset($tocFileXPath);
+ //endregion
+ //region Read the first page
+ $firstPagePath = dirname($tocFilePath) . DIRECTORY_SEPARATOR . $firstPagePath;
+ $output->writeln('Read first page: ' . $firstPagePath, OutputInterface::VERBOSITY_VERBOSE);
+ if (($firstPageStream = $fileArchive->getStream($firstPagePath)) === false) {
+ $output->writeln('Failed to open first page: ' . $fileArchive->getStatusString() . '');
+ $fileArchive->close();
+ return;
+ }
+ if (($firstPageContent = stream_get_contents($firstPageStream)) === false) {
+ $output->writeln('Failed to read first page: ' . (new LastErrorException())->getMessage() . '');
+ $fileArchive->close();
+ return;
+ }
+ fclose($firstPageStream);
+ unset($stream);
+ //endregion
+
+ //region Close the EPUB
+ $fileArchive->close();
+ //endregion
+ //endregion
+ //region Extract metadata information from first page
+ $output->writeln('Parsing metadata', OutputInterface::VERBOSITY_VERBOSE);
+ $metadata = new stdClass();
+
+ //region FicHub
+ if (preg_match('`\s*Exported\s+with\s+the\s+assistance\s+of\s+\s*FicHub\.net\s*\s*
`i', $firstPageContent) === 1) {
+ $metadata->publisher = 'FanFiction.net (FicHub.net)';
+ $metadata->exportedBy = 'FicHub.net';
+
+ if (preg_match('`(?.+?)\s*
`i', $firstPageContent, $match) === 1) {
+ $metadata->title = $match['title'];
+ }
+ if (preg_match('`\s*(?:)?\s*By\s*:\s*(?.+?)\s*(?:)?
`i', $firstPageContent, $match) === 1) {
+ $metadata->author = $match['author'];
+ }
+
+ preg_match_all('`(?:(?\w+):\s*)?(?.+?)
`is', $firstPageContent, $matches, PREG_SET_ORDER);
+ foreach ($matches as $match) {
+ switch (mb_strtolower($match['type'] ?? '')) {
+ case 'status':
+ $metadata->status = $match['value'];
+ break;
+
+ case 'published':
+ $metadata->publishDate = $match['value'];
+ break;
+
+ case 'updated':
+ $metadata->lastUpdateDate = $match['value'];
+ break;
+
+ case 'words':
+ $metadata->words = $this->getMetadataNumber($match['value']);
+ break;
+
+ case 'chapters':
+ $metadata->chapters = $this->getMetadataNumber($match['value']);
+ break;
+
+ case 'rated':
+ $parts = explode('-', $match['value']);
+
+ if (preg_match('`^\s*Fiction\s+(?.+?)\s*$`i', trim(array_shift($parts)), $ratedMatch) === 1) {
+ $metadata->rated = $ratedMatch['rated'];
+ }
+
+ foreach ($parts as $part) {
+ if (preg_match('`^\s*(?\w+)\s*:\s*(?.+)\s*$`i', $part, $partMatch) !== 1) {
+ continue;
+ }
+
+ switch (mb_strtolower($partMatch['type'] ?? '')) {
+ case 'language':
+ $metadata->language = $partMatch['value'];
+ break;
+
+ case 'genre':
+ $metadata->genre = $partMatch['value'];
+ break;
+
+ case 'characters':
+ $metadata->characters = $partMatch['value'];
+ break;
+ }
+ }
+ break;
+
+ case '':
+ if (preg_match('`^Original\s+source\s*:\s*.+href\s*=\s*"(?[^"]+)"`i', $match['value'], $matchUrl) === 1) {
+ $metadata->source = $matchUrl['url'];
+ }
+ elseif (preg_match('`^Exported\s+with`i', $match['value']) !== 1) {
+ $metadata->summary = $match['value'];
+ }
+ break;
+ }
+ }
+ }
+ //endregion
+ //region FF2EBOOK
+ elseif (
+ preg_match(
+ '``i',
+ $firstPageContent,
+ $match
+ ) === 1
+ ) {
+ $metadata->publisher = 'FanFiction.net (www.FF2EBOOK.com)';
+ $metadata->exportedBy = 'www.FF2EBOOK.com (' . $match['date'] . ')';
+
+ if (
+ preg_match(
+ '``i', $firstPageContent, $match
+ ) === 1
+ ) {
+ $metadata->title = $match['title'];
+ if (($match['url'] ?? '') !== '') {
+ $metadata->url = $match['url'];
+ }
+ }
+ if (
+ preg_match(
+ '``i', $firstPageContent, $match
+ ) === 1
+ ) {
+ $metadata->author = $match['author'];
+ }
+
+ preg_match_all('`(?.+?)\s*:\s*\s*(?.+?)\s*
`is', $firstPageContent, $matches, PREG_SET_ORDER);
+ foreach ($matches as $match) {
+ switch (mb_strtolower($match['type'] ?? '')) {
+ case 'status':
+ $metadata->status = match (mb_strtolower($match['value'])) {
+ 'completed' => 'complete',
+ default => $match['value'],
+ };
+ break;
+
+ case 'published':
+ $metadata->publishDate = $match['value'];
+ break;
+
+ case 'last updated':
+ $metadata->lastUpdateDate = $match['value'];
+ break;
+
+ case 'words count':
+ $metadata->words = $match['value'];
+ break;
+
+ case 'chapters count':
+ $metadata->chapters = $match['value'];
+ break;
+
+ case 'pairings/main char.':
+ $metadata->characters = (isset($metadata->characters) ? $metadata->characters . ' ' : '') . $match['value'];
+ break;
+
+ case 'additional infos':
+ if (preg_match('`^\s*(?:(?[^>]+)\s*-\s*)?(?[^>]+)\s*`i', $match['value'], $infoMatch) === 1) {
+ if (($infoMatch['genre'] ?? '') !== '') {
+ $metadata->genre = $infoMatch['genre'];
+ }
+ if (($infoMatch['pairings'] ?? '') !== '') {
+ $metadata->characters = $infoMatch['pairings'] . (isset($metadata->characters) ? ' ' . $metadata->characters : '');
+ }
+ }
+ break;
+
+ case 'summary':
+ $metadata->summary = $match['value'];
+ break;
+ }
+ }
+ }
+ //endregion
+ //region Unknown (error)
+ else {
+ $output->writeln('Unrecognized format');
+ $output->writeln($firstPageContent, OutputInterface::VERBOSITY_VERY_VERBOSE);
+ return;
+ }
+ //endregion
+ //endregion
+ //region Generate Calibre metadata (OPF file)
+ $output->writeln('Generate Calibre metadata (OPF file)', OutputInterface::VERBOSITY_VERBOSE);
+
+ $opf = new DOMDocument('1.0', 'utf-8');
+ $opf->formatOutput = true;
+
+ /** @noinspection HttpUrlsUsage */
+ $root = $opf->createElementNS('http://www.idpf.org/2007/opf', 'package');
+ $opf->appendChild($root);
+ $root->setAttribute('unique-identifier', 'uuid_id');
+ $root->setAttribute('version', '2.0');
+
+ $opfMetadata = $opf->createElement('metadata');
+ $root->appendChild($opfMetadata);
+ /** @noinspection HttpUrlsUsage */
+ $opfMetadata->setAttributeNS(self::DOM_NAMESPACE_ATTRIBUTE, 'xmlns:dc', self::OPF_NAMESPACE_DC);
+ /** @noinspection HttpUrlsUsage */
+ $opfMetadata->setAttributeNS(self::DOM_NAMESPACE_ATTRIBUTE, 'xmlns:opf', self::OPF_NAMESPACE_OPF);
+
+ $title = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:publisher', $metadata->publisher);
+ $opfMetadata->appendChild($title);
+
+ if (isset($metadata->title)) {
+ $title = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:title', $metadata->title);
+ $opfMetadata->appendChild($title);
+ }
+ if (isset($metadata->author)) {
+ $author = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:creator', $metadata->author);
+ $opfMetadata->appendChild($author);
+ $author->setAttribute('opf:role', 'aut');
+ }
+ if (isset($metadata->publishDate)) {
+ $title = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:date', $metadata->publishDate . 'T00:00:00+00:00');
+ $opfMetadata->appendChild($title);
+ }
+ if (isset($metadata->language)) {
+ $title = $opf->createElementNS(self::OPF_NAMESPACE_DC, 'dc:language', mb_substr(mb_strtolower($metadata->language), 0, 3));
+ $opfMetadata->appendChild($title);
+ }
+
+ $description = $opf->createElementNS(
+ self::OPF_NAMESPACE_DC, 'dc:description',
+ $this->twig->render(
+ self::TWIG_TEMPLATE_METADATA_DESCRIPTION,
+ (array)$metadata
+ )
+ );
+ $opfMetadata->appendChild($description);
+
+ $this->calibreAddMetaColumn($opfMetadata, 'buy', self::CALIBRE_COLUMN_BUY);
+ $this->calibreAddMetaColumn($opfMetadata, 'collection', self::CALIBRE_COLUMN_COLLECTION);
+ $this->calibreAddMetaColumn($opfMetadata, 'collection_manual', self::CALIBRE_COLUMN_COLLECTION_MANUAL);
+ $this->calibreAddMetaColumn($opfMetadata, 'genre', self::CALIBRE_COLUMN_GENRE);
+ $this->calibreAddMetaColumn($opfMetadata, 'groups_series', self::CALIBRE_COLUMN_GROUPS_SERIES);
+
+ if (file_put_contents($fileParentDirectory->getPathname() . DIRECTORY_SEPARATOR . 'metadata.opf', $opf->saveXML()) === false) {
+ $output->writeln('Failed to write the Calibre metadata: ' . (new LastErrorException())->getMessage() . '');
+ return;
+ }
+
+ $output->writeln('Calibre metadata generated !', OutputInterface::VERBOSITY_VERBOSE);
+ //endregion
+ }
+
+ /**
+  * Read and parse an XML document of a ZIP archive
+  *
+  * The archive is never closed by this method: when Null is returned, closing it remains the caller's responsibility.
+  *
+  * @param ZipArchive      $fileArchive  The ZIP archive
+  * @param string          $documentPath The XML document path in ZIP archive
+  * @param string          $label        The label for the command line output
+  * @param string          $namespace    The XPath namespace to register (bound to the "r" prefix)
+  * @param OutputInterface $output       The command line output
+  *
+  * @return DOMXPath|null The XPath or Null if failed
+  */
+ private function readAndParseXmlDocument (ZipArchive $fileArchive, string $documentPath, string $label, string $namespace, OutputInterface $output): ?DOMXPath {
+     //region Read the file
+     if (($stream = $fileArchive->getStream($documentPath)) === false) {
+         $output->writeln('Failed to open ' . $label . ': ' . $fileArchive->getStatusString() . '');
+         return null;
+     }
+     if (($content = stream_get_contents($stream)) === false) {
+         $output->writeln('Failed to read ' . $label . ': ' . (new LastErrorException())->getMessage() . '');
+         // Do not leak the stream when the read fails
+         fclose($stream);
+         return null;
+     }
+     fclose($stream);
+     unset($stream);
+     //endregion
+     //region Parse it as XML
+     // libxml_get_errors() only reports something when internal error collection is enabled:
+     // enable it for the duration of the parsing, then restore the previous setting
+     $previousUseInternalErrors = libxml_use_internal_errors(true);
+     libxml_clear_errors();
+     $dom = new DOMDocument();
+     try {
+         $parsed = $dom->loadXML($content, LIBXML_COMPACT);
+         $errors = libxml_get_errors();
+     }
+     finally {
+         libxml_clear_errors();
+         libxml_use_internal_errors($previousUseInternalErrors);
+     }
+     if (!$parsed) {
+         $output->writeln('Failed to parse ' . $label . ': ' . count($errors) . ' errors');
+         foreach ($errors as $error) {
+             $levelName = match ($error->level) {
+                 LIBXML_ERR_WARNING => 'Warning',
+                 LIBXML_ERR_ERROR => 'Error',
+                 LIBXML_ERR_FATAL => 'Fatal',
+                 LIBXML_ERR_NONE => 'None',
+                 default => 'Unknown (' . $error->level . ')'
+             };
+             $output->writeln(
+                 '' . $levelName . ' #' . $error->code . ': ' . $error->message . ' (line: ' . $error->line . ', column: ' . $error->column . ')',
+                 OutputInterface::VERBOSITY_VERBOSE
+             );
+         }
+         return null;
+     }
+     //endregion
+     //region Register XPath namespaces
+     $xpath = new DOMXPath($dom);
+     /** @noinspection HttpUrlsUsage */
+     if (!$xpath->registerNamespace('r', $namespace)) {
+         // The archive is deliberately left open here: callers close it themselves when Null is returned,
+         // and closing an already closed ZipArchive raises an error
+         $output->writeln('Failed to register ' . $label . ' XPath namespace');
+         return null;
+     }
+     //endregion
+     return $xpath;
+ }
+ /**
+  * Convert a number to the metadata form
+  *
+  * Digits are grouped by three, starting from the right, and the groups are joined with a comma
+  * (e.g. "1234567" gives "1,234,567"). A number of three characters or fewer is returned unchanged.
+  *
+  * @param string $number The number to convert
+  *
+  * @return string The number in metadata form
+  */
+ private function getMetadataNumber (string $number): string {
+     $numberLength = mb_strlen($number);
+     if ($numberLength <= 3) {
+         // Nothing to group (this also covers the empty string)
+         return $number;
+     }
+
+     // Length of the leftmost, possibly incomplete, group: 0 when the length is a multiple of 3,
+     // in which case no leading group (and thus no leading comma) must be produced
+     $headLength = $numberLength % 3;
+
+     $numberParts = mb_str_split(mb_substr($number, $headLength), 3);
+     if ($headLength > 0) {
+         array_unshift($numberParts, mb_substr($number, 0, $headLength));
+     }
+     return implode(',', $numberParts);
+ }
+ /**
+  * Append a Calibre custom column to the "metadata" node
+  *
+  * A "meta" element is created in the owner document of the "metadata" node and appended to it; its "name"
+  * attribute is "calibre:user_metadata:#" followed by the column name and its "content" attribute is the
+  * JSON encoding of the column content and definition.
+  *
+  * @param DOMElement $metadata The "metadata" node
+  * @param string $name The column name
+  * @param array $content The column content and definition
+  *
+  * @return void
+  *
+  * @throws DOMException If an error occurs
+  */
+ private function calibreAddMetaColumn (DOMElement $metadata, string $name, array $content): void {
+     $attributes = [
+         'name'    => 'calibre:user_metadata:#' . $name,
+         'content' => json_encode($content),
+     ];
+
+     $columnNode = $metadata->ownerDocument->createElement('meta');
+     $metadata->appendChild($columnNode);
+     foreach ($attributes as $attributeName => $attributeValue) {
+         $columnNode->setAttribute($attributeName, $attributeValue);
+     }
}
}
\ No newline at end of file