/**
 * Incrementally index the requested media types.
 *
 * For every media type the id cache published in the purarue/mal-id-cache
 * repository is downloaded and compared (by SHA-1) with the snapshot stored
 * by the previous successful run. Ids that are present in the new cache but
 * missing from the snapshot are requested one by one from the API located at
 * APP_URL. With --failed, the ids stored in the "<mediaType>_failed.json" file
 * are requested instead; with --resume, processing continues from the index
 * stored in "<mediaType>_resume.save".
 *
 * NOTE(review): the "delay" option is validated here but is not consumed
 * anywhere in this method — confirm whether a pause between requests was intended.
 *
 * @return int 0 on success, 1 when validation or the id cache download fails,
 *             127 when the run was cancelled through SIGTERM.
 */
public function handle(): int
{
    // The argument is iterated below, so it is normalised to an array up-front
    // and validated element by element ("in" alone rejects array values).
    $mediaTypes = (array) $this->argument('mediaType');

    $validator = Validator::make(
        [
            'mediaType' => $mediaTypes,
            'delay' => $this->option('delay'),
            'resume' => $this->option('resume') ?? false,
            'failed' => $this->option('failed') ?? false,
        ],
        [
            'mediaType' => 'required|array',
            'mediaType.*' => 'in:anime,manga,character,people',
            'delay' => 'integer|min:1',
            'resume' => 'bool|prohibited_with:failed',
            'failed' => 'bool|prohibited_with:resume',
        ]
    );

    if ($validator->fails())
    {
        $this->error($validator->errors()->toJson());
        return 1;
    }

    // Cancellation is cooperative: the flag is polled between the slow steps below.
    $this->trap(SIGTERM, fn () => $this->cancelled = true);

    $resume = (bool) ($this->option('resume') ?? false);
    $onlyFailed = (bool) ($this->option('failed') ?? false);

    foreach ($mediaTypes as $mediaType)
    {
        $snapshotPath = "indexer/incremental/{$mediaType}.json";
        $pendingSnapshotPath = "{$snapshotPath}.tmp";
        $failedPath = "indexer/incremental/{$mediaType}_failed.json";
        $resumePath = "indexer/incremental/{$mediaType}_resume.save";

        $idsToFetch = [];
        $failedIds = [];
        $success = [];

        // When --failed is given but no failed list exists, a regular incremental
        // run is performed instead (same fallback as before).
        $retryingFailed = $onlyFailed && Storage::exists($failedPath);

        if ($retryingFailed)
        {
            $queued = json_decode(Storage::get($failedPath), true);
            $idsToFetch['sfw'] = is_array($queued) ? $queued : [];
        }
        else
        {
            // Reset per media type so a missing snapshot never inherits the
            // snapshot of the previously processed media type.
            $existingIdsRaw = '';
            $existingIdsHash = '';

            if (Storage::exists($snapshotPath))
            {
                $existingIdsRaw = (string) Storage::get($snapshotPath);
                $existingIdsHash = sha1($existingIdsRaw);
            }

            if ($this->cancelled)
            {
                return 127;
            }

            $cacheUrl = "https://raw.githubusercontent.com/purarue/mal-id-cache/master/cache/{$mediaType}_cache.json";
            $newIdsRaw = file_get_contents($cacheUrl);
            if ($newIdsRaw === false)
            {
                $this->error("Failed to download id cache from $cacheUrl");
                return 1;
            }
            $newIdsHash = sha1($newIdsRaw);

            /** @noinspection PhpConditionAlreadyCheckedInspection */
            if ($this->cancelled)
            {
                return 127;
            }

            if ($newIdsHash !== $existingIdsHash)
            {
                $newIds = json_decode($newIdsRaw, true);
                if (!is_array($newIds))
                {
                    $this->error("Id cache downloaded from $cacheUrl is not valid JSON");
                    return 1;
                }

                $existingIds = json_decode($existingIdsRaw, true);

                if (!is_array($existingIds) || count($existingIds) === 0)
                {
                    // No usable snapshot: everything in the cache has to be fetched.
                    $idsToFetch = $newIds;
                }
                else
                {
                    foreach (['sfw', 'nsfw'] as $rating)
                    {
                        // Ids that appear in the new cache but not in the snapshot.
                        $idsToFetch[$rating] = array_diff(
                            (array) ($newIds[$rating] ?? []),
                            (array) ($existingIds[$rating] ?? [])
                        );
                    }
                }

                // Only promoted to the real snapshot once the run below has finished.
                Storage::put($pendingSnapshotPath, $newIdsRaw);
            }
        }

        // Flatten both buckets into one zero-based list; array_diff preserves keys,
        // so re-indexing is required for positional access and resuming.
        $ids = array_values(array_merge(
            (array) ($idsToFetch['sfw'] ?? []),
            (array) ($idsToFetch['nsfw'] ?? [])
        ));
        $idCount = count($ids);

        if ($idCount === 0)
        {
            continue;
        }

        $index = 0;
        if ($resume && Storage::exists($resumePath))
        {
            $index = (int) Storage::get($resumePath);
            $this->info("Resuming from index: $index");
        }

        if ($index < 0 || $index >= $idCount)
        {
            $index = 0;
            $this->warn('Invalid index; set back to 0');
        }

        Storage::put($resumePath, (string) $index);

        $this->info("$idCount $mediaType entries available");

        // The v4 API exposes characters under a plural path segment.
        $endpoint = $mediaType === 'character' ? 'characters' : $mediaType;

        for ($i = $index; $i < $idCount; $i++)
        {
            if ($this->cancelled)
            {
                return 127;
            }

            $id = $ids[$i];
            $url = env('APP_URL') . "/v4/{$endpoint}/{$id}";
            $this->info("Indexing/Updating " . ($i + 1) . "/$idCount $url [MAL ID: $id]");

            $failed = false;
            try
            {
                $body = file_get_contents($url);
                $response = $body === false ? null : json_decode($body, true);

                if (!is_array($response))
                {
                    $this->warn("[SKIPPED] Failed to fetch $url");
                    $failed = true;
                }
                elseif (isset($response['error']) && ($response['status'] ?? null) != 404)
                {
                    // A 404 is not treated as a failure; any other error is.
                    $this->error("[SKIPPED] Failed to fetch $url - {$response['error']}");
                    $failed = true;
                }
            }
            catch (\Exception)
            {
                $this->warn("[SKIPPED] Failed to fetch $url");
                $failed = true;
            }

            if ($failed)
            {
                $failedIds[] = $id;
                // While retrying a failed list, keep the not-yet-processed ids queued
                // so that a cancelled run does not drop them from the file it reads.
                $queue = $retryingFailed
                    ? array_merge($failedIds, array_slice($ids, $i + 1))
                    : $failedIds;
                Storage::put($failedPath, json_encode($queue));
            }
            else
            {
                $success[] = $id;
            }

            // Persist the loop position (not the start index) so --resume can continue.
            Storage::put($resumePath, (string) $i);
        }

        Storage::delete($resumePath);
        $this->info("--- Indexing of $mediaType is complete.");
        $this->info(count($success) . ' entries indexed or updated.');

        if (count($failedIds) > 0)
        {
            Storage::put($failedPath, json_encode($failedIds));
            $this->info(count($failedIds) . ' entries failed to index or update. Re-run with --failed to requeue failed entries only.');
        }
        elseif ($retryingFailed)
        {
            // Every previously failed entry went through; nothing left to requeue.
            Storage::delete($failedPath);
        }

        // finalize the latest state (no pending snapshot exists when retrying failed ids)
        if (Storage::exists($pendingSnapshotPath))
        {
            Storage::move($pendingSnapshotPath, $snapshotPath);
        }
    }

    return 0;
}