diff --git a/app/Console/Commands/Indexer/IncrementalIndexer.php b/app/Console/Commands/Indexer/IncrementalIndexer.php index bc618e0..8e09ecf 100644 --- a/app/Console/Commands/Indexer/IncrementalIndexer.php +++ b/app/Console/Commands/Indexer/IncrementalIndexer.php @@ -30,8 +30,139 @@ class IncrementalIndexer extends Command ]; } + private function getExistingIds(string $mediaType): array + { + $existingIdsHash = ""; + $existingIdsRaw = ""; + + if (Storage::exists("indexer/incremental/$mediaType.json")) + { + $existingIdsRaw = Storage::get("indexer/incremental/$mediaType.json"); + $existingIdsHash = sha1($existingIdsRaw); + } + + return [$existingIdsHash, $existingIdsRaw]; + } + + private function getIdsToFetch(string $mediaType): array + { + $idsToFetch = []; + [$existingIdsHash, $existingIdsRaw] = $this->getExistingIds($mediaType); + + if ($this->cancelled) + { + return []; + } + + $newIdsRaw = file_get_contents("https://raw.githubusercontent.com/purarue/mal-id-cache/master/cache/${mediaType}_cache.json"); + $newIdsHash = sha1($newIdsRaw); + + /** @noinspection PhpConditionAlreadyCheckedInspection */ + if ($this->cancelled) + { + return []; + } + + if ($newIdsHash !== $existingIdsHash) + { + $newIds = json_decode($newIdsRaw, true); + $existingIds = json_decode($existingIdsRaw, true); + + if (is_null($existingIds) || count($existingIds) === 0) + { + $idsToFetch = $newIds; + } + else + { + foreach (["sfw", "nsfw"] as $t) + { + $idsToFetch[$t] = array_diff($existingIds[$t], $newIds[$t]); + } + } + + Storage::put("indexer/incremental/$mediaType.json.tmp", $newIdsRaw); + } + + return $idsToFetch; + } + + private function getFailedIdsToFetch(string $mediaType): array + { + return json_decode(Storage::get("indexer/incremental/{$mediaType}_failed.json")); + } + + private function fetchIds(string $mediaType, array $idsToFetch, bool $resume): void + { + $index = 0; + $success = []; + $failedIds = []; + $idCount = count($idsToFetch); + if ($resume && Storage::exists("indexer/incremental/{$mediaType}_resume.save")) + { + $index = (int)Storage::get("indexer/incremental/{$mediaType}_resume.save"); + $this->info("Resuming from index: $index"); + } + + if ($index > 0 && !isset($this->ids[$index])) + { + $index = 0; + $this->warn('Invalid index; set back to 0'); + } + + Storage::put("indexer/incremental/{$mediaType}_resume.save", 0); + + $this->info("$idCount $mediaType entries available"); + $ids = array_merge($idsToFetch['sfw'], $idsToFetch['nsfw']); + + for ($i = $index; $i <= ($idCount - 1); $i++) + { + if ($this->cancelled) + { + return; + } + + $id = $ids[$index]; + + $url = env('APP_URL') . "/v4/anime/$id"; + $this->info("Indexing/Updating " . ($i + 1) . "/$idCount $url [MAL ID: $id]"); + + try + { + $response = json_decode(file_get_contents($url), true); + if (!isset($response['error']) || $response['status'] == 404) + { + continue; + } + + $this->error("[SKIPPED] Failed to fetch $url - {$response['error']}"); + } + catch (\Exception) + { + $this->warn("[SKIPPED] Failed to fetch $url"); + $failedIds[] = $id; + Storage::put("indexer/incremental/$mediaType.failed", json_encode($failedIds)); + } + + $success[] = $id; + Storage::put("indexer/incremental/{$mediaType}_resume.save", $index); + } + + Storage::delete("indexer/incremental/{$mediaType}_resume.save"); + + $this->info("--- Indexing of $mediaType is complete."); + $this->info(count($success) . ' entries indexed or updated.'); + if (count($failedIds) > 0) + { + $this->info(count($failedIds) . ' entries failed to index or update. Re-run with --failed to requeue failed entries only.'); + } + + // finalize the latest state + Storage::move("indexer/incremental/$mediaType.json.tmp", "indexer/incremental/$mediaType.json"); + } + public function handle(): int { + // validate inputs $validator = Validator::make( [ 'mediaType' => $this->argument('mediaType'), @@ -52,12 +183,12 @@ class IncrementalIndexer extends Command return 1; } + // we want to handle signals from the OS $this->trap(SIGTERM, fn () => $this->cancelled = true); $resume = $this->option('resume') ?? false; $onlyFailed = $this->option('failed') ?? false; - $existingIdsHash = ""; - $existingIdsRaw = ""; + /** * @var $mediaTypes array */ @@ -66,116 +197,28 @@ class IncrementalIndexer extends Command foreach ($mediaTypes as $mediaType) { $idsToFetch = []; - $failedIds = []; - $success = []; + // if "--failed" option is specified just run the failed ones if ($onlyFailed && Storage::exists("indexer/incremental/{$mediaType}_failed.json")) { - $idsToFetch["sfw"] = json_decode(Storage::get("indexer/incremental/{$mediaType}_failed.json")); + $idsToFetch["sfw"] = $this->getFailedIdsToFetch($mediaType); } else { - if (Storage::exists("indexer/incremental/$mediaType.json")) - { - $existingIdsRaw = Storage::get("indexer/incremental/$mediaType.json"); - $existingIdsHash = sha1($existingIdsRaw); - } - - if ($this->cancelled) - { - return 127; - } - - $newIdsRaw = file_get_contents("https://raw.githubusercontent.com/purarue/mal-id-cache/master/cache/${mediaType}_cache.json"); - $newIdsHash = sha1($newIdsRaw); - - /** @noinspection PhpConditionAlreadyCheckedInspection */ - if ($this->cancelled) - { - return 127; - } - - if ($newIdsHash !== $existingIdsHash) - { - $newIds = json_decode($newIdsRaw, true); - $existingIds = json_decode($existingIdsRaw, true); - - if (is_null($existingIds) || count($existingIds) === 0) - { - $idsToFetch = $newIds; - } - else - { - foreach (["sfw", "nsfw"] as $t) - { - $idsToFetch[$t] = array_diff($existingIds[$t], $newIds[$t]); - } - } - - Storage::put("indexer/incremental/$mediaType.json.tmp", $newIdsRaw); - } + $idsToFetch = $this->getIdsToFetch($mediaType); } $idCount = count($idsToFetch); - if ($idCount > 0) + if ($idCount == 0) { - $index = 0; - if ($resume && Storage::exists("indexer/incremental/{$mediaType}_resume.save")) + if ($this->cancelled) { - $index = (int)Storage::get("indexer/incremental/{$mediaType}_resume.save"); - $this->info("Resuming from index: $index"); + return 127; } - - if ($index > 0 && !isset($this->ids[$index])) { - $index = 0; - $this->warn('Invalid index; set back to 0'); - } - - Storage::put("indexer/incremental/{$mediaType}_resume.save", 0); - - $this->info("$idCount $mediaType entries available"); - $ids = array_merge($idsToFetch['sfw'], $idsToFetch['nsfw']); - for ($i = $index; $i <= ($idCount - 1); $i++) - { - if ($this->cancelled) - { - return 127; - } - - $id = $ids[$index]; - - $url = env('APP_URL') . "/v4/anime/$id"; - $this->info("Indexing/Updating " . ($i + 1) . "/$idCount $url [MAL ID: $id]"); - - try - { - $response = json_decode(file_get_contents($url), true); - if (isset($response['error']) && $response['status'] != 404) - { - $this->error("[SKIPPED] Failed to fetch $url - {$response['error']}"); - } - } - catch (\Exception) - { - $this->warn("[SKIPPED] Failed to fetch $url"); - $failedIds[] = $id; - Storage::put("indexer/incremental/$mediaType.failed", json_encode($failedIds)); - } - - $success[] = $id; - Storage::put("indexer/incremental/{$mediaType}_resume.save", $index); - } - - Storage::delete("indexer/incremental/{$mediaType}_resume.save"); - $this->info("--- Indexing of $mediaType is complete."); - $this->info(count($success) . ' entries indexed or updated.'); - if (count($failedIds) > 0) - { - $this->info(count($failedIds) . ' entries failed to index or update. Re-run with --failed to requeue failed entries only.'); - } - // finalize the latest state - Storage::move("indexer/incremental/$mediaType.json.tmp", "indexer/incremental/$mediaType.json"); + continue; } + + $this->fetchIds($mediaType, $idsToFetch, $resume); } return 0;