mirror of
https://github.com/jikan-me/jikan-rest.git
synced 2025-02-20 11:23:35 +08:00
added incremental indexer
This commit is contained in:
parent
970ff8aad4
commit
e0cc44495b
183
app/Console/Commands/Indexer/IncrementalIndexer.php
Normal file
183
app/Console/Commands/Indexer/IncrementalIndexer.php
Normal file
@ -0,0 +1,183 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Console\Commands\Indexer;
|
||||||
|
|
||||||
|
use Illuminate\Console\Command;
|
||||||
|
use Illuminate\Support\Facades\Storage;
|
||||||
|
use Illuminate\Support\Facades\Validator;
|
||||||
|
|
||||||
|
/**
 * Incrementally indexes entries through the local API.
 *
 * For every requested media type the command downloads the upstream MAL ID cache,
 * compares it with the snapshot stored by the previous completed run and requests
 * only the IDs that are new. State is kept under "indexer/incremental/" on the
 * default storage disk:
 *  - "<type>.json"         snapshot of the last fully processed ID cache
 *  - "<type>.json.tmp"     snapshot staged while a run is in progress
 *  - "<type>_resume.save"  position of the entry processed last (used by --resume)
 *  - "<type>_failed.json"  IDs that could not be fetched (used by --failed)
 */
class IncrementalIndexer extends Command
{
    /**
     * Directory (on the default storage disk) holding the indexer state files.
     */
    private const STORAGE_DIR = 'indexer/incremental';

    /**
     * Exit code returned when the run is interrupted by SIGTERM.
     */
    private const EXIT_CANCELLED = 127;

    /**
     * Set by the SIGTERM handler; checked between units of work so the command can stop early.
     *
     * @var bool
     */
    private bool $cancelled = false;

    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'indexer:incremental {mediaType*}
                            {--delay=3 : Set a delay between requests}
                            {--resume : Resume from the last position}
                            {--failed : Run only entries that failed to index last time}';

    /**
     * Prompt texts shown when a required argument is missing.
     *
     * @return array<string, array<int, string>>
     */
    protected function promptForMissingArgumentsUsing(): array
    {
        return [
            'mediaType' => ['The media type to index.', 'Valid values: anime, manga, character, people']
        ];
    }

    /**
     * Validates the input and processes each requested media type in turn.
     *
     * @return int 0 on success, 1 on invalid input or when an ID cache could not be loaded,
     *             127 when the run was cancelled by SIGTERM
     */
    public function handle(): int
    {
        $validator = Validator::make(
            [
                'mediaType' => $this->argument('mediaType'),
                'delay' => $this->option('delay'),
                'resume' => $this->option('resume') ?? false,
                'failed' => $this->option('failed') ?? false
            ],
            [
                // "mediaType*" is an array argument; the "in" rule only accepts array
                // values when the "array" rule is present on the same attribute.
                'mediaType' => 'required|array|in:anime,manga,character,people',
                'delay' => 'integer|min:1',
                // NOTE(review): "prohibited_with" is assumed to be a rule registered
                // elsewhere in the project - confirm it exists and behaves as intended.
                'resume' => 'bool|prohibited_with:failed',
                'failed' => 'bool|prohibited_with:resume'
            ]
        );

        if ($validator->fails()) {
            $this->error($validator->errors()->toJson());
            return 1;
        }

        $this->trap(SIGTERM, fn () => $this->cancelled = true);

        $delay = (int) $this->option('delay');
        $resume = (bool) ($this->option('resume') ?? false);
        $onlyFailed = (bool) ($this->option('failed') ?? false);
        $exitCode = 0;

        /**
         * @var $mediaTypes array
         */
        $mediaTypes = $this->argument("mediaType");

        foreach ($mediaTypes as $mediaType) {
            // As before, --failed falls back to a regular incremental run when no failed list exists.
            $retryingFailed = $onlyFailed && Storage::exists($this->statePath("{$mediaType}_failed.json"));
            $snapshotStaged = false;

            if ($retryingFailed) {
                $ids = $this->readFailedIds($mediaType);
            } else {
                $diff = $this->collectNewIds($mediaType);

                if ($this->cancelled) {
                    return self::EXIT_CANCELLED;
                }

                if ($diff === null) {
                    $this->error("Failed to load the MAL ID cache for $mediaType");
                    $exitCode = 1;
                    continue;
                }

                $ids = $diff['ids'];
                $snapshotStaged = $diff['staged'];
            }

            if (count($ids) > 0 && !$this->indexIds($mediaType, $ids, $resume, $delay, $retryingFailed)) {
                return self::EXIT_CANCELLED;
            }

            if ($snapshotStaged) {
                // finalize the latest state
                Storage::move(
                    $this->statePath("$mediaType.json.tmp"),
                    $this->statePath("$mediaType.json")
                );
            }
        }

        return $exitCode;
    }

    /**
     * Builds the storage path of a state file.
     */
    private function statePath(string $fileName): string
    {
        return self::STORAGE_DIR . '/' . $fileName;
    }

    /**
     * Reads the IDs recorded as failed for the given media type.
     *
     * @return array<int, int|string> empty when the file does not contain a JSON array
     */
    private function readFailedIds(string $mediaType): array
    {
        $decoded = json_decode((string) Storage::get($this->statePath("{$mediaType}_failed.json")), true);

        return is_array($decoded) ? array_values($decoded) : [];
    }

    /**
     * Downloads the upstream ID cache and returns the IDs missing from the stored snapshot.
     *
     * When the cache differs from the snapshot, the downloaded cache is staged as
     * "<type>.json.tmp" so the caller can promote it once indexing has finished.
     *
     * @return array{ids: array<int, int|string>, staged: bool}|null null when the run was cancelled
     *         or the cache could not be downloaded/decoded
     */
    private function collectNewIds(string $mediaType): ?array
    {
        // Snapshot state is scoped to this media type so one type is never diffed
        // against the snapshot of another.
        $snapshotPath = $this->statePath("$mediaType.json");
        $snapshotRaw = Storage::exists($snapshotPath) ? (string) Storage::get($snapshotPath) : '';

        if ($this->cancelled) {
            return null;
        }

        try {
            $latestRaw = file_get_contents("https://raw.githubusercontent.com/purarue/mal-id-cache/master/cache/{$mediaType}_cache.json");
        } catch (\Exception) {
            // Warnings may be converted to exceptions by the framework's error handler.
            $latestRaw = false;
        }

        if ($latestRaw === false || $this->cancelled) {
            return null;
        }

        if ($snapshotRaw !== '' && sha1($latestRaw) === sha1($snapshotRaw)) {
            return ['ids' => [], 'staged' => false];
        }

        $latest = json_decode($latestRaw, true);
        if (!is_array($latest)) {
            return null;
        }

        $previous = json_decode($snapshotRaw, true);
        $hasPrevious = is_array($previous) && count($previous) > 0;

        $ids = [];
        foreach (["sfw", "nsfw"] as $t) {
            $current = (array) ($latest[$t] ?? []);
            $known = $hasPrevious ? (array) ($previous[$t] ?? []) : [];
            // New entries are the ones present upstream but absent from the snapshot.
            $ids = array_merge($ids, array_diff($current, $known));
        }

        Storage::put("$snapshotPath.tmp", $latestRaw);

        return ['ids' => array_values($ids), 'staged' => true];
    }

    /**
     * Requests every ID from the local API, recording progress and failures.
     *
     * @param string $mediaType media type being indexed
     * @param array<int, int|string> $ids list of IDs (0-based, sequential keys)
     * @param bool $resume whether to continue from the stored resume position
     * @param int $delay seconds to wait between two requests
     * @param bool $retryingFailed true when $ids came from the failed list; the list is then
     *                             only rewritten after the whole run, so a cancelled retry
     *                             does not lose unprocessed IDs
     * @return bool false when the run was cancelled, true otherwise
     */
    private function indexIds(string $mediaType, array $ids, bool $resume, int $delay, bool $retryingFailed): bool
    {
        $idCount = count($ids);
        $resumePath = $this->statePath("{$mediaType}_resume.save");
        $failedPath = $this->statePath("{$mediaType}_failed.json");
        $index = 0;

        if ($resume && Storage::exists($resumePath)) {
            $index = (int) Storage::get($resumePath);
            $this->info("Resuming from index: $index");
        }

        if ($index !== 0 && !isset($ids[$index])) {
            $index = 0;
            $this->warn('Invalid index; set back to 0');
        }

        // Keep the current position so a cancellation before the first request does not lose it.
        Storage::put($resumePath, (string) $index);

        $this->info("$idCount $mediaType entries available");

        $success = [];
        $failedIds = [];

        for ($i = $index; $i < $idCount; $i++) {
            if ($this->cancelled) {
                return false;
            }

            $id = $ids[$i];

            // NOTE(review): assumes the API route segment equals the media type name - confirm
            // for "character" and "people".
            $url = env('APP_URL') . "/v4/$mediaType/$id";
            $this->info("Indexing/Updating " . ($i + 1) . "/$idCount $url [MAL ID: $id]");

            if ($this->fetchEntry($url)) {
                $success[] = $id;
            } else {
                $failedIds[] = $id;
                if (!$retryingFailed) {
                    Storage::put($failedPath, json_encode($failedIds));
                }
            }

            Storage::put($resumePath, (string) $i);

            if ($i < $idCount - 1 && !$this->cancelled) {
                sleep($delay);
            }
        }

        Storage::delete($resumePath);

        if ($retryingFailed) {
            // Replace the retried list with whatever is still failing.
            if (count($failedIds) > 0) {
                Storage::put($failedPath, json_encode($failedIds));
            } else {
                Storage::delete($failedPath);
            }
        }

        $this->info("--- Indexing of $mediaType is complete.");
        $this->info(count($success) . ' entries indexed or updated.');
        if (count($failedIds) > 0) {
            $this->info(count($failedIds) . ' entries failed to index or update. Re-run with --failed to requeue failed entries only.');
        }

        return true;
    }

    /**
     * Requests a single entry and reports whether it was fetched successfully.
     *
     * A response carrying an "error" is treated as a failure unless its status is 404.
     */
    private function fetchEntry(string $url): bool
    {
        try {
            $body = file_get_contents($url);
        } catch (\Exception) {
            $body = false;
        }

        if ($body === false) {
            $this->warn("[SKIPPED] Failed to fetch $url");
            return false;
        }

        $response = json_decode($body, true);

        if (is_array($response) && isset($response['error']) && (int) ($response['status'] ?? 0) !== 404) {
            $this->error("[SKIPPED] Failed to fetch $url - {$response['error']}");
            return false;
        }

        return true;
    }
}
|
@ -24,7 +24,8 @@ class Kernel extends ConsoleKernel
|
|||||||
Indexer\GenreIndexer::class,
|
Indexer\GenreIndexer::class,
|
||||||
Indexer\ProducersIndexer::class,
|
Indexer\ProducersIndexer::class,
|
||||||
Indexer\AnimeSweepIndexer::class,
|
Indexer\AnimeSweepIndexer::class,
|
||||||
Indexer\MangaSweepIndexer::class
|
Indexer\MangaSweepIndexer::class,
|
||||||
|
Indexer\IncrementalIndexer::class
|
||||||
];
|
];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
"php": "^8.1",
|
"php": "^8.1",
|
||||||
"ext-json": "*",
|
"ext-json": "*",
|
||||||
"ext-mongodb": "*",
|
"ext-mongodb": "*",
|
||||||
|
"ext-pcntl": "*",
|
||||||
"amphp/http-client": "^4.6",
|
"amphp/http-client": "^4.6",
|
||||||
"danielmewes/php-rql": "dev-master",
|
"danielmewes/php-rql": "dev-master",
|
||||||
"darkaonline/swagger-lume": "^9.0",
|
"darkaonline/swagger-lume": "^9.0",
|
||||||
|
@ -34,6 +34,7 @@ display_help() {
|
|||||||
echo "stop Stop Jikan API"
|
echo "stop Stop Jikan API"
|
||||||
echo "validate-prereqs Validate pre-reqs installed (docker, docker-compose)"
|
echo "validate-prereqs Validate pre-reqs installed (docker, docker-compose)"
|
||||||
echo "execute-indexers Execute the indexers, which will scrape and index data from MAL. (Notice: This can take days)"
|
echo "execute-indexers Execute the indexers, which will scrape and index data from MAL. (Notice: This can take days)"
|
||||||
|
echo "index-incrementally Executes the incremental indexers for each media type. (anime, manga, character, people)"
|
||||||
echo ""
|
echo ""
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -168,6 +169,10 @@ case "$1" in
|
|||||||
$DOCKER_COMPOSE_CMD -p "$DOCKER_COMPOSE_PROJECT_NAME" exec jikan_rest php /app/artisan indexer:producers
|
$DOCKER_COMPOSE_CMD -p "$DOCKER_COMPOSE_PROJECT_NAME" exec jikan_rest php /app/artisan indexer:producers
|
||||||
echo "Indexing done!"
|
echo "Indexing done!"
|
||||||
;;
|
;;
|
||||||
|
"index-incrementally")
    # Runs the incremental indexer for every supported media type inside the jikan_rest container.
    echo "Indexing..."
    $DOCKER_COMPOSE_CMD -p "$DOCKER_COMPOSE_PROJECT_NAME" exec jikan_rest php /app/artisan indexer:incremental anime manga character people
    echo "Indexing done!"
    # Terminate the arm; without ";;" the following "*)" pattern is a syntax error.
    ;;
|
||||||
*)
|
*)
|
||||||
echo "No command specified, displaying help"
|
echo "No command specified, displaying help"
|
||||||
display_help
|
display_help
|
||||||
|
Loading…
x
Reference in New Issue
Block a user