Merge pull request #421 from pushrbx/search-improvements-4

More search tuning and hotfixes
This commit is contained in:
Irfan (Nekomata) 2023-07-09 20:17:15 +05:00 committed by GitHub
commit 466dcac516
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 72 additions and 39 deletions

View File

@ -374,7 +374,7 @@ class Anime extends JikanApiSearchableModel
public function getTypeSenseQueryByWeights(): string|null
{
return "2,2,1,1,2,2,1";
return "2,2,1,1,3,3,1";
}
/**
@ -385,7 +385,7 @@ class Anime extends JikanApiSearchableModel
{
return [
[
"field" => "_text_match(buckets:" . max_results_per_page() . ")",
"field" => "_text_match(buckets:".text_match_buckets().")",
"direction" => "desc"
],
[

View File

@ -25,14 +25,4 @@ class AnimeSearchHandler extends SearchRequestHandler
{
return new AnimeCollection($paginator);
}
protected function prepareOrderByParam(Collection $requestData): Collection
{
if ($requestData->has("q") && !$requestData->has("order_by")) {
// default order by should be popularity, as MAL seems to use this trick.
$requestData->offsetSet("order_by", AnimeOrderByEnum::popularity());
}
return parent::prepareOrderByParam($requestData);
}
}

View File

@ -28,14 +28,4 @@ class MangaSearchHandler extends SearchRequestHandler
{
return new MangaCollection($paginator);
}
protected function prepareOrderByParam(Collection $requestData): Collection
{
if ($requestData->has("q") && !$requestData->has("order_by")) {
// default order by should be popularity, as MAL seems to use this trick.
$requestData->offsetSet("order_by", MangaOrderByEnum::popularity());
}
return parent::prepareOrderByParam($requestData);
}
}

View File

@ -48,17 +48,11 @@ abstract class SearchRequestHandler implements RequestHandler
protected function prepareOrderByParam(Collection $requestData): Collection
{
if (!$requestData->has('order_by') || !$requestData->get('order_by') instanceof Enum) {
$requestData->offsetSet('order_by', 'mal_id');
return $requestData;
}
if ($requestData->has('order_by')) {
if ($requestData->has('order_by') && !is_null($requestData->get("order_by"))) {
$requestData->offsetSet("order_by", $requestData->get("order_by")->label);
return $requestData;
}
$requestData->offsetSet("order_by", 'mal_id');
return $requestData;
}
}

View File

@ -291,7 +291,7 @@ class Manga extends JikanApiSearchableModel
public function getTypeSenseQueryByWeights(): string|null
{
return "2,2,1,1,2,2,1";
return "2,2,1,1,3,3,1";
}
/**
@ -302,7 +302,7 @@ class Manga extends JikanApiSearchableModel
{
return [
[
"field" => "_text_match(buckets:" . App::make("jikan-config")->maxResultsPerPage() . ")",
"field" => "_text_match(buckets:".text_match_buckets().")",
"direction" => "desc"
],
[

View File

@ -14,12 +14,15 @@ class TypeSenseScoutSearchService implements ScoutSearchService
{
private int $maxItemsPerPage;
private JikanConfig $jikanConfig;
public function __construct(private readonly Repository $repository,
JikanConfig $config,
private readonly TypesenseCollectionDescriptor $collectionDescriptor,
private readonly SearchAnalyticsService $searchAnalytics)
{
$this->maxItemsPerPage = (int) $config->maxResultsPerPage();
$this->jikanConfig = $config;
if ($this->maxItemsPerPage > 250) {
$this->maxItemsPerPage = 250;
}
@ -41,14 +44,11 @@ class TypeSenseScoutSearchService implements ScoutSearchService
private function middleware(?string $orderByField = null, bool $sortDirectionDescending = false): \Closure
{
return function (Documents $documents, string $query, array $options) use ($orderByField, $sortDirectionDescending) {
// let's enable exhaustive search
// which will make Typesense consider all variations of prefixes and typo corrections of the words
// in the query exhaustively, without stopping early when enough results are found.
$options['exhaustive_search'] = env('TYPESENSE_SEARCH_EXHAUSTIVE', "false");
$options['search_cutoff_ms'] = (int) env('TYPESENSE_SEARCH_CUTOFF_MS', 450);
$options['exhaustive_search'] = $this->jikanConfig->exhaustiveSearch();
$options['search_cutoff_ms'] = $this->jikanConfig->searchCutOffMs();
// this will be ignored together with exhaustive_search set to "true"
$options['drop_tokens_threshold'] = (int) env('TYPESENSE_DROP_TOKENS_THRESHOLD', $this->maxItemsPerPage);
$options['typo_tokens_threshold'] = (int) env('TYPESENSE_TYPO_TOKENS_THRESHOLD', $this->maxItemsPerPage);
$options['drop_tokens_threshold'] = $this->jikanConfig->dropTokensThreshold();
$options['typo_tokens_threshold'] = $this->jikanConfig->typoTokensThreshold();
$options['enable_highlight_v1'] = 'false';
$options['infix'] = 'fallback';
// prevent `Could not parse the filter query: unbalanced `&&` operands.` error

View File

@ -20,13 +20,28 @@ final class JikanConfig
private Collection $config;
private int $textMatchBuckets;
private int $typoTokensThreshold;
private int $dropTokensThreshold;
private int $searchCutOffMs;
private string $exhaustiveSearch;
/**
 * Builds the configuration object from the raw configuration array.
 *
 * Typesense tuning values are read from the nested `typesense_options` array.
 * Collection::get() does not resolve dot notation, so a lookup such as
 * `$config->get("typesense_options.text_match_buckets")` never reaches the nested
 * array and always yields the default; the nested array is therefore read explicitly.
 * A literal dotted key at the top level is still honoured as a fallback.
 *
 * @param array $config raw configuration values
 */
public function __construct(array $config)
{
    $config = collect($config);
    $this->perEndpointCacheTtl = $config->get("per_endpoint_cache_ttl", []);
    $this->defaultCacheExpire = $config->get("default_cache_expire", 0);
    $this->microCachingEnabled = in_array($config->get("micro_caching_enabled", false), [true, 1, "1", "true"]);
    // Must be assigned before maxResultsPerPage() is called below, because that method reads $this->config.
    $this->config = $config;

    $typesenseOptions = collect($config->get("typesense_options", []));
    // Nested value first; literal dotted key second; null when neither is present.
    $option = static fn (string $key): mixed
        => $typesenseOptions->get($key) ?? $config->get("typesense_options.{$key}");

    $this->textMatchBuckets = (int) ($option("text_match_buckets") ?? 85);
    $this->exhaustiveSearch = (string) ($option("exhaustive_search") ?? "false");
    $this->searchCutOffMs = (int) ($option("search_cutoff_ms") ?? 450);

    // The config file casts an unset threshold env var to 0, so a non-positive value is
    // treated as "not configured" and falls back to the page size.
    $typoTokensThreshold = (int) ($option("typo_tokens_threshold") ?? 0);
    $this->typoTokensThreshold = $typoTokensThreshold > 0
        ? $typoTokensThreshold
        : $this->maxResultsPerPage();

    $dropTokensThreshold = (int) ($option("drop_tokens_threshold") ?? 0);
    $this->dropTokensThreshold = $dropTokensThreshold > 0
        ? $dropTokensThreshold
        : $this->maxResultsPerPage();
}
public function cacheTtlForEndpoint(string $endpoint): ?int
@ -48,4 +63,34 @@ final class JikanConfig
{
return $this->config->get("max_results_per_page", $defaultValue ?? 25);
}
/**
 * Returns the bucket count that is interpolated into the Typesense
 * `_text_match(buckets:N)` sort field.
 *
 * The value is assigned once, in the constructor.
 *
 * @return int
 */
public function textMatchBuckets(): int
{
return $this->textMatchBuckets;
}
/**
 * Returns the value sent to Typesense as the `typo_tokens_threshold` search option.
 *
 * The value is assigned once, in the constructor.
 *
 * @return int
 */
public function typoTokensThreshold(): int
{
return $this->typoTokensThreshold;
}
/**
 * Returns the value sent to Typesense as the `drop_tokens_threshold` search option.
 *
 * The value is assigned once, in the constructor.
 *
 * @return int
 */
public function dropTokensThreshold(): int
{
return $this->dropTokensThreshold;
}
/**
 * Normalises the stored exhaustive-search flag to the literal string "true" or "false".
 *
 * "1" and any casing of "true" yield "true"; every other value (including "0",
 * any casing of "false", and the empty string) yields "false".
 *
 * @return string either "true" or "false"
 */
public function exhaustiveSearch(): string
{
    // Lower-casing up front lets one arm cover "TRUE", "True", "true", and so on.
    return match (strtolower($this->exhaustiveSearch)) {
        "1", "true" => "true",
        default => "false",
    };
}
/**
 * Returns the value sent to Typesense as the `search_cutoff_ms` search option.
 *
 * The value is assigned once, in the constructor.
 *
 * @return int
 */
public function searchCutOffMs(): int
{
return $this->searchCutOffMs;
}
}

View File

@ -61,3 +61,10 @@ if (!function_exists('max_results_per_page')) {
return app()->make("jikan-config")->maxResultsPerPage($fallbackLimit);
}
}
if (!function_exists('text_match_buckets')) {
    /**
     * Resolves the "jikan-config" service from the container and returns its
     * text-match bucket count.
     *
     * @return int
     */
    function text_match_buckets(): int
    {
        $jikanConfig = app()->make("jikan-config");

        return $jikanConfig->textMatchBuckets();
    }
}

View File

@ -1,9 +1,16 @@
<?php
return [
'max_results_per_page' => env('MAX_RESULTS_PER_PAGE', 25),
'max_results_per_page' => (int) env('MAX_RESULTS_PER_PAGE', 25),
'micro_caching_enabled' => env('MICROCACHING', false),
'default_cache_expire' => env('CACHE_DEFAULT_EXPIRE', 86400),
'typesense_options' => [
'text_match_buckets' => env('TYPESENSE_TEXT_MATCH_BUCKETS', 85),
'typo_tokens_threshold' => (int) env('TYPESENSE_TYPO_TOKENS_THRESHOLD'),
'drop_tokens_threshold' => (int) env('TYPESENSE_DROP_TOKENS_THRESHOLD'),
'search_cutoff_ms' => (int) env('TYPESENSE_SEARCH_CUTOFF_MS', 450),
'exhaustive_search' => env('TYPESENSE_ENABLE_EXHAUSTIVE_SEARCH', 'false')
],
'per_endpoint_cache_ttl' => [
/**
* Anime