more tuning for the search engine

This commit is contained in:
pushrbx 2023-07-09 10:50:18 +01:00
parent 01248abc24
commit 7a88c030b4
6 changed files with 73 additions and 12 deletions

View File

@ -374,7 +374,7 @@ class Anime extends JikanApiSearchableModel
/**
 * Returns the relevance weights used when querying this model in Typesense.
 *
 * The value is a comma-separated list of seven integer weights.
 * NOTE(review): presumably each weight pairs positionally with one of the
 * model's query-by fields, which are declared elsewhere — confirm the order
 * against that list before changing individual weights.
 *
 * Only a single return statement is kept: a second `return` placed after the
 * first can never execute, so the tuned weights must be the one that runs.
 *
 * @return string|null Comma-separated weights, e.g. "2,2,1,1,3,3,1".
 */
public function getTypeSenseQueryByWeights(): string|null
{
    return "2,2,1,1,3,3,1";
}
/**
@ -385,7 +385,7 @@ class Anime extends JikanApiSearchableModel
{
return [
[
"field" => "_text_match(buckets:" . max_results_per_page() . ")",
"field" => "_text_match(buckets:".text_match_buckets().")",
"direction" => "desc"
],
[

View File

@ -291,7 +291,7 @@ class Manga extends JikanApiSearchableModel
/**
 * Returns the relevance weights used when querying this model in Typesense.
 *
 * The value is a comma-separated list of seven integer weights.
 * NOTE(review): presumably each weight pairs positionally with one of the
 * model's query-by fields, which are declared elsewhere — confirm the order
 * against that list before changing individual weights.
 *
 * Only a single return statement is kept: a second `return` placed after the
 * first can never execute, so the tuned weights must be the one that runs.
 *
 * @return string|null Comma-separated weights, e.g. "2,2,1,1,3,3,1".
 */
public function getTypeSenseQueryByWeights(): string|null
{
    return "2,2,1,1,3,3,1";
}
/**
@ -302,7 +302,7 @@ class Manga extends JikanApiSearchableModel
{
return [
[
"field" => "_text_match(buckets:" . App::make("jikan-config")->maxResultsPerPage() . ")",
"field" => "_text_match(buckets:".text_match_buckets().")",
"direction" => "desc"
],
[

View File

@ -14,12 +14,15 @@ class TypeSenseScoutSearchService implements ScoutSearchService
{
private int $maxItemsPerPage;
private JikanConfig $jikanConfig;
public function __construct(private readonly Repository $repository,
JikanConfig $config,
private readonly TypesenseCollectionDescriptor $collectionDescriptor,
private readonly SearchAnalyticsService $searchAnalytics)
{
$this->maxItemsPerPage = (int) $config->maxResultsPerPage();
$this->jikanConfig = $config;
if ($this->maxItemsPerPage > 250) {
$this->maxItemsPerPage = 250;
}
@ -41,14 +44,11 @@ class TypeSenseScoutSearchService implements ScoutSearchService
private function middleware(?string $orderByField = null, bool $sortDirectionDescending = false): \Closure
{
return function (Documents $documents, string $query, array $options) use ($orderByField, $sortDirectionDescending) {
// let's enable exhaustive search
// which will make Typesense consider all variations of prefixes and typo corrections of the words
// in the query exhaustively, without stopping early when enough results are found.
$options['exhaustive_search'] = env('TYPESENSE_SEARCH_EXHAUSTIVE', "false");
$options['search_cutoff_ms'] = (int) env('TYPESENSE_SEARCH_CUTOFF_MS', 450);
$options['exhaustive_search'] = $this->jikanConfig->exhaustiveSearch();
$options['search_cutoff_ms'] = $this->jikanConfig->searchCutOffMs();
// this will be ignored together with exhaustive_search set to "true"
$options['drop_tokens_threshold'] = (int) env('TYPESENSE_DROP_TOKENS_THRESHOLD', $this->maxItemsPerPage);
$options['typo_tokens_threshold'] = (int) env('TYPESENSE_TYPO_TOKENS_THRESHOLD', $this->maxItemsPerPage);
$options['drop_tokens_threshold'] = $this->jikanConfig->dropTokensThreshold();
$options['typo_tokens_threshold'] = $this->jikanConfig->typoTokensThreshold();
$options['enable_highlight_v1'] = 'false';
$options['infix'] = 'fallback';
// prevent `Could not parse the filter query: unbalanced `&&` operands.` error
@ -70,6 +70,8 @@ class TypeSenseScoutSearchService implements ScoutSearchService
$options = $this->overrideSortingOrder($options, $modelInstance, $orderByField, $sortDirectionDescending);
}
dd($options, $orderByField);
$results = $documents->search($options);
$this->recordSearchTelemetry($query, $results);

View File

@ -20,13 +20,28 @@ final class JikanConfig
/** Raw configuration values wrapped in a collection; maxResultsPerPage() reads from it. */
private Collection $config;
/** Bucket count returned by textMatchBuckets(); the constructor defaults it to 85. */
private int $textMatchBuckets;
/** Value returned by typoTokensThreshold(); the constructor defaults it to maxResultsPerPage(). */
private int $typoTokensThreshold;
/** Value returned by dropTokensThreshold(); the constructor defaults it to maxResultsPerPage(). */
private int $dropTokensThreshold;
/** Value returned by searchCutOffMs(); the constructor defaults it to 450. */
private int $searchCutOffMs;
/** Raw flag string as configured; exhaustiveSearch() normalises it to "true" or "false". */
private string $exhaustiveSearch;
/**
 * Builds the configuration object from the raw configuration array.
 *
 * The Typesense settings live in the nested "typesense_options" array.
 * Collection::get() only matches top-level keys, so a dotted key such as
 * "typesense_options.text_match_buckets" is never found and the default is
 * always returned. The nested array is therefore unwrapped first and read
 * with plain keys.
 *
 * @param array $config Raw configuration values. Keys read here:
 *                      per_endpoint_cache_ttl, default_cache_expire,
 *                      micro_caching_enabled and typesense_options.
 */
public function __construct(array $config)
{
    $config = collect($config);
    // Must be assigned before maxResultsPerPage() is called below, as that method reads $this->config.
    $this->config = $config;

    $this->perEndpointCacheTtl = $config->get("per_endpoint_cache_ttl", []);
    $this->defaultCacheExpire = $config->get("default_cache_expire", 0);
    // A loose in_array() against `true` matches every non-empty string (including "no" or "off").
    // FILTER_VALIDATE_BOOLEAN accepts true, 1, "1", "true", "on" and "yes" and rejects everything else.
    $this->microCachingEnabled = filter_var($config->get("micro_caching_enabled", false), FILTER_VALIDATE_BOOLEAN);

    $typesenseOptions = collect($config->get("typesense_options", []));

    $this->textMatchBuckets = (int) $typesenseOptions->get("text_match_buckets", 85);
    $this->exhaustiveSearch = (string) $typesenseOptions->get("exhaustive_search", "false");
    $this->searchCutOffMs = (int) $typesenseOptions->get("search_cutoff_ms", 450);

    // A missing or non-positive threshold falls back to the page size. The config file casts
    // an unset environment variable to 0, so 0 has to be treated as "not configured" here.
    $fallbackThreshold = (int) $this->maxResultsPerPage();
    $resolveThreshold = static function (mixed $configured) use ($fallbackThreshold): int {
        if (is_numeric($configured) && (int) $configured > 0) {
            return (int) $configured;
        }

        return $fallbackThreshold;
    };

    $this->typoTokensThreshold = $resolveThreshold($typesenseOptions->get("typo_tokens_threshold"));
    $this->dropTokensThreshold = $resolveThreshold($typesenseOptions->get("drop_tokens_threshold"));
}
public function cacheTtlForEndpoint(string $endpoint): ?int
@ -48,4 +63,34 @@ final class JikanConfig
{
return $this->config->get("max_results_per_page", $defaultValue ?? 25);
}
/**
 * Returns the text-match bucket count resolved by the constructor.
 *
 * @return int The stored bucket count.
 */
public function textMatchBuckets(): int
{
    $buckets = $this->textMatchBuckets;

    return $buckets;
}
/**
 * Returns the typo-tokens threshold resolved by the constructor.
 *
 * @return int The stored threshold.
 */
public function typoTokensThreshold(): int
{
    $threshold = $this->typoTokensThreshold;

    return $threshold;
}
/**
 * Returns the drop-tokens threshold resolved by the constructor.
 *
 * @return int The stored threshold.
 */
public function dropTokensThreshold(): int
{
    $threshold = $this->dropTokensThreshold;

    return $threshold;
}
/**
 * Normalises the configured exhaustive-search flag to a literal string.
 *
 * "1" and any casing of "true" yield "true"; every other value
 * (including "0", any casing of "false", and unrecognised input) yields "false".
 *
 * @return string Either "true" or "false".
 */
public function exhaustiveSearch(): string
{
    $flag = strtolower($this->exhaustiveSearch);

    return match ($flag) {
        "1", "true" => "true",
        default => "false",
    };
}
/**
 * Returns the search cut-off, in milliseconds, resolved by the constructor.
 *
 * @return int The stored cut-off value.
 */
public function searchCutOffMs(): int
{
    $cutOffMs = $this->searchCutOffMs;

    return $cutOffMs;
}
}

View File

@ -61,3 +61,10 @@ if (!function_exists('max_results_per_page')) {
return app()->make("jikan-config")->maxResultsPerPage($fallbackLimit);
}
}
if (function_exists('text_match_buckets') === false) {
    /**
     * Resolves the "jikan-config" binding from the container and returns
     * its text-match bucket count.
     *
     * @return int The value reported by textMatchBuckets() on the resolved config.
     */
    function text_match_buckets(): int
    {
        $jikanConfig = app()->make("jikan-config");

        return $jikanConfig->textMatchBuckets();
    }
}

View File

@ -1,9 +1,16 @@
<?php
return [
'max_results_per_page' => env('MAX_RESULTS_PER_PAGE', 25),
'max_results_per_page' => (int) env('MAX_RESULTS_PER_PAGE', 25),
'micro_caching_enabled' => env('MICROCACHING', false),
'default_cache_expire' => env('CACHE_DEFAULT_EXPIRE', 86400),
'typesense_options' => [
'text_match_buckets' => env('TYPESENSE_TEXT_MATCH_BUCKETS', 85),
'typo_tokens_threshold' => (int) env('TYPESENSE_TYPO_TOKENS_THRESHOLD'),
'drop_tokens_threshold' => (int) env('TYPESENSE_DROP_TOKENS_THRESHOLD'),
'search_cutoff_ms' => (int) env('TYPESENSE_SEARCH_CUTOFF_MS', 450),
'exhaustive_search' => env('TYPESENSE_ENABLE_EXHAUSTIVE_SEARCH', 'false')
],
'per_endpoint_cache_ttl' => [
/**
* Anime