Add `data.match_from_pairs` fuzzy symbology scanner

A helper which uses `rapidfuzz` to fuzzy-scan a "pairs table": the
`dict[str, Struct]` input table that most backends should expose as
part of their (internal) symbology set.

Also expose the `data.types.Struct` at the subpkg top level.
ib_py311_fixes
Tyler Goodlet 2023-09-22 13:53:18 -04:00
parent a97a0ced8c
commit 0ba75df877
2 changed files with 42 additions and 1 deletions

View File

@ -43,8 +43,10 @@ from ._symcache import (
SymbologyCache, SymbologyCache,
open_symcache, open_symcache,
get_symcache, get_symcache,
match_from_pairs,
) )
from ._sampling import open_sample_stream from ._sampling import open_sample_stream
from ..types import Struct
__all__: list[str] = [ __all__: list[str] = [
@ -62,6 +64,7 @@ __all__: list[str] = [
'open_symcache', 'open_symcache',
'open_sample_stream', 'open_sample_stream',
'get_symcache', 'get_symcache',
'Struct',
'SymbologyCache', 'SymbologyCache',
'types', 'types',
] ]

View File

@ -308,7 +308,7 @@ class SymbologyCache(Struct):
matches in a `dict` including the `MktPair` values. matches in a `dict` including the `MktPair` values.
''' '''
matches = fuzzy.extractBests( matches = fuzzy.extract(
pattern, pattern,
getattr(self, table), getattr(self, table),
score_cutoff=50, score_cutoff=50,
@ -466,3 +466,41 @@ def get_symcache(
pdbp.xpm() pdbp.xpm()
return symcache return symcache
def match_from_pairs(
    pairs: dict[str, Struct],
    query: str,
    score_cutoff: int = 50,
    **extract_kwargs,

) -> dict[str, Struct]:
    '''
    Fuzzy search over a "pairs table" maintained by most backends
    as part of their symbology-info caching internals.

    Scan the native symbol key set and return best ranked
    matches back in a new `dict`.

    - `pairs`: table mapping each native symbol key to its pair
      struct; only the keys are scanned, the values are copied
      through (by reference) for every matched key.
    - `query`: the pattern to match against the keys; it is passed
      to the extractor unmodified (no case folding is done here).
    - `score_cutoff`: forwarded as-is to `fuzzy.extract()`.
    - `extract_kwargs`: any further keyword args are forwarded
      verbatim to `fuzzy.extract()`.

    The output `dict` is filled in the order the extractor reports
    its matches; the input `pairs` table is never mutated.

    '''
    # TODO: somehow cache this list (per call) like we were in
    # `open_symbol_search()`?
    keys: list[str] = list(pairs)

    # NOTE(review): presuming `fuzzy` is `rapidfuzz.process`, its
    # `extract()` only returns the top `limit=5` results by default;
    # callers wanting more (or all, via `limit=None`) can now say so
    # through `extract_kwargs` - TODO confirm against the import.
    matches: list[tuple[
        Sequence[Hashable],  # matching input key
        Any,  # scores
        Any,
    ]] = fuzzy.extract(
        # NOTE: most backends provide keys uppercased
        query=query,
        choices=keys,
        score_cutoff=score_cutoff,
        **extract_kwargs,
    )

    # repack matched pairs in the output dict; only the first field
    # of each match entry (the matched key) is needed for the lookup.
    matched_pairs: dict[str, Struct] = {
        match[0]: pairs[match[0]]
        for match in matches
    }
    return matched_pairs