Add `data.match_from_pairs` fuzzy symbology scanner
A helper that uses `rapidfuzz` to fuzzy-scan a "pairs table" (a `dict[str, Struct]` input) of the kind most backends should expose as part of their (internal) symbology set. Also expose `data.types.Struct` at the subpkg top level.
ib_py311_fixes
parent
a97a0ced8c
commit
0ba75df877
|
@ -43,8 +43,10 @@ from ._symcache import (
|
|||
SymbologyCache,
|
||||
open_symcache,
|
||||
get_symcache,
|
||||
match_from_pairs,
|
||||
)
|
||||
from ._sampling import open_sample_stream
|
||||
from ..types import Struct
|
||||
|
||||
|
||||
__all__: list[str] = [
|
||||
|
@ -62,6 +64,7 @@ __all__: list[str] = [
|
|||
'open_symcache',
|
||||
'open_sample_stream',
|
||||
'get_symcache',
|
||||
'Struct',
|
||||
'SymbologyCache',
|
||||
'types',
|
||||
]
|
||||
|
|
|
@ -308,7 +308,7 @@ class SymbologyCache(Struct):
|
|||
matches in a `dict` including the `MktPair` values.
|
||||
|
||||
'''
|
||||
matches = fuzzy.extractBests(
|
||||
matches = fuzzy.extract(
|
||||
pattern,
|
||||
getattr(self, table),
|
||||
score_cutoff=50,
|
||||
|
@ -466,3 +466,41 @@ def get_symcache(
|
|||
pdbp.xpm()
|
||||
|
||||
return symcache
|
||||
|
||||
|
||||
def match_from_pairs(
    pairs: dict[str, Struct],
    query: str,
    score_cutoff: int = 50,

) -> dict[str, Struct]:
    '''
    Fuzzy search over a "pairs table" maintained by most backends
    as part of their symbology-info caching internals.

    Scan the native symbol key set and return best ranked
    matches back in a new `dict`.

    - `pairs`: table mapping each backend-native symbol key to its
      pair-info struct; it is only read, never mutated.
    - `query`: the search pattern, handed to the fuzzy matcher
      exactly as received (no case normalization is done here).
    - `score_cutoff`: minimum match score forwarded to
      `fuzzy.extract()`.

    Returns a new `dict` holding only the matched keys, each mapped
    to the same value object it has in `pairs`, inserted in the
    order the matcher reported them.

    '''
    # TODO: somehow cache this list (per call) like we were in
    # `open_symbol_search()`?
    keys: list[str] = list(pairs)

    # Each match is unpacked below as (key, *rest); only the leading
    # key is used to index back into `pairs`.
    # NOTE(review): no `limit` is passed, so whatever default cap
    # `fuzzy.extract()` applies to the result count is in effect
    # — confirm that is intended.
    matches: list[tuple[
        str,  # matching input key
        Any,  # scores
        Any,
    ]] = fuzzy.extract(
        # NOTE: most backends provide keys uppercased
        query=query,
        choices=keys,
        score_cutoff=score_cutoff,
    )

    # repack the matched entries into a fresh output dict; the values
    # are the very same objects held by the input table.
    matched_pairs: dict[str, Struct] = {
        pair_key: pairs[pair_key]
        for pair_key, *_ in matches
    }
    return matched_pairs
|
||||
|
|
Loading…
Reference in New Issue