Add `data.match_from_pairs` fuzzy symbology scanner
A helper for scanning a "pairs table", which most backends should expose as part of their (internal) symbology set, using `rapidfuzz` over a `dict[str, Struct]` input table. Also expose `data.types.Struct` at the subpkg top level.
ib_py311_fixes
parent
a97a0ced8c
commit
0ba75df877
|
@ -43,8 +43,10 @@ from ._symcache import (
|
||||||
SymbologyCache,
|
SymbologyCache,
|
||||||
open_symcache,
|
open_symcache,
|
||||||
get_symcache,
|
get_symcache,
|
||||||
|
match_from_pairs,
|
||||||
)
|
)
|
||||||
from ._sampling import open_sample_stream
|
from ._sampling import open_sample_stream
|
||||||
|
from ..types import Struct
|
||||||
|
|
||||||
|
|
||||||
__all__: list[str] = [
|
__all__: list[str] = [
|
||||||
|
@ -62,6 +64,7 @@ __all__: list[str] = [
|
||||||
'open_symcache',
|
'open_symcache',
|
||||||
'open_sample_stream',
|
'open_sample_stream',
|
||||||
'get_symcache',
|
'get_symcache',
|
||||||
|
'Struct',
|
||||||
'SymbologyCache',
|
'SymbologyCache',
|
||||||
'types',
|
'types',
|
||||||
]
|
]
|
||||||
|
|
|
@ -308,7 +308,7 @@ class SymbologyCache(Struct):
|
||||||
matches in a `dict` including the `MktPair` values.
|
matches in a `dict` including the `MktPair` values.
|
||||||
|
|
||||||
'''
|
'''
|
||||||
matches = fuzzy.extractBests(
|
matches = fuzzy.extract(
|
||||||
pattern,
|
pattern,
|
||||||
getattr(self, table),
|
getattr(self, table),
|
||||||
score_cutoff=50,
|
score_cutoff=50,
|
||||||
|
@ -466,3 +466,41 @@ def get_symcache(
|
||||||
pdbp.xpm()
|
pdbp.xpm()
|
||||||
|
|
||||||
return symcache
|
return symcache
|
||||||
|
|
||||||
|
|
||||||
|
def match_from_pairs(
    pairs: dict[str, Struct],
    query: str,
    score_cutoff: int = 50,

) -> dict[str, Struct]:
    '''
    Fuzzy search over a "pairs table" maintained by most backends
    as part of their symbology-info caching internals.

    Scan the native symbol key set of `pairs` for fuzzy matches
    against `query` and return the matching entries in a new
    `dict`, keyed exactly like the input table and inserted in the
    order the matcher reported them.

    `score_cutoff` is forwarded unchanged to `fuzzy.extract()`.

    '''
    # TODO: somehow cache this list (per call) like we were in
    # `open_symbol_search()`?
    keys: list[str] = list(pairs)
    matches: list[tuple[
        str,  # matching input key
        Any,  # score
        Any,  # remaining matcher-specific field (unused here)
    ]] = fuzzy.extract(
        # NOTE: most backends provide keys uppercased; the query
        # is passed through unmodified here.
        query=query,
        choices=keys,
        score_cutoff=score_cutoff,
    )

    # repack the matched pairs in a fresh output dict; only the
    # first field of each match (the key) is needed for the lookup
    # back into the input table.
    return {
        match[0]: pairs[match[0]]
        for match in matches
    }
|
||||||
|
|
Loading…
Reference in New Issue