Be mega pedantic with msg-spec building

Turns out the generics based payload speccing API, as in
https://jcristharif.com/msgspec/supported-types.html#generic-types,
DOES WORK properly as long as we don't rely on inheritance from `Msg`
a parent `Generic`..

So let's get real pedantic in the `mk_msg_spec()` internals as well as
verification in the test suite!

Fixes in `.msg.types`:
- implement (as part of tinker testing) multiple spec union building
  methods via a `spec_build_method: str` to `mk_msg_spec()` and leave a
  buncha notes around what did and didn't work:
  - 'indexed_generics' is the only method THAT WORKS and the one that
    you'd expect being closest to the `msgspec` docs (link above).
  - 'defstruct' using dynamically defined msgs => doesn't work!
  - 'types_new_class' using dynamically defined msgs but with
    `types.new_clas()` => ALSO doesn't work..

- explicitly separate the `.pld` type-constrainable by user code msg
  set into `types._payload_spec_msgs` putting the others in
  a `types._runtime_spec_msgs` and the full set defined as `.__spec__`
  (moving it out of the pkg-mod and back to `.types` as well).

- for the `_payload_spec_msgs` msgs manually make them inherit `Generic[PayloadT]`
  and (redunantly) define a `.pld: PayloadT` field.

- make `IpcCtxSpec.functype` an in line `Literal`.

- toss in some TODO notes about choosing a better `Msg.cid` type.

Fixes/tweaks around `.msg._codec`:
- rename `MsgCodec.ipc/pld_msg_spec` -> `.msg/pld_spec`
- make `._enc/._dec` non optional fields
- wow, ^facepalm^ , make sure `._ipc.MsgpackTCPStream.__init__()` uses
  `mk_codec()` since `MsgCodec` can't be (easily) constructed directly.

Get more detailed in testing:
- inside the `chk_pld_type()` helper ensure `roundtrip` is always set to
  some value, `None` by default but a bool depending on legit outcome.
  - drop input `generic`; no longer used.
  - drop the masked `typedef` loop from `Msg.__subclasses__()`.
  - for add an `expect_roundtrip: bool` and use to jump into debugger
    when any expectation doesn't match the outcome.
- use new `MsgCodec` field names (as per first section above).
- ensure the encoded msg matches the decoded one from both the ad-hoc
  decoder and codec loaded values.
- ensure the pld checking is only applied to msgs in the
  `types._payload_spec_msgs` set by `typef.__name__` filtering
  since `mk_msg_spec()` now returns the full `.types.Msg` set.
msg_codecs
Tyler Goodlet 2024-03-29 18:46:37 -04:00
parent fb8196e354
commit 3ba46362a9
5 changed files with 289 additions and 154 deletions

View File

@ -35,6 +35,7 @@ from tractor.msg import (
apply_codec,
current_msgspec_codec,
)
from tractor.msg import types
from tractor.msg.types import (
# PayloadT,
Msg,
@ -235,31 +236,15 @@ def test_codec_hooks_mod():
def chk_pld_type(
generic: Msg|_GenericAlias,
payload_type: Type[Struct]|Any,
payload_spec: Type[Struct]|Any,
pld: Any,
expect_roundtrip: bool|None = None,
) -> bool:
roundtrip: bool = False
pld_val_type: Type = type(pld)
# gen_paramed: _GenericAlias = generic[payload_type]
# for typedef in (
# [gen_paramed]
# +
# # type-var should always be set for these sub-types
# # as well!
# Msg.__subclasses__()
# ):
# if typedef.__name__ not in [
# 'Msg',
# 'Started',
# 'Yield',
# 'Return',
# ]:
# continue
# TODO: verify that the overridden subtypes
# DO NOT have modified type-annots from original!
# 'Start', .pld: FuncSpec
@ -267,48 +252,64 @@ def chk_pld_type(
# 'Stop', .pld: UNSEt
# 'Error', .pld: ErrorData
pld_type_spec: Union[Type[Struct]]
msg_types: list[Msg[payload_type]]
# make a one-off dec to compare with our `MsgCodec` instance
# which does the below `mk_msg_spec()` call internally
(
pld_type_spec,
msg_types,
) = mk_msg_spec(
payload_type_union=payload_type,
)
enc = msgpack.Encoder()
dec = msgpack.Decoder(
type=pld_type_spec or Any, # like `Msg[Any]`
)
codec: MsgCodec = mk_codec(
# NOTE: this ONLY accepts `Msg.pld` fields of a specified
# type union.
ipc_pld_spec=payload_type,
ipc_pld_spec=payload_spec,
)
# make a one-off dec to compare with our `MsgCodec` instance
# which does the below `mk_msg_spec()` call internally
ipc_msg_spec: Union[Type[Struct]]
msg_types: list[Msg[payload_spec]]
(
ipc_msg_spec,
msg_types,
) = mk_msg_spec(
payload_type_union=payload_spec,
)
_enc = msgpack.Encoder()
_dec = msgpack.Decoder(
type=ipc_msg_spec or Any, # like `Msg[Any]`
)
assert (
payload_spec
==
codec.pld_spec
)
# assert codec.dec == dec
# XXX-^ not sure why these aren't "equal" but when cast
#
# ^-XXX-^ not sure why these aren't "equal" but when cast
# to `str` they seem to match ?? .. kk
assert (
str(pld_type_spec)
str(ipc_msg_spec)
==
str(codec.ipc_pld_spec)
str(codec.msg_spec)
==
str(dec.type)
str(_dec.type)
==
str(codec.dec.type)
)
# verify the boxed-type for all variable payload-type msgs.
if not msg_types:
breakpoint()
roundtrip: bool|None = None
pld_spec_msg_names: list[str] = [
td.__name__ for td in types._payload_spec_msgs
]
for typedef in msg_types:
skip_runtime_msg: bool = typedef.__name__ not in pld_spec_msg_names
if skip_runtime_msg:
continue
pld_field = structs.fields(typedef)[1]
assert pld_field.type is payload_type
# TODO-^ does this need to work to get all subtypes to adhere?
assert pld_field.type is payload_spec # TODO-^ does this need to work to get all subtypes to adhere?
kwargs: dict[str, Any] = {
'cid': '666',
@ -316,44 +317,72 @@ def chk_pld_type(
}
enc_msg: Msg = typedef(**kwargs)
_wire_bytes: bytes = _enc.encode(enc_msg)
wire_bytes: bytes = codec.enc.encode(enc_msg)
_wire_bytes: bytes = enc.encode(enc_msg)
assert _wire_bytes == wire_bytes
ve: ValidationError|None = None
try:
_dec_msg = dec.decode(wire_bytes)
dec_msg = codec.dec.decode(wire_bytes)
_dec_msg = _dec.decode(wire_bytes)
assert dec_msg.pld == pld
assert _dec_msg.pld == pld
assert (roundtrip := (_dec_msg == enc_msg))
# decoded msg and thus payload should be exactly same!
assert (roundtrip := (
_dec_msg
==
dec_msg
==
enc_msg
))
except ValidationError as ve:
if pld_val_type is payload_type:
if (
expect_roundtrip is not None
and expect_roundtrip != roundtrip
):
breakpoint()
assert (
pld
==
dec_msg.pld
==
enc_msg.pld
)
# assert (roundtrip := (_dec_msg == enc_msg))
except ValidationError as _ve:
ve = _ve
roundtrip: bool = False
if pld_val_type is payload_spec:
raise ValueError(
'Got `ValidationError` despite type-var match!?\n'
f'pld_val_type: {pld_val_type}\n'
f'payload_type: {payload_type}\n'
f'payload_type: {payload_spec}\n'
) from ve
else:
# ow we good cuz the pld spec mismatched.
print(
'Got expected `ValidationError` since,\n'
f'{pld_val_type} is not {payload_type}\n'
f'{pld_val_type} is not {payload_spec}\n'
)
else:
if (
pld_val_type is not payload_type
and payload_type is not Any
payload_spec is not Any
and
pld_val_type is not payload_spec
):
raise ValueError(
'DID NOT `ValidationError` despite expected type match!?\n'
f'pld_val_type: {pld_val_type}\n'
f'payload_type: {payload_type}\n'
f'payload_type: {payload_spec}\n'
)
return roundtrip
# full code decode should always be attempted!
if roundtrip is None:
breakpoint()
return roundtrip
def test_limit_msgspec():
@ -365,9 +394,10 @@ def test_limit_msgspec():
# ensure we can round-trip a boxing `Msg`
assert chk_pld_type(
Msg,
# Msg,
Any,
None,
expect_roundtrip=True,
)
# TODO: don't need this any more right since
@ -379,7 +409,7 @@ def test_limit_msgspec():
# verify that a mis-typed payload value won't decode
assert not chk_pld_type(
Msg,
# Msg,
int,
pld='doggy',
)
@ -392,13 +422,13 @@ def test_limit_msgspec():
value: Any
assert not chk_pld_type(
Msg,
# Msg,
CustomPayload,
pld='doggy',
)
assert chk_pld_type(
Msg,
# Msg,
CustomPayload,
pld=CustomPayload(name='doggy', value='urmom')
)

View File

@ -48,6 +48,7 @@ from tractor._exceptions import TransportClosed
from tractor.msg import (
_ctxvar_MsgCodec,
MsgCodec,
mk_codec,
)
log = get_logger(__name__)
@ -162,7 +163,7 @@ class MsgpackTCPStream(MsgTransport):
# allow for custom IPC msg interchange format
# dynamic override Bo
self.codec: MsgCodec = codec or MsgCodec()
self.codec: MsgCodec = codec or mk_codec()
async def _iter_packets(self) -> AsyncGenerator[dict, None]:
'''

View File

@ -37,36 +37,20 @@ from ._codec import (
from .types import (
Msg as Msg,
Start, # with pld
Start as Start, # with pld
FuncSpec as FuncSpec,
StartAck, # with pld
StartAck as StartAck, # with pld
IpcCtxSpec as IpcCtxSpec,
Started,
Yield,
Stop,
Return,
Started as Started,
Yield as Yield,
Stop as Stop,
Return as Return,
Error, # with pld
ErrorData as ErrorData
Error as Error, # with pld
ErrorData as ErrorData,
# full msg spec set
__spec__ as __spec__,
)
# built-in SC shuttle protocol msg type set in
# approx order of the IPC txn-state spaces.
__spec__: list[Msg] = [
# inter-actor RPC initiation
Start,
StartAck,
# no-outcome-yet IAC (inter-actor-communication)
Started,
Yield,
Stop,
# termination outcomes
Return,
Error,
]

View File

@ -73,16 +73,15 @@ class MsgCodec(Struct):
A IPC msg interchange format lib's encoder + decoder pair.
'''
# post-configure-cached when prop-accessed (see `mk_codec()`
# OR can be passed directly as,
# `MsgCodec(_enc=<Encoder>, _dec=<Decoder>)`
_enc: msgpack.Encoder|None = None
_dec: msgpack.Decoder|None = None
_enc: msgpack.Encoder
_dec: msgpack.Decoder
pld_spec: Union[Type[Struct]]|None
# struct type unions
# https://jcristharif.com/msgspec/structs.html#tagged-unions
@property
def ipc_pld_spec(self) -> Union[Type[Struct]]:
def msg_spec(self) -> Union[Type[Struct]]:
return self._dec.type
lib: ModuleType = msgspec
@ -142,6 +141,7 @@ class MsgCodec(Struct):
determined by the
'''
# https://jcristharif.com/msgspec/usage.html#typed-decoding
return self._dec.decode(msg)
# TODO: do we still want to try and support the sub-decoder with
@ -149,6 +149,7 @@ class MsgCodec(Struct):
# future grief?
#
# -[ ] <NEW-ISSUE-FOR-ThIS-HERE>
# -> https://jcristharif.com/msgspec/api.html#raw
#
#def mk_pld_subdec(
# self,
@ -224,6 +225,20 @@ class MsgCodec(Struct):
# return codec.enc.encode(msg)
# TODO: sub-decoded `Raw` fields?
# -[ ] see `MsgCodec._payload_decs` notes
#
# XXX if we wanted something more complex then field name str-keys
# we might need a header field type to describe the lookup sys?
# class Header(Struct, tag=True):
# '''
# A msg header which defines payload properties
# '''
# payload_tag: str|None = None
#def mk_tagged_union_dec(
# tagged_structs: list[Struct],
@ -345,10 +360,6 @@ def mk_codec(
assert len(ipc_msg_spec.__args__) == len(msg_types)
assert ipc_msg_spec
dec = msgpack.Decoder(
type=ipc_msg_spec, # like `Msg[Any]`
)
else:
ipc_msg_spec = ipc_msg_spec or Any
@ -363,6 +374,7 @@ def mk_codec(
codec = MsgCodec(
_enc=enc,
_dec=dec,
pld_spec=ipc_pld_spec,
# payload_msg_specs=payload_msg_specs,
# **kwargs,
)

View File

@ -34,20 +34,13 @@ from typing import (
)
from msgspec import (
defstruct,
# field,
Struct,
UNSET,
UnsetType,
)
# TODO: sub-decoded `Raw` fields?
# -[ ] see `MsgCodec._payload_decs` notes
#
# class Header(Struct, tag=True):
# '''
# A msg header which defines payload properties
# '''
# payload_tag: str|None = None
# type variable for the boxed payload field `.pld`
PayloadT = TypeVar('PayloadT')
@ -57,6 +50,9 @@ class Msg(
Generic[PayloadT],
tag=True,
tag_field='msg_type',
# eq=True,
# order=True,
):
'''
The "god" boxing msg type.
@ -66,8 +62,13 @@ class Msg(
tree.
'''
# TODO: use UNSET here?
cid: str|None # call/context-id
# ^-TODO-^: more explicit type?
# -[ ] use UNSET here?
# https://jcristharif.com/msgspec/supported-types.html#unset
#
# -[ ] `uuid.UUID` which has multi-protocol support
# https://jcristharif.com/msgspec/supported-types.html#uuid
# The msgs "payload" (spelled without vowels):
# https://en.wikipedia.org/wiki/Payload_(computing)
@ -136,19 +137,18 @@ class Start(
pld: FuncSpec
FuncType: Literal[
'asyncfunc',
'asyncgen',
'context', # TODO: the only one eventually?
] = 'context'
class IpcCtxSpec(Struct):
'''
An inter-actor-`trio.Task`-comms `Context` spec.
'''
functype: FuncType
# TODO: maybe better names for all these?
# -[ ] obvi ^ would need sync with `._rpc`
functype: Literal[
'asyncfunc',
'asyncgen',
'context', # TODO: the only one eventually?
]
# TODO: as part of the reponse we should report our allowed
# msg spec which should be generated from the type-annots as
@ -182,6 +182,7 @@ class Started(
decorated IPC endpoint.
'''
pld: PayloadT
# TODO: instead of using our existing `Start`
@ -198,6 +199,7 @@ class Yield(
Per IPC transmission of a value from `await MsgStream.send(<value>)`.
'''
pld: PayloadT
class Stop(Msg):
@ -206,7 +208,7 @@ class Stop(Msg):
of `StopAsyncIteration`.
'''
pld: UNSET
pld: UnsetType = UNSET
class Return(
@ -218,6 +220,7 @@ class Return(
func-as-`trio.Task`.
'''
pld: PayloadT
class ErrorData(Struct):
@ -258,13 +261,47 @@ class Error(Msg):
# cid: str
def mk_msg_spec(
payload_type_union: Union[Type] = Any,
boxing_msg_set: set[Msg] = {
# built-in SC shuttle protocol msg type set in
# approx order of the IPC txn-state spaces.
__spec__: list[Msg] = [
# inter-actor RPC initiation
Start,
StartAck,
# no-outcome-yet IAC (inter-actor-communication)
Started,
Yield,
Stop,
# termination outcomes
Return,
Error,
]
_runtime_spec_msgs: list[Msg] = [
Start,
StartAck,
Stop,
Error,
]
_payload_spec_msgs: list[Msg] = [
Started,
Yield,
Return,
},
]
def mk_msg_spec(
payload_type_union: Union[Type] = Any,
# boxing_msg_set: list[Msg] = _payload_spec_msgs,
spec_build_method: Literal[
'indexed_generics', # works
'defstruct',
'types_new_class',
] = 'indexed_generics',
) -> tuple[
Union[Type[Msg]],
@ -281,26 +318,58 @@ def mk_msg_spec(
'''
submsg_types: list[Type[Msg]] = Msg.__subclasses__()
bases: tuple = (
# XXX NOTE XXX the below generic-parameterization seems to
# be THE ONLY way to get this to work correctly in terms
# of getting ValidationError on a roundtrip?
Msg[payload_type_union],
Generic[PayloadT],
)
defstruct_bases: tuple = (
Msg, # [payload_type_union],
# Generic[PayloadT],
# ^-XXX-^: not allowed? lul..
)
ipc_msg_types: list[Msg] = []
# TODO: see below as well,
# => union building approach with `.__class_getitem__()`
# doesn't seem to work..?
#
# payload_type_spec: Union[Type[Msg]]
#
msg_types: list[Msg] = []
for msgtype in boxing_msg_set:
idx_msg_types: list[Msg] = []
defs_msg_types: list[Msg] = []
nc_msg_types: list[Msg] = []
for msgtype in __spec__:
# for the NON-payload (user api) type specify-able
# msgs types, we simply aggregate the def as is
# for inclusion in the output type `Union`.
if msgtype not in _payload_spec_msgs:
ipc_msg_types.append(msgtype)
continue
# check inheritance sanity
assert msgtype in submsg_types
# TODO: wait why do we need the dynamic version here?
# -[ ] paraming the `PayloadT` values via `Generic[T]`
# doesn't seem to work at all?
# -[ ] is there a way to get it to work at module level
# just using inheritance or maybe a metaclass?
# XXX ANSWER XXX -> BC INHERITANCE.. don't work w generics..
#
# index_paramed_msg_type: Msg = msgtype[payload_type_union]
# NOTE previously bc msgtypes WERE NOT inheritting
# directly the `Generic[PayloadT]` type, the manual method
# of generic-paraming with `.__class_getitem__()` wasn't
# working..
#
# XXX but bc i changed that to make every subtype inherit
# it, this manual "indexed parameterization" method seems
# to work?
#
# -[x] paraming the `PayloadT` values via `Generic[T]`
# does work it seems but WITHOUT inheritance of generics
#
# -[-] is there a way to get it to work at module level
# just using inheritance or maybe a metaclass?
# => thot that `defstruct` might work, but NOPE, see
# below..
#
idxed_msg_type: Msg = msgtype[payload_type_union]
idx_msg_types.append(idxed_msg_type)
# TODO: WHY do we need to dynamically generate the
# subtype-msgs here to ensure the `.pld` parameterization
@ -308,30 +377,69 @@ def mk_msg_spec(
# `msgpack.Decoder()`..?
#
# dynamically create the payload type-spec-limited msg set.
manual_paramed_msg_subtype: Type = types.new_class(
msgtype.__name__,
(
# XXX NOTE XXX this seems to be THE ONLY
# way to get this to work correctly!?!
Msg[payload_type_union],
Generic[PayloadT],
),
{},
newclass_msgtype: Type = types.new_class(
name=msgtype.__name__,
bases=bases,
kwds={},
)
nc_msg_types.append(
newclass_msgtype[payload_type_union]
)
# TODO: grok the diff here better..
# with `msgspec.structs.defstruct`
# XXX ALSO DOESN'T WORK
defstruct_msgtype = defstruct(
name=msgtype.__name__,
fields=[
('cid', str),
# XXX doesn't seem to work..
# ('pld', PayloadT),
('pld', payload_type_union),
],
bases=defstruct_bases,
)
defs_msg_types.append(defstruct_msgtype)
# assert index_paramed_msg_type == manual_paramed_msg_subtype
# XXX TODO: why does the manual method work but not the
# `.__class_getitem__()` one!?!
paramed_msg_type = manual_paramed_msg_subtype
# paramed_msg_type = manual_paramed_msg_subtype
# payload_type_spec |= paramed_msg_type
msg_types.append(paramed_msg_type)
# ipc_payload_msgs_type_union |= index_paramed_msg_type
idx_spec: Union[Type[Msg]] = Union[*idx_msg_types]
def_spec: Union[Type[Msg]] = Union[*defs_msg_types]
nc_spec: Union[Type[Msg]] = Union[*nc_msg_types]
specs: dict[str, Union[Type[Msg]]] = {
'indexed_generics': idx_spec,
'defstruct': def_spec,
'types_new_class': nc_spec,
}
msgtypes_table: dict[str, list[Msg]] = {
'indexed_generics': idx_msg_types,
'defstruct': defs_msg_types,
'types_new_class': nc_msg_types,
}
# XXX lol apparently type unions can't ever
# be equal eh?
# TODO: grok the diff here better..
#
# assert (
# idx_spec
# ==
# nc_spec
# ==
# def_spec
# )
# breakpoint()
pld_spec: Union[Type] = specs[spec_build_method]
runtime_spec: Union[Type] = Union[*ipc_msg_types]
payload_type_spec: Union[Type[Msg]] = Union[*msg_types]
return (
payload_type_spec,
msg_types,
pld_spec | runtime_spec,
msgtypes_table[spec_build_method] + ipc_msg_types,
)