From cb728e3bd67333530951328dac97e83d61ff46b3 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 29 Mar 2024 18:46:37 -0400 Subject: [PATCH] Be mega pedantic with msg-spec building Turns out the generics based payload speccing API, as in https://jcristharif.com/msgspec/supported-types.html#generic-types, DOES WORK properly as long as we don't rely on inheritance from `Msg` a parent `Generic`.. So let's get real pedantic in the `mk_msg_spec()` internals as well as verification in the test suite! Fixes in `.msg.types`: - implement (as part of tinker testing) multiple spec union building methods via a `spec_build_method: str` to `mk_msg_spec()` and leave a buncha notes around what did and didn't work: - 'indexed_generics' is the only method THAT WORKS and the one that you'd expect being closest to the `msgspec` docs (link above). - 'defstruct' using dynamically defined msgs => doesn't work! - 'types_new_class' using dynamically defined msgs but with `types.new_clas()` => ALSO doesn't work.. - explicitly separate the `.pld` type-constrainable by user code msg set into `types._payload_spec_msgs` putting the others in a `types._runtime_spec_msgs` and the full set defined as `.__spec__` (moving it out of the pkg-mod and back to `.types` as well). - for the `_payload_spec_msgs` msgs manually make them inherit `Generic[PayloadT]` and (redunantly) define a `.pld: PayloadT` field. - make `IpcCtxSpec.functype` an in line `Literal`. - toss in some TODO notes about choosing a better `Msg.cid` type. Fixes/tweaks around `.msg._codec`: - rename `MsgCodec.ipc/pld_msg_spec` -> `.msg/pld_spec` - make `._enc/._dec` non optional fields - wow, ^facepalm^ , make sure `._ipc.MsgpackTCPStream.__init__()` uses `mk_codec()` since `MsgCodec` can't be (easily) constructed directly. Get more detailed in testing: - inside the `chk_pld_type()` helper ensure `roundtrip` is always set to some value, `None` by default but a bool depending on legit outcome. - drop input `generic`; no longer used. - drop the masked `typedef` loop from `Msg.__subclasses__()`. - for add an `expect_roundtrip: bool` and use to jump into debugger when any expectation doesn't match the outcome. - use new `MsgCodec` field names (as per first section above). - ensure the encoded msg matches the decoded one from both the ad-hoc decoder and codec loaded values. - ensure the pld checking is only applied to msgs in the `types._payload_spec_msgs` set by `typef.__name__` filtering since `mk_msg_spec()` now returns the full `.types.Msg` set. --- tests/test_caps_based_msging.py | 149 ++++++++++++--------- tractor/_ipc.py | 3 +- tractor/msg/__init__.py | 38 ++---- tractor/msg/_codec.py | 32 +++-- tractor/msg/types.py | 220 ++++++++++++++++++++++++-------- 5 files changed, 288 insertions(+), 154 deletions(-) diff --git a/tests/test_caps_based_msging.py b/tests/test_caps_based_msging.py index 98ab7fa..f9c3daa 100644 --- a/tests/test_caps_based_msging.py +++ b/tests/test_caps_based_msging.py @@ -35,6 +35,7 @@ from tractor.msg import ( apply_codec, current_msgspec_codec, ) +from tractor.msg import types from tractor.msg.types import ( # PayloadT, Msg, @@ -235,31 +236,15 @@ def test_codec_hooks_mod(): def chk_pld_type( - generic: Msg|_GenericAlias, - payload_type: Type[Struct]|Any, + payload_spec: Type[Struct]|Any, pld: Any, + expect_roundtrip: bool|None = None, + ) -> bool: - roundtrip: bool = False pld_val_type: Type = type(pld) - # gen_paramed: _GenericAlias = generic[payload_type] - # for typedef in ( - # [gen_paramed] - # + - # # type-var should always be set for these sub-types - # # as well! - # Msg.__subclasses__() - # ): - # if typedef.__name__ not in [ - # 'Msg', - # 'Started', - # 'Yield', - # 'Return', - # ]: - # continue - # TODO: verify that the overridden subtypes # DO NOT have modified type-annots from original! # 'Start', .pld: FuncSpec @@ -267,48 +252,63 @@ def chk_pld_type( # 'Stop', .pld: UNSEt # 'Error', .pld: ErrorData - - pld_type_spec: Union[Type[Struct]] - msg_types: list[Msg[payload_type]] - - # make a one-off dec to compare with our `MsgCodec` instance - # which does the below `mk_msg_spec()` call internally - ( - pld_type_spec, - msg_types, - ) = mk_msg_spec( - payload_type_union=payload_type, - ) - enc = msgpack.Encoder() - dec = msgpack.Decoder( - type=pld_type_spec or Any, # like `Msg[Any]` - ) - codec: MsgCodec = mk_codec( # NOTE: this ONLY accepts `Msg.pld` fields of a specified # type union. - ipc_pld_spec=payload_type, + ipc_pld_spec=payload_spec, + ) + + # make a one-off dec to compare with our `MsgCodec` instance + # which does the below `mk_msg_spec()` call internally + ipc_msg_spec: Union[Type[Struct]] + msg_types: list[Msg[payload_spec]] + ( + ipc_msg_spec, + msg_types, + ) = mk_msg_spec( + payload_type_union=payload_spec, + ) + _enc = msgpack.Encoder() + _dec = msgpack.Decoder( + type=ipc_msg_spec or Any, # like `Msg[Any]` + ) + + assert ( + payload_spec + == + codec.pld_spec ) # assert codec.dec == dec - # XXX-^ not sure why these aren't "equal" but when cast + # + # ^-XXX-^ not sure why these aren't "equal" but when cast # to `str` they seem to match ?? .. kk + assert ( - str(pld_type_spec) + str(ipc_msg_spec) == - str(codec.ipc_pld_spec) + str(codec.msg_spec) == - str(dec.type) + str(_dec.type) == str(codec.dec.type) ) # verify the boxed-type for all variable payload-type msgs. + if not msg_types: + breakpoint() + + roundtrip: bool|None = None + pld_spec_msg_names: list[str] = [ + td.__name__ for td in types._payload_spec_msgs + ] for typedef in msg_types: + skip_runtime_msg: bool = typedef.__name__ not in pld_spec_msg_names + if skip_runtime_msg: + continue pld_field = structs.fields(typedef)[1] - assert pld_field.type is payload_type - # TODO-^ does this need to work to get all subtypes to adhere? + assert pld_field.type is payload_spec # TODO-^ does this need to work to get all subtypes to adhere? kwargs: dict[str, Any] = { 'cid': '666', @@ -316,44 +316,72 @@ def chk_pld_type( } enc_msg: Msg = typedef(**kwargs) + _wire_bytes: bytes = _enc.encode(enc_msg) wire_bytes: bytes = codec.enc.encode(enc_msg) - _wire_bytes: bytes = enc.encode(enc_msg) + assert _wire_bytes == wire_bytes + ve: ValidationError|None = None try: - _dec_msg = dec.decode(wire_bytes) dec_msg = codec.dec.decode(wire_bytes) + _dec_msg = _dec.decode(wire_bytes) - assert dec_msg.pld == pld - assert _dec_msg.pld == pld - assert (roundtrip := (_dec_msg == enc_msg)) + # decoded msg and thus payload should be exactly same! + assert (roundtrip := ( + _dec_msg + == + dec_msg + == + enc_msg + )) - except ValidationError as ve: - if pld_val_type is payload_type: + if ( + expect_roundtrip is not None + and expect_roundtrip != roundtrip + ): + breakpoint() + + assert ( + pld + == + dec_msg.pld + == + enc_msg.pld + ) + # assert (roundtrip := (_dec_msg == enc_msg)) + + except ValidationError as _ve: + ve = _ve + roundtrip: bool = False + if pld_val_type is payload_spec: raise ValueError( 'Got `ValidationError` despite type-var match!?\n' f'pld_val_type: {pld_val_type}\n' - f'payload_type: {payload_type}\n' + f'payload_type: {payload_spec}\n' ) from ve else: # ow we good cuz the pld spec mismatched. print( 'Got expected `ValidationError` since,\n' - f'{pld_val_type} is not {payload_type}\n' + f'{pld_val_type} is not {payload_spec}\n' ) else: if ( - pld_val_type is not payload_type - and payload_type is not Any + payload_spec is not Any + and + pld_val_type is not payload_spec ): raise ValueError( 'DID NOT `ValidationError` despite expected type match!?\n' f'pld_val_type: {pld_val_type}\n' - f'payload_type: {payload_type}\n' + f'payload_type: {payload_spec}\n' ) - return roundtrip + # full code decode should always be attempted! + if roundtrip is None: + breakpoint() + return roundtrip def test_limit_msgspec(): @@ -365,9 +393,10 @@ def test_limit_msgspec(): # ensure we can round-trip a boxing `Msg` assert chk_pld_type( - Msg, + # Msg, Any, None, + expect_roundtrip=True, ) # TODO: don't need this any more right since @@ -379,7 +408,7 @@ def test_limit_msgspec(): # verify that a mis-typed payload value won't decode assert not chk_pld_type( - Msg, + # Msg, int, pld='doggy', ) @@ -392,13 +421,13 @@ def test_limit_msgspec(): value: Any assert not chk_pld_type( - Msg, + # Msg, CustomPayload, pld='doggy', ) assert chk_pld_type( - Msg, + # Msg, CustomPayload, pld=CustomPayload(name='doggy', value='urmom') ) diff --git a/tractor/_ipc.py b/tractor/_ipc.py index b1c2ccd..5f71c38 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -48,6 +48,7 @@ from tractor._exceptions import TransportClosed from tractor.msg import ( _ctxvar_MsgCodec, MsgCodec, + mk_codec, ) log = get_logger(__name__) @@ -162,7 +163,7 @@ class MsgpackTCPStream(MsgTransport): # allow for custom IPC msg interchange format # dynamic override Bo - self.codec: MsgCodec = codec or MsgCodec() + self.codec: MsgCodec = codec or mk_codec() async def _iter_packets(self) -> AsyncGenerator[dict, None]: ''' diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index a93fa88..0c8809a 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -37,36 +37,20 @@ from ._codec import ( from .types import ( Msg as Msg, - Start, # with pld + Start as Start, # with pld FuncSpec as FuncSpec, - StartAck, # with pld + StartAck as StartAck, # with pld IpcCtxSpec as IpcCtxSpec, - Started, - Yield, - Stop, - Return, + Started as Started, + Yield as Yield, + Stop as Stop, + Return as Return, - Error, # with pld - ErrorData as ErrorData + Error as Error, # with pld + ErrorData as ErrorData, + + # full msg spec set + __spec__ as __spec__, ) - - -# built-in SC shuttle protocol msg type set in -# approx order of the IPC txn-state spaces. -__spec__: list[Msg] = [ - - # inter-actor RPC initiation - Start, - StartAck, - - # no-outcome-yet IAC (inter-actor-communication) - Started, - Yield, - Stop, - - # termination outcomes - Return, - Error, -] diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index e6cb4f1..4477d39 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -73,16 +73,15 @@ class MsgCodec(Struct): A IPC msg interchange format lib's encoder + decoder pair. ''' - # post-configure-cached when prop-accessed (see `mk_codec()` - # OR can be passed directly as, - # `MsgCodec(_enc=, _dec=)` - _enc: msgpack.Encoder|None = None - _dec: msgpack.Decoder|None = None + _enc: msgpack.Encoder + _dec: msgpack.Decoder + + pld_spec: Union[Type[Struct]]|None # struct type unions # https://jcristharif.com/msgspec/structs.html#tagged-unions @property - def ipc_pld_spec(self) -> Union[Type[Struct]]: + def msg_spec(self) -> Union[Type[Struct]]: return self._dec.type lib: ModuleType = msgspec @@ -142,6 +141,7 @@ class MsgCodec(Struct): determined by the ''' + # https://jcristharif.com/msgspec/usage.html#typed-decoding return self._dec.decode(msg) # TODO: do we still want to try and support the sub-decoder with @@ -149,6 +149,7 @@ class MsgCodec(Struct): # future grief? # # -[ ] + # -> https://jcristharif.com/msgspec/api.html#raw # #def mk_pld_subdec( # self, @@ -224,6 +225,20 @@ class MsgCodec(Struct): # return codec.enc.encode(msg) + +# TODO: sub-decoded `Raw` fields? +# -[ ] see `MsgCodec._payload_decs` notes +# +# XXX if we wanted something more complex then field name str-keys +# we might need a header field type to describe the lookup sys? +# class Header(Struct, tag=True): +# ''' +# A msg header which defines payload properties + +# ''' +# payload_tag: str|None = None + + #def mk_tagged_union_dec( # tagged_structs: list[Struct], @@ -345,10 +360,6 @@ def mk_codec( assert len(ipc_msg_spec.__args__) == len(msg_types) assert ipc_msg_spec - dec = msgpack.Decoder( - type=ipc_msg_spec, # like `Msg[Any]` - ) - else: ipc_msg_spec = ipc_msg_spec or Any @@ -363,6 +374,7 @@ def mk_codec( codec = MsgCodec( _enc=enc, _dec=dec, + pld_spec=ipc_pld_spec, # payload_msg_specs=payload_msg_specs, # **kwargs, ) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 7d64e76..2411f0f 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -34,20 +34,13 @@ from typing import ( ) from msgspec import ( + defstruct, + # field, Struct, UNSET, + UnsetType, ) -# TODO: sub-decoded `Raw` fields? -# -[ ] see `MsgCodec._payload_decs` notes -# -# class Header(Struct, tag=True): -# ''' -# A msg header which defines payload properties - -# ''' -# payload_tag: str|None = None - # type variable for the boxed payload field `.pld` PayloadT = TypeVar('PayloadT') @@ -57,6 +50,9 @@ class Msg( Generic[PayloadT], tag=True, tag_field='msg_type', + + # eq=True, + # order=True, ): ''' The "god" boxing msg type. @@ -66,8 +62,13 @@ class Msg( tree. ''' - # TODO: use UNSET here? cid: str|None # call/context-id + # ^-TODO-^: more explicit type? + # -[ ] use UNSET here? + # https://jcristharif.com/msgspec/supported-types.html#unset + # + # -[ ] `uuid.UUID` which has multi-protocol support + # https://jcristharif.com/msgspec/supported-types.html#uuid # The msgs "payload" (spelled without vowels): # https://en.wikipedia.org/wiki/Payload_(computing) @@ -136,19 +137,18 @@ class Start( pld: FuncSpec -FuncType: Literal[ - 'asyncfunc', - 'asyncgen', - 'context', # TODO: the only one eventually? -] = 'context' - - class IpcCtxSpec(Struct): ''' An inter-actor-`trio.Task`-comms `Context` spec. ''' - functype: FuncType + # TODO: maybe better names for all these? + # -[ ] obvi ^ would need sync with `._rpc` + functype: Literal[ + 'asyncfunc', + 'asyncgen', + 'context', # TODO: the only one eventually? + ] # TODO: as part of the reponse we should report our allowed # msg spec which should be generated from the type-annots as @@ -182,6 +182,7 @@ class Started( decorated IPC endpoint. ''' + pld: PayloadT # TODO: instead of using our existing `Start` @@ -198,6 +199,7 @@ class Yield( Per IPC transmission of a value from `await MsgStream.send()`. ''' + pld: PayloadT class Stop(Msg): @@ -206,7 +208,7 @@ class Stop(Msg): of `StopAsyncIteration`. ''' - pld: UNSET + pld: UnsetType = UNSET class Return( @@ -218,6 +220,7 @@ class Return( func-as-`trio.Task`. ''' + pld: PayloadT class ErrorData(Struct): @@ -258,13 +261,47 @@ class Error(Msg): # cid: str +# built-in SC shuttle protocol msg type set in +# approx order of the IPC txn-state spaces. +__spec__: list[Msg] = [ + + # inter-actor RPC initiation + Start, + StartAck, + + # no-outcome-yet IAC (inter-actor-communication) + Started, + Yield, + Stop, + + # termination outcomes + Return, + Error, +] + +_runtime_spec_msgs: list[Msg] = [ + Start, + StartAck, + Stop, + Error, +] +_payload_spec_msgs: list[Msg] = [ + Started, + Yield, + Return, +] + + def mk_msg_spec( payload_type_union: Union[Type] = Any, - boxing_msg_set: set[Msg] = { - Started, - Yield, - Return, - }, + + # boxing_msg_set: list[Msg] = _payload_spec_msgs, + spec_build_method: Literal[ + 'indexed_generics', # works + 'defstruct', + 'types_new_class', + + ] = 'indexed_generics', ) -> tuple[ Union[Type[Msg]], @@ -281,26 +318,58 @@ def mk_msg_spec( ''' submsg_types: list[Type[Msg]] = Msg.__subclasses__() + bases: tuple = ( + # XXX NOTE XXX the below generic-parameterization seems to + # be THE ONLY way to get this to work correctly in terms + # of getting ValidationError on a roundtrip? + Msg[payload_type_union], + Generic[PayloadT], + ) + defstruct_bases: tuple = ( + Msg, # [payload_type_union], + # Generic[PayloadT], + # ^-XXX-^: not allowed? lul.. + ) + ipc_msg_types: list[Msg] = [] - # TODO: see below as well, - # => union building approach with `.__class_getitem__()` - # doesn't seem to work..? - # - # payload_type_spec: Union[Type[Msg]] - # - msg_types: list[Msg] = [] - for msgtype in boxing_msg_set: + idx_msg_types: list[Msg] = [] + defs_msg_types: list[Msg] = [] + nc_msg_types: list[Msg] = [] + + for msgtype in __spec__: + + # for the NON-payload (user api) type specify-able + # msgs types, we simply aggregate the def as is + # for inclusion in the output type `Union`. + if msgtype not in _payload_spec_msgs: + ipc_msg_types.append(msgtype) + continue # check inheritance sanity assert msgtype in submsg_types # TODO: wait why do we need the dynamic version here? - # -[ ] paraming the `PayloadT` values via `Generic[T]` - # doesn't seem to work at all? - # -[ ] is there a way to get it to work at module level - # just using inheritance or maybe a metaclass? + # XXX ANSWER XXX -> BC INHERITANCE.. don't work w generics.. # - # index_paramed_msg_type: Msg = msgtype[payload_type_union] + # NOTE previously bc msgtypes WERE NOT inheritting + # directly the `Generic[PayloadT]` type, the manual method + # of generic-paraming with `.__class_getitem__()` wasn't + # working.. + # + # XXX but bc i changed that to make every subtype inherit + # it, this manual "indexed parameterization" method seems + # to work? + # + # -[x] paraming the `PayloadT` values via `Generic[T]` + # does work it seems but WITHOUT inheritance of generics + # + # -[-] is there a way to get it to work at module level + # just using inheritance or maybe a metaclass? + # => thot that `defstruct` might work, but NOPE, see + # below.. + # + idxed_msg_type: Msg = msgtype[payload_type_union] + idx_msg_types.append(idxed_msg_type) # TODO: WHY do we need to dynamically generate the # subtype-msgs here to ensure the `.pld` parameterization @@ -308,30 +377,69 @@ def mk_msg_spec( # `msgpack.Decoder()`..? # # dynamically create the payload type-spec-limited msg set. - manual_paramed_msg_subtype: Type = types.new_class( - msgtype.__name__, - ( - # XXX NOTE XXX this seems to be THE ONLY - # way to get this to work correctly!?! - Msg[payload_type_union], - Generic[PayloadT], - ), - {}, + newclass_msgtype: Type = types.new_class( + name=msgtype.__name__, + bases=bases, + kwds={}, + ) + nc_msg_types.append( + newclass_msgtype[payload_type_union] ) - # TODO: grok the diff here better.. + # with `msgspec.structs.defstruct` + # XXX ALSO DOESN'T WORK + defstruct_msgtype = defstruct( + name=msgtype.__name__, + fields=[ + ('cid', str), + + # XXX doesn't seem to work.. + # ('pld', PayloadT), + + ('pld', payload_type_union), + ], + bases=defstruct_bases, + ) + defs_msg_types.append(defstruct_msgtype) + # assert index_paramed_msg_type == manual_paramed_msg_subtype - # XXX TODO: why does the manual method work but not the - # `.__class_getitem__()` one!?! - paramed_msg_type = manual_paramed_msg_subtype + # paramed_msg_type = manual_paramed_msg_subtype - # payload_type_spec |= paramed_msg_type - msg_types.append(paramed_msg_type) + # ipc_payload_msgs_type_union |= index_paramed_msg_type + idx_spec: Union[Type[Msg]] = Union[*idx_msg_types] + def_spec: Union[Type[Msg]] = Union[*defs_msg_types] + nc_spec: Union[Type[Msg]] = Union[*nc_msg_types] + + specs: dict[str, Union[Type[Msg]]] = { + 'indexed_generics': idx_spec, + 'defstruct': def_spec, + 'types_new_class': nc_spec, + } + msgtypes_table: dict[str, list[Msg]] = { + 'indexed_generics': idx_msg_types, + 'defstruct': defs_msg_types, + 'types_new_class': nc_msg_types, + } + + # XXX lol apparently type unions can't ever + # be equal eh? + # TODO: grok the diff here better.. + # + # assert ( + # idx_spec + # == + # nc_spec + # == + # def_spec + # ) + # breakpoint() + + pld_spec: Union[Type] = specs[spec_build_method] + runtime_spec: Union[Type] = Union[*ipc_msg_types] - payload_type_spec: Union[Type[Msg]] = Union[*msg_types] return ( - payload_type_spec, - msg_types, + pld_spec | runtime_spec, + msgtypes_table[spec_build_method] + ipc_msg_types, )