From c38d0f826e045db66f6be5cf536973157be763c2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 14 Dec 2021 10:54:35 -0500 Subject: [PATCH 1/8] Add an unserializable value causes error before started test --- tests/test_context_stream_semantics.py | 29 +++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tests/test_context_stream_semantics.py b/tests/test_context_stream_semantics.py index 4c74a9a..b86c21f 100644 --- a/tests/test_context_stream_semantics.py +++ b/tests/test_context_stream_semantics.py @@ -58,6 +58,33 @@ from conftest import tractor_test _state: bool = False +@tractor.context +async def error_before_started( + ctx: tractor.Context, +) -> None: + # send an unserializable type + await ctx.started(object()) + + +def test_error_before_started(): + async def main(): + async with tractor.open_nursery() as n: + portal = await n.start_actor( + 'errorer', + enable_modules=[__name__], + ) + + async with portal.open_context( + error_before_started + ) as (ctx, sent): + await trio.sleep(1) + + with pytest.raises(tractor.RemoteActorError) as excinfo: + trio.run(main) + + assert excinfo.value.type == TypeError + + @tractor.context async def too_many_starteds( ctx: tractor.Context, @@ -466,7 +493,7 @@ async def cancel_self( try: async with ctx.open_stream(): pass - except ContextCancelled: + except tractor.ContextCancelled: pass # check a real ``trio.Cancelled`` is raised on a checkpoint From 5d424e3703483ce046a1fc11cda0872b2551c907 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 14 Dec 2021 10:56:31 -0500 Subject: [PATCH 2/8] Hide the key error tb on remote starting errors --- tractor/_portal.py | 6 +++--- tractor/_streaming.py | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tractor/_portal.py b/tractor/_portal.py index 564d329..9b96bd2 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -428,12 +428,12 @@ class Portal: first = msg['started'] ctx._started_called = True - except KeyError: + except KeyError as kerr: assert msg.get('cid'), ("Received internal error at context?") if msg.get('error'): - # raise the error message - raise unpack_error(msg, self.channel) + # raise kerr from unpack_error(msg, self.channel) + raise unpack_error(msg, self.channel) from None else: raise diff --git a/tractor/_streaming.py b/tractor/_streaming.py index 05932b8..86060b9 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -425,7 +425,6 @@ class Context: f'Remote context error for {self.chan.uid}:{self.cid}:\n' f'{msg["error"]["tb_str"]}' ) - # await ctx._maybe_error_from_remote_msg(msg) self._error = unpack_error(msg, self.chan) # TODO: tempted to **not** do this by-reraising in a From 9650b010de47f2b95262b28d5d62d131888d155f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 14 Dec 2021 16:16:57 -0500 Subject: [PATCH 3/8] Add a test for the real issue: error overriding The underlying issue is actually that a nested `Context` which was cancelled was overriding the local error that triggered that secondary's context's cancellation in the first place XD. This test catches that case. Relates to https://github.com/pikers/piker/issues/244 --- tests/test_context_stream_semantics.py | 123 +++++++++++++++++++------ 1 file changed, 96 insertions(+), 27 deletions(-) diff --git a/tests/test_context_stream_semantics.py b/tests/test_context_stream_semantics.py index b86c21f..16587c9 100644 --- a/tests/test_context_stream_semantics.py +++ b/tests/test_context_stream_semantics.py @@ -5,6 +5,7 @@ Verify the we raise errors when streams are opened prior to sync-opening a ``tractor.Context`` beforehand. ''' +from contextlib import asynccontextmanager as acm from itertools import count import platform from typing import Optional @@ -58,33 +59,6 @@ from conftest import tractor_test _state: bool = False -@tractor.context -async def error_before_started( - ctx: tractor.Context, -) -> None: - # send an unserializable type - await ctx.started(object()) - - -def test_error_before_started(): - async def main(): - async with tractor.open_nursery() as n: - portal = await n.start_actor( - 'errorer', - enable_modules=[__name__], - ) - - async with portal.open_context( - error_before_started - ) as (ctx, sent): - await trio.sleep(1) - - with pytest.raises(tractor.RemoteActorError) as excinfo: - trio.run(main) - - assert excinfo.value.type == TypeError - - @tractor.context async def too_many_starteds( ctx: tractor.Context, @@ -494,7 +468,10 @@ async def cancel_self( async with ctx.open_stream(): pass except tractor.ContextCancelled: + # suppress for now so we can do checkpoint tests below pass + else: + raise RuntimeError('Context didnt cancel itself?!') # check a real ``trio.Cancelled`` is raised on a checkpoint try: @@ -534,6 +511,9 @@ async def test_callee_cancels_before_started(): except tractor.ContextCancelled as ce: ce.type == trio.Cancelled + # the traceback should be informative + assert 'cancelled itself' in ce.msgdata['tb_str'] + # teardown the actor await portal.cancel_actor() @@ -739,3 +719,92 @@ def test_stream_backpressure(): await portal.cancel_actor() trio.run(main) + + +@tractor.context +async def sleep_forever( + ctx: tractor.Context, +) -> None: + await ctx.started() + async with ctx.open_stream(): + await trio.sleep_forever() + + +@acm +async def attach_to_sleep_forever(): + ''' + Cancel a context **before** any underlying error is raised in order + to trigger a local reception of a ``ContextCancelled`` which **should not** + be re-raised in the local surrounding ``Context`` *iff* the cancel was + requested by **this** side of the context. + + ''' + async with tractor.wait_for_actor('sleeper') as p2: + async with ( + p2.open_context(sleep_forever) as (peer_ctx, first), + peer_ctx.open_stream(), + ): + try: + yield + finally: + # XXX: previously this would trigger local + # ``ContextCancelled`` to be received and raised in the + # local context overriding any local error due to + # logic inside ``_invoke()`` which checked for + # an error set on ``Context._error`` and raised it in + # under a cancellation scenario. + + # The problem is you can have a remote cancellation + # that is part of a local error and we shouldn't raise + # ``ContextCancelled`` **iff** we weren't the side of + # the context to initiate it, i.e. + # ``Context._cancel_called`` should **NOT** have been + # set. The special logic to handle this case is now + # inside ``Context._may_raise_from_remote_msg()`` XD + await peer_ctx.cancel() + + +@tractor.context +async def error_before_started( + ctx: tractor.Context, +) -> None: + ''' + This simulates exactly an original bug discovered in: + https://github.com/pikers/piker/issues/244 + + ''' + async with attach_to_sleep_forever(): + # send an unserializable type which should raise a type error + # here and **NOT BE SWALLOWED** by the surrounding acm!!?! + await ctx.started(object()) + + +def test_do_not_swallow_error_before_started_by_remote_contextcancelled(): + ''' + Verify that an error raised in a remote context which itself opens another + remote context, which it cancels, does not ovverride the original error that + caused the cancellation of the secondardy context. + + ''' + async def main(): + async with tractor.open_nursery() as n: + portal = await n.start_actor( + 'errorer', + enable_modules=[__name__], + ) + await n.start_actor( + 'sleeper', + enable_modules=[__name__], + ) + + async with ( + portal.open_context( + error_before_started + ) as (ctx, sent), + ): + await trio.sleep_forever() + + with pytest.raises(tractor.RemoteActorError) as excinfo: + trio.run(main) + + assert excinfo.value.type == TypeError From e2139c2bf0c2fc49f9475dc753843e5bd1704c5c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 14 Dec 2021 23:05:30 -0500 Subject: [PATCH 4/8] Don't set `Context._error` to expected `ContextCancelled` If the one side of an inter-actor context cancels the other then that side should always expect back a `ContextCancelled` message. However we should not set this error in this case (where the cancel request was sent and a `ContextCancelled` msg was received back) since it may override some other error that caused the cancellation request to be sent out in the first place. As an example when a context opens another context to a peer and some error happens which causes the second peer context to be cancelled but we want to propagate the original error. Fixes the issue found in https://github.com/pikers/piker/issues/244 --- tractor/_streaming.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tractor/_streaming.py b/tractor/_streaming.py index 86060b9..47fd08a 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -425,7 +425,17 @@ class Context: f'Remote context error for {self.chan.uid}:{self.cid}:\n' f'{msg["error"]["tb_str"]}' ) - self._error = unpack_error(msg, self.chan) + error = unpack_error(msg, self.chan) + if ( + isinstance(error, ContextCancelled) and + self._cancel_called + ): + # this is an expected cancel request response message + # and we don't need to raise it in scope since it will + # potentially override a real error + return + + self._error = error # TODO: tempted to **not** do this by-reraising in a # nursery and instead cancel a surrounding scope, detect From 8c004c1f36d3fa157733668d1fc5dbb5398c7a1b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 14 Dec 2021 23:21:28 -0500 Subject: [PATCH 5/8] Add an explicit messaging error for reporting an illegal context transaction --- tractor/_portal.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tractor/_portal.py b/tractor/_portal.py index 9b96bd2..1dc6ee4 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -27,6 +27,7 @@ from typing import ( ) from functools import partial from dataclasses import dataclass +from pprint import pformat import warnings import trio @@ -85,6 +86,9 @@ def _unwrap_msg( assert msg.get('cid'), "Received internal error at portal?" raise unpack_error(msg, channel) +class MessagingError(Exception): + 'Some kind of unexpected SC messaging dialog issue' + class Portal: ''' @@ -408,8 +412,6 @@ class Portal: raise TypeError( f'{func} must be an async generator function!') - __tracebackhide__ = True - fn_mod_path, fn_name = func_deats(func) ctx = await self.actor.start_remote_task( @@ -435,7 +437,10 @@ class Portal: # raise kerr from unpack_error(msg, self.channel) raise unpack_error(msg, self.channel) from None else: - raise + raise MessagingError( + f'Context for {ctx.cid} was expecting a `started` message' + f' but received a non-error msg:\n{pformat(msg)}' + ) _err: Optional[BaseException] = None ctx._portal = self From 98a830ccbac78cf6b597effeff2dd40ed3c66a01 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 14 Dec 2021 23:22:14 -0500 Subject: [PATCH 6/8] Drop cancel traceback capture; don't seem to need it? --- tractor/_actor.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tractor/_actor.py b/tractor/_actor.py index 82e7ae9..e737004 100644 --- a/tractor/_actor.py +++ b/tractor/_actor.py @@ -185,9 +185,6 @@ async def _invoke( task_status.started(cs) await chan.send({'return': await coro, 'cid': cid}) - except trio.Cancelled as err: - tb = err.__traceback__ - except trio.MultiError: # if a context error was set then likely # thei multierror was raised due to that @@ -916,8 +913,9 @@ class Actor: # ``_async_main()`` kwargs['chan'] = chan log.cancel( - f"Actor {self.uid} was remotely cancelled;" - " waiting on cancellation completion..") + f'{self.uid} was remotely cancelled by\n' + f'{chan.uid}!' + ) await _invoke( self, cid, chan, func, kwargs, is_rpc=False ) From 916e27eedc1fb89cf8d3256ab810179ac496d902 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 15 Dec 2021 17:35:28 -0500 Subject: [PATCH 7/8] Adjust cancelled test to expect raised overrun error --- tests/test_context_stream_semantics.py | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/tests/test_context_stream_semantics.py b/tests/test_context_stream_semantics.py index 16587c9..ebf830e 100644 --- a/tests/test_context_stream_semantics.py +++ b/tests/test_context_stream_semantics.py @@ -608,33 +608,19 @@ def test_one_end_stream_not_opened(overrun_by): # 2 overrun cases and the no overrun case (which pushes right up to # the msg limit) - if overrunner == 'caller': + if overrunner == 'caller' or 'cance' in overrunner: with pytest.raises(tractor.RemoteActorError) as excinfo: trio.run(main) assert excinfo.value.type == StreamOverrun - elif 'cancel' in overrunner: - with pytest.raises(trio.MultiError) as excinfo: - trio.run(main) - - multierr = excinfo.value - - for exc in multierr.exceptions: - etype = type(exc) - if etype == tractor.RemoteActorError: - assert exc.type == StreamOverrun - else: - assert etype == tractor.ContextCancelled - elif overrunner == 'callee': with pytest.raises(tractor.RemoteActorError) as excinfo: trio.run(main) # TODO: embedded remote errors so that we can verify the source - # error? - # the callee delivers an error which is an overrun wrapped - # in a remote actor error. + # error? the callee delivers an error which is an overrun + # wrapped in a remote actor error. assert excinfo.value.type == tractor.RemoteActorError else: From 8eff788d2db2571e13101c59acd3c6b1e91e9e32 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 16 Dec 2021 19:25:21 -0500 Subject: [PATCH 8/8] Pin to previous `trio_typing` release --- requirements-test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-test.txt b/requirements-test.txt index 5ad6c45..1d3cfad 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -2,6 +2,6 @@ pytest pytest-trio pdbpp mypy<0.920 -trio_typing +trio_typing<0.7.0 pexpect towncrier