diff --git a/tractor/ipc/_server.py b/tractor/ipc/_server.py index 3fd965c5..9701ec6d 100644 --- a/tractor/ipc/_server.py +++ b/tractor/ipc/_server.py @@ -1122,20 +1122,32 @@ async def _serve_ipc_eps( ) finally: + # close every endpoint INDEPENDENTLY: a close raising + # mid-iter (e.g. UDS `os.unlink` racing concurrent reap) must + # not strand the rest of the eps + must not skip the + # `_shutdown.set()` below. if eps: addr: Address ep: Endpoint - for addr, ep in server.epsdict().items(): - ep.close_listener() - server._endpoints.remove(ep) + for addr, ep in list(server.epsdict().items()): + try: + ep.close_listener() + except Exception as ep_close_err: + log.exception( + f'Endpoint close raised, continuing teardown\n' + f' |_{ep!r}\n' + f' |_{ep_close_err!r}\n' + ) + finally: + try: + server._endpoints.remove(ep) + except ValueError: + pass - # actor = _state.current_actor() - # if actor.is_arbiter: - # import pdbp; pdbp.set_trace() - - # signal the server is "shutdown"/"terminated" - # since no more active endpoints are active. - if not server._endpoints: + # always signal "shutdown" so `actor.cancel()` → + # `ipc_server.wait_for_shutdown()` doesn't deadlock when an + # endpoint close raised above. + if server._shutdown is not None: server._shutdown.set() @acm diff --git a/tractor/ipc/_uds.py b/tractor/ipc/_uds.py index 3b214f6a..8c57664d 100644 --- a/tractor/ipc/_uds.py +++ b/tractor/ipc/_uds.py @@ -344,7 +344,18 @@ def close_listener( ''' lstnr.socket.close() - os.unlink(addr.sockpath) + # tolerate the sock-file being already gone — under concurrent + # pytest sessions sharing the bindspace dir, another session's + # reap path can unlink it first; raising here aborts the + # `_serve_ipc_eps` finally before `_shutdown.set()`, deadlocking + # `wait_for_shutdown()` on `actor.cancel()`. + try: + os.unlink(addr.sockpath) + except FileNotFoundError: + log.warning( + f'UDS sock-file already unlinked, skipping\n' + f' |_{addr.sockpath}\n' + ) async def open_unix_socket_w_passcred(