Fix shutdown deadlock on UDS unlink race

Wrap `os.unlink()` in `close_listener()` with a `FileNotFoundError`
guard — under concurrent pytest sessions the sock-file may already have
been reaped. Without this guard, the raised error aborts
`_serve_ipc_eps`'s `finally` block before `_shutdown.set()` is called,
deadlocking `wait_for_shutdown()` during `actor.cancel()`.

Also:
- close each endpoint independently in the `finally` block so that one
  raise doesn't strand the rest.
- always call `_shutdown.set()`, regardless of how many endpoints remain.

(this commit msg was generated in part by [`claude-code`][claude-code-gh])
[claude-code-gh]: https://github.com/anthropics/claude-code
subint_forkserver_backend
Gud Boi 2026-05-06 14:11:51 -04:00
parent 7b14fdcd96
commit 2ee44a6fdd
2 changed files with 34 additions and 11 deletions

View File

@ -1122,20 +1122,32 @@ async def _serve_ipc_eps(
) )
finally: finally:
# close every endpoint INDEPENDENTLY: a close raising
# mid-iter (e.g. UDS `os.unlink` racing concurrent reap) must
# not strand the rest of the eps + must not skip the
# `_shutdown.set()` below.
if eps: if eps:
addr: Address addr: Address
ep: Endpoint ep: Endpoint
for addr, ep in server.epsdict().items(): for addr, ep in list(server.epsdict().items()):
ep.close_listener() try:
server._endpoints.remove(ep) ep.close_listener()
except Exception as ep_close_err:
log.exception(
f'Endpoint close raised, continuing teardown\n'
f' |_{ep!r}\n'
f' |_{ep_close_err!r}\n'
)
finally:
try:
server._endpoints.remove(ep)
except ValueError:
pass
# actor = _state.current_actor() # always signal "shutdown" so `actor.cancel()` →
# if actor.is_arbiter: # `ipc_server.wait_for_shutdown()` doesn't deadlock when an
# import pdbp; pdbp.set_trace() # endpoint close raised above.
if server._shutdown is not None:
# signal the server is "shutdown"/"terminated"
# since no more active endpoints are active.
if not server._endpoints:
server._shutdown.set() server._shutdown.set()
@acm @acm

View File

@ -344,7 +344,18 @@ def close_listener(
''' '''
lstnr.socket.close() lstnr.socket.close()
os.unlink(addr.sockpath) # tolerate the sock-file being already gone — under concurrent
# pytest sessions sharing the bindspace dir, another session's
# reap path can unlink it first; raising here aborts the
# `_serve_ipc_eps` finally before `_shutdown.set()`, deadlocking
# `wait_for_shutdown()` on `actor.cancel()`.
try:
os.unlink(addr.sockpath)
except FileNotFoundError:
log.warning(
f'UDS sock-file already unlinked, skipping\n'
f' |_{addr.sockpath}\n'
)
async def open_unix_socket_w_passcred( async def open_unix_socket_w_passcred(