Fix when root-actor addrs is set as rtvs

Move `_root_addrs` assignment to after `async_main()` unblocks (via
`.started()`) which now delivers the bind addrs , ensuring correct
`UnwrappedAddress` propagation into `._state._runtime_vars` for
non-registar root actors..

Previously for non-registrar root actors the `._state._runtime_vars`
entries were being set as `Address` values which ofc IPC serialize
incorrectly rn vs. the unwrapped versions, (well until we add a msgspec
for their structs anyway) and thus are passed in incorrect form to
children/subactors during spawning..

This fixes the issue by waiting for the `.ipc.*` stack to
bind-and-resolve any randomly allocated addrs (by the OS) until after
the initial `Actor` startup is complete.

Deats,
- primarily, mv `_root_addrs` assignment from before `root_tn.start()`
  to after, using started(-ed) `accept_addrs` now delivered from
  `._runtime.async_main()`..
- update `task_status` type hints to match.
- unpack and set the `(accept_addrs, reg_addrs)` tuple from
  `root_tn.start()` call into `._state._runtime_vars` entries.
- improve and embolden comments distinguishing registrar vs non-registrar
  init paths, ensure typing reflects wrapped vs. unwrapped addrs.

Also,
- add a masked `mk_pdb().set_trace()` for debugging `raddrs` values
  being "off".
- add TODO about using UDS on linux for root mailbox
- rename `trans_bind_addrs` -> `tpt_bind_addrs` for clarity.
- expand comment about random port allocation for
  non-registrar case

(this commit msg was generated in some part by [`claude-code`][claude-code-gh])
[claude-code-gh]: https://github.com/anthropics/claude-code
root_actor_raddrs_fix
Gud Boi 2026-02-10 20:33:19 -05:00
parent cdcc1b42fc
commit 8aee24e83f
2 changed files with 82 additions and 23 deletions

View File

@ -88,7 +88,8 @@ async def maybe_block_bp(
bp_blocked: bool
if (
debug_mode
and maybe_enable_greenback
and
maybe_enable_greenback
and (
maybe_mod := await debug.maybe_init_greenback(
raise_not_found=False,
@ -385,10 +386,13 @@ async def open_root_actor(
addr,
)
trans_bind_addrs: list[UnwrappedAddress] = []
tpt_bind_addrs: list[
Address # `Address.get_random()` case
|UnwrappedAddress # registrar case `= uw_reg_addrs`
] = []
# Create a new local root-actor instance which IS NOT THE
# REGISTRAR
# ------ NON-REGISTRAR ------
# create a new root-actor instance.
if ponged_addrs:
if ensure_registry:
raise RuntimeError(
@ -415,12 +419,21 @@ async def open_root_actor(
# XXX INSTEAD, bind random addrs using the same tpt
# proto.
for addr in ponged_addrs:
trans_bind_addrs.append(
tpt_bind_addrs.append(
# XXX, these are `Address` NOT `UnwrappedAddress`.
#
# NOTE, in the case of posix/berkley socket
# protos we allocate port=0 such that the system
# allocates a random value at bind time; this
# happens in the `.ipc.*` stack's backend.
addr.get_random(
bindspace=addr.bindspace,
)
)
# ------ REGISTRAR ------
# create a new "registry providing" root-actor instance.
#
# Start this local actor as the "registrar", aka a regular
# actor who manages the local registry of "mailboxes" of
# other process-tree-local sub-actors.
@ -429,7 +442,7 @@ async def open_root_actor(
# following init steps are taken:
# - the tranport layer server is bound to each addr
# pair defined in provided registry_addrs, or the default.
trans_bind_addrs = uw_reg_addrs
tpt_bind_addrs = uw_reg_addrs
# - it is normally desirable for any registrar to stay up
# indefinitely until either all registered (child/sub)
@ -449,20 +462,10 @@ async def open_root_actor(
enable_modules=enable_modules,
)
# XXX, in case the root actor runtime was actually run from
# `tractor.to_asyncio.run_as_asyncio_guest()` and NOt
# `tractor.to_asyncio.run_as_asyncio_guest()` and NOT
# `.trio.run()`.
actor._infected_aio = _state._runtime_vars['_is_infected_aio']
# NOTE, only set the loopback addr for the
# process-tree-global "root" mailbox since all sub-actors
# should be able to speak to their root actor over that
# channel.
raddrs: list[Address] = _state._runtime_vars['_root_addrs']
raddrs.extend(trans_bind_addrs)
# TODO, remove once we have also removed all usage;
# eventually all (root-)registry apis should expect > 1 addr.
_state._runtime_vars['_root_mailbox'] = raddrs[0]
# Start up main task set via core actor-runtime nurseries.
try:
# assign process-local actor
@ -499,14 +502,39 @@ async def open_root_actor(
# "actor runtime" primitives are SC-compat and thus all
# transitively spawned actors/processes must be as
# well.
await root_tn.start(
accept_addrs: list[UnwrappedAddress]
reg_addrs: list[UnwrappedAddress]
(
accept_addrs,
reg_addrs,
) = await root_tn.start(
partial(
_runtime.async_main,
actor,
accept_addrs=trans_bind_addrs,
accept_addrs=tpt_bind_addrs,
parent_addr=None
)
)
# NOTE, only set a local-host addr (i.e. like
# `lo`-loopback for TCP) for the process-tree-global
# "root"-process (its tree-wide "mailbox") since all
# sub-actors should be able to speak to their root
# actor over that channel.
#
# ?TODO, per-OS non-network-proto alt options?
# -[ ] on linux we should be able to always use UDS?
#
raddrs: list[Address] = _state._runtime_vars['_root_addrs']
raddrs.extend(
accept_addrs,
)
# TODO, remove once we have also removed all usage;
# eventually all (root-)registry apis should expect > 1 addr.
_state._runtime_vars['_root_mailbox'] = raddrs[0]
# if 'chart' in actor.aid.name:
# from tractor.devx import mk_pdb
# mk_pdb().set_trace()
try:
yield actor
except (

View File

@ -147,6 +147,8 @@ def get_mod_nsps2fps(mod_ns_paths: list[str]) -> dict[str, str]:
return nsp2fp
_bp = False
class Actor:
'''
The fundamental "runtime" concurrency primitive.
@ -272,7 +274,9 @@ class Actor:
stacklevel=2,
)
registry_addrs: list[Address] = [wrap_address(arbiter_addr)]
registry_addrs: list[Address] = [
wrap_address(arbiter_addr)
]
# marked by the process spawning backend at startup
# will be None for the parent most process started manually
@ -959,6 +963,21 @@ class Actor:
rvs['_is_root'] = False # obvi XD
# TODO, remove! left in just while protoing init fix!
# global _bp
# if (
# 'chart' in self.aid.name
# and
# isinstance(
# rvs['_root_addrs'][0],
# dict,
# )
# and
# not _bp
# ):
# _bp = True
# breakpoint()
_state._runtime_vars.update(rvs)
# `SpawnSpec.reg_addrs`
@ -1455,7 +1474,12 @@ async def async_main(
# be False when running as root actor and True when as
# a subactor.
parent_addr: UnwrappedAddress|None = None,
task_status: TaskStatus[None] = trio.TASK_STATUS_IGNORED,
task_status: TaskStatus[
tuple[
list[UnwrappedAddress], # accept_addrs
list[UnwrappedAddress], # reg_addrs
]
] = trio.TASK_STATUS_IGNORED,
) -> None:
'''
@ -1634,6 +1658,7 @@ async def async_main(
# if addresses point to the same actor..
# So we need a way to detect that? maybe iterate
# only on unique actor uids?
addr: UnwrappedAddress
for addr in actor.reg_addrs:
try:
waddr = wrap_address(addr)
@ -1642,7 +1667,9 @@ async def async_main(
await debug.pause()
# !TODO, get rid of the local-portal crap XD
reg_portal: Portal
async with get_registry(addr) as reg_portal:
accept_addr: UnwrappedAddress
for accept_addr in accept_addrs:
accept_addr = wrap_address(accept_addr)
@ -1658,8 +1685,12 @@ async def async_main(
is_registered: bool = True
# init steps complete
task_status.started()
# init steps complete, deliver IPC-server and
# registrar addrs back to caller.
task_status.started((
accept_addrs,
actor.reg_addrs,
))
# Begin handling our new connection back to our
# parent. This is done last since we don't want to