Add `cpu_perf_headroom()` for throttle-aware deadlines
Mass `trio` deadline-miss failures on byte-identical code turned out to be caused by a firmware/EC power-cap (AMD PPT/STAPM) clamping the all-core sustained clock while every static knob (`governor`, `scaling_max_freq`, EPP, platform-profile) still read "performance" — invisible to the existing `cpu_scaling_factor()` check. See `scripts/cpu-perf-check` + the `ai/conc-anal/trio_033_cancel_cascade_slowdown_depth3_issue.md` notes.

Deats,
- add `_measure_sustained_headroom()` to `tests/conftest.py`: a one-shot ~0.9s all-core burn (explicit `fork`-ctx `mp` procs) sampling achieved-vs-max freq AFTER the boost window; under a 0.6 gate it returns the full inverse fraction (capped 4x), else 1.0; best-effort 1.0 on non-linux or any error,
- add `cpu_perf_headroom()`: `max()` of the static scaling factor and the (session-cached) sustained probe,
- inflate deadline budgets by it in `test_dynamic_pub_sub`, both `test_clustering` cases, the `test_multi_nested_subactors_error_through_nurseries` pexpect waits + `test_nested_multierrors`,
- `xfail(strict=False)` `test_nested_multierrors` depth=3 under throttle: the deep tree trips tractor's INTERNAL reap deadlines (`soft_kill`/`hard_kill` `terminate_after=1.6`) minting a `Cancelled` inside the runtime — not fixable by test-budget inflation; auto-clears once the box un-throttles.

(this patch was generated in some part by [`claude-code`][claude-code-gh])
[claude-code-gh]: https://github.com/anthropics/claude-code

test_cpu_throttling
parent
8526985c97
commit
20cb99ecd4
|
|
@ -120,6 +120,139 @@ def cpu_scaling_factor() -> float:
|
|||
return 1.
|
||||
|
||||
|
||||
# session-cached sustained-load throttle multiplier — measured
|
||||
# once (lazily) on the first `cpu_perf_headroom()` call. `None`
|
||||
# = not-yet-measured.
|
||||
_sustained_headroom: float|None = None
|
||||
|
||||
|
||||
def _measure_sustained_headroom(
|
||||
secs: float = 0.9,
|
||||
# a healthy all-core sustained clock holds AT/ABOVE this
|
||||
# fraction of the package single-core max ceiling (boost sags
|
||||
# under full multi-core load even un-throttled, but not far);
|
||||
# at/above it we assume no throttle and return 1.0.
|
||||
throttle_gate: float = 0.6,
|
||||
max_headroom: float = 4.,
|
||||
) -> float:
|
||||
'''
|
||||
One-shot all-core burn returning a latency multiplier
|
||||
(>= 1.0) that reflects *sustained-load* CPU throttle.
|
||||
|
||||
Catches the firmware/EC power-cap clamp (AMD PPT/STAPM &
|
||||
friends) that pins achieved `scaling_cur_freq` to a fraction
|
||||
of the ceiling under multi-core load while EVERY static knob
|
||||
(`governor`, `scaling_max_freq`, `EPP`, `platform_profile`)
|
||||
still reads "full performance". That cap is INVISIBLE to
|
||||
`cpu_scaling_factor()` and is the gremlin behind mass `trio`
|
||||
deadline-miss failures on byte-identical code — see
|
||||
`scripts/cpu-perf-check`.
|
||||
|
||||
Best-effort: returns 1.0 on non-linux / missing sysfs / any
|
||||
error so it can never break a test run.
|
||||
|
||||
'''
|
||||
import glob
|
||||
import multiprocessing as mp
|
||||
|
||||
def _read_mhz(path: str) -> int|None:
|
||||
try:
|
||||
return int(open(path).read()) // 1000
|
||||
except OSError:
|
||||
return None
|
||||
|
||||
try:
|
||||
maxs: list[int] = [
|
||||
v for f in glob.glob(
|
||||
'/sys/devices/system/cpu/cpu[0-9]*/cpufreq/scaling_max_freq'
|
||||
)
|
||||
if (v := _read_mhz(f)) is not None
|
||||
]
|
||||
pkg_max: int = max(maxs) if maxs else 0
|
||||
if not pkg_max:
|
||||
return 1.
|
||||
|
||||
def _burn(stop: float) -> None:
|
||||
x: int = 1
|
||||
while time.perf_counter() < stop:
|
||||
x += x * x ^ 0x5
|
||||
|
||||
# explicit `fork` ctx so we're immune to whatever global
|
||||
# mp start-method tractor/the suite may have set (`spawn`
|
||||
# would re-exec + re-import 24x — slow and pointless here).
|
||||
ctx = mp.get_context('fork')
|
||||
ncpu: int = os.cpu_count() or 1
|
||||
stop: float = time.perf_counter() + secs
|
||||
procs = [
|
||||
ctx.Process(target=_burn, args=(stop,), daemon=True)
|
||||
for _ in range(ncpu)
|
||||
]
|
||||
for p in procs:
|
||||
p.start()
|
||||
|
||||
# skip the ~0.4s boost window so we sample the steady
|
||||
# state AFTER any power-cap has engaged.
|
||||
samples: list[int] = []
|
||||
time.sleep(0.4)
|
||||
while time.perf_counter() < stop - 0.1:
|
||||
curs: list[int] = [
|
||||
v for f in glob.glob(
|
||||
'/sys/devices/system/cpu/cpu[0-9]*/cpufreq/scaling_cur_freq'
|
||||
)
|
||||
if (v := _read_mhz(f)) is not None
|
||||
]
|
||||
if curs:
|
||||
samples.append(sum(curs) // len(curs))
|
||||
time.sleep(0.15)
|
||||
for p in procs:
|
||||
p.join()
|
||||
|
||||
if not samples:
|
||||
return 1.
|
||||
frac: float = (sum(samples) // len(samples)) / pkg_max
|
||||
# below the gate we read it as a power-cap throttle. The
|
||||
# spawn/IPC/fork-bound work these budgets guard slows ~1:1
|
||||
# with the achieved-vs-max freq ratio, so compensate by the
|
||||
# FULL inverse fraction (a boost-discounted factor
|
||||
# under-shoots and still trips the marginal cases).
|
||||
if frac >= throttle_gate:
|
||||
return 1.
|
||||
return min(max_headroom, 1. / frac)
|
||||
|
||||
except Exception:
|
||||
return 1.
|
||||
|
||||
|
||||
def cpu_perf_headroom() -> float:
    '''
    Deadline-inflation multiplier for CPU-perf-limited hosts; use
    as `timeout *= cpu_perf_headroom()`.

    Combines two throttle signals and reports the larger:

    - the static cpu-freq scaling factor from
      `cpu_scaling_factor()`,

    - the sustained-load (power-cap) factor from
      `_measure_sustained_headroom()`, which is probed lazily on
      the first call and then cached in the module-level
      `_sustained_headroom` for later calls. See
      `ai/conc-anal/trio_033_cancel_cascade_slowdown_depth3_issue.md`.

    When `_non_linux` is set only the static factor is returned
    and the sustained probe is never run.

    '''
    global _sustained_headroom

    static_factor: float = cpu_scaling_factor()
    if not _non_linux:
        # first call pays for the one-shot burn; later calls reuse
        # the cached result.
        if _sustained_headroom is None:
            _sustained_headroom = _measure_sustained_headroom()
        return max(static_factor, _sustained_headroom)

    return static_factor
|
||||
|
||||
|
||||
# NOTE, the `--ll`/`--tl` CLI flags + the `loglevel`, `test_log`
|
||||
# and `testing_pkg_name` fixtures have been factored into the
|
||||
# `tractor._testing.pytest` plugin (loaded via the `-p` entry in
|
||||
|
|
|
|||
|
|
@ -794,6 +794,14 @@ def test_multi_nested_subactors_error_through_nurseries(
|
|||
loglevel='pdb',
|
||||
)
|
||||
last_send_char: str|None = None
|
||||
|
||||
# inflate pexpect waits under CPU throttle — incl. the
|
||||
# sustained-load power-cap invisible to static freq reads — so
|
||||
# a slow-to-boot child REPL doesn't trip a false `TIMEOUT`.
|
||||
# See `scripts/cpu-perf-check`.
|
||||
from ..conftest import cpu_perf_headroom
|
||||
headroom: float = cpu_perf_headroom()
|
||||
|
||||
for (
|
||||
i,
|
||||
send_char,
|
||||
|
|
@ -817,6 +825,9 @@ def test_multi_nested_subactors_error_through_nurseries(
|
|||
if is_forking_spawner:
|
||||
timeout += 4
|
||||
|
||||
if headroom != 1.:
|
||||
timeout *= headroom
|
||||
|
||||
try:
|
||||
child.expect(
|
||||
PROMPT,
|
||||
|
|
|
|||
|
|
@ -188,11 +188,18 @@ def test_dynamic_pub_sub(
|
|||
# sits forever until external SIGINT. The `afk_alarm_w_trace`
|
||||
# outer guard below is the AFK-safety counterpart (SIGALRM
|
||||
# raises in the main thread regardless of trio scope state).
|
||||
fail_after_s: int = (
|
||||
fail_after_s: float = (
|
||||
8
|
||||
if is_forking_spawner
|
||||
else 20
|
||||
)
|
||||
# inflate under CPU throttle — incl. the sustained-load
|
||||
# power-cap invisible to static freq reads — so a slow box
|
||||
# doesn't trip the deadline. See `scripts/cpu-perf-check`.
|
||||
from .conftest import cpu_perf_headroom
|
||||
headroom: float = cpu_perf_headroom()
|
||||
if headroom != 1.:
|
||||
fail_after_s *= headroom
|
||||
|
||||
async def main():
|
||||
# bug-class-3 breadcrumb: tag each level of the cancel path
|
||||
|
|
|
|||
|
|
@ -592,6 +592,15 @@ async def test_nested_multierrors(
|
|||
# depth=3, BOTH variants will reliably `xpass` and
|
||||
# pytest will yell — our signal to drop the marker. See
|
||||
# `ai/conc-anal/cancel_cascade_too_slow_under_main_thread_forkserver_issue.md`.
|
||||
#
|
||||
# Probe CPU throttle ONCE up-front (folds in the sustained-load
|
||||
# power-cap that static freq reads miss): used BOTH to inflate
|
||||
# the deadline budget below AND to xfail depth=3, whose failure
|
||||
# mode under throttle is a runtime-internal reap deadline — not
|
||||
# a test-budget miss. See `scripts/cpu-perf-check`.
|
||||
from .conftest import cpu_perf_headroom
|
||||
headroom: float = cpu_perf_headroom()
|
||||
|
||||
if start_method == 'main_thread_forkserver':
|
||||
request.node.add_marker(
|
||||
pytest.mark.xfail(
|
||||
|
|
@ -605,6 +614,34 @@ async def test_nested_multierrors(
|
|||
)
|
||||
)
|
||||
|
||||
# Under CPU throttle (incl. the sustained-load power-cap that
|
||||
# static freq reads miss) the DEEP depth=3 tree trips tractor's
|
||||
# INTERNAL reap deadlines (`soft_kill`/`hard_kill`
|
||||
# `move_on_after`/`terminate_after=1.6`) before slow subprocs
|
||||
# exit, injecting a `Cancelled(source='deadline')` into the BEG
|
||||
# — the SAME shape-mismatch class as the MTF xfail above, and
|
||||
# NOT fixable by inflating the test-level budget (the Cancelled
|
||||
# is minted inside the runtime, not by our `fail_after`).
|
||||
# xfail(strict=False) so it auto-clears the moment the box is
|
||||
# un-throttled (`headroom == 1.`); depth=1's shallow tree stays
|
||||
# under those internal deadlines so it just rides the budget
|
||||
# inflation below. See `scripts/cpu-perf-check`.
|
||||
elif (
|
||||
depth == 3
|
||||
and
|
||||
headroom != 1.
|
||||
):
|
||||
request.node.add_marker(
|
||||
pytest.mark.xfail(
|
||||
strict=False,
|
||||
reason=(
|
||||
'CPU throttled — tractor reap deadline injects '
|
||||
'Cancelled into BEG; see conc-anal/'
|
||||
'trio_033_cancel_cascade_slowdown_depth3_issue.md'
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
# Per-backend/-depth budgets: in the non-hang case the
|
||||
# whole spawn + cancel-cascade should complete in well
|
||||
# under these. On the borderline hang case the
|
||||
|
|
@ -632,6 +669,11 @@ async def test_nested_multierrors(
|
|||
case ('main_thread_forkserver', 3):
|
||||
timeout = 30
|
||||
|
||||
# inflate the budget by the throttle headroom probed above so
|
||||
# a slow box doesn't masquerade as a deadline regression.
|
||||
if headroom != 1.:
|
||||
timeout *= headroom
|
||||
|
||||
async with fail_after_w_trace(timeout):
|
||||
try:
|
||||
async with tractor.open_nursery() as nursery:
|
||||
|
|
|
|||
|
|
@ -24,8 +24,14 @@ def test_empty_mngrs_input_raises(
|
|||
'actor-cluster teardown hangs intermittently on UDS'
|
||||
)
|
||||
|
||||
# inflate under CPU throttle — incl. the sustained-load
|
||||
# power-cap invisible to static freq reads. See
|
||||
# `scripts/cpu-perf-check`.
|
||||
from .conftest import cpu_perf_headroom
|
||||
fail_after_s: float = 3 * cpu_perf_headroom()
|
||||
|
||||
async def main():
|
||||
with trio.fail_after(3):
|
||||
with trio.fail_after(fail_after_s):
|
||||
async with (
|
||||
open_actor_cluster(
|
||||
modules=[__name__],
|
||||
|
|
@ -93,6 +99,13 @@ async def test_streaming_to_actor_cluster(
|
|||
10 if is_forking_spawner
|
||||
else 6
|
||||
)
|
||||
# inflate under CPU throttle — incl. the sustained-load
|
||||
# power-cap invisible to static freq reads. See
|
||||
# `scripts/cpu-perf-check`.
|
||||
from .conftest import cpu_perf_headroom
|
||||
headroom: float = cpu_perf_headroom()
|
||||
if headroom != 1.:
|
||||
delay *= headroom
|
||||
with trio.fail_after(delay):
|
||||
async with (
|
||||
open_actor_cluster(modules=[__name__]) as portals,
|
||||
|
|
|
|||
Loading…
Reference in New Issue