From 20cb99ecd49df5839c9a424d71533a67682547e8 Mon Sep 17 00:00:00 2001 From: goodboy Date: Fri, 12 Jun 2026 13:37:05 -0400 Subject: [PATCH] Add `cpu_perf_headroom()` for throttle-aware deadlines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mass `trio` deadline-miss failures on byte-identical code turned out to be a firmware/EC power-cap (AMD PPT/STAPM) clamping the all-core sustained clock while every static knob (`governor`, `scaling_max_freq`, EPP, platform-profile) still read "performance" — invisible to the existing `cpu_scaling_factor()` check. See `scripts/cpu-perf-check` + the `ai/conc-anal/trio_033_cancel_cascade_slowdown_depth3_issue.md` notes. Deats, - add `_measure_sustained_headroom()` to `tests/conftest.py`: a one-shot ~0.9s all-core burn (explicit `fork`-ctx `mp` procs) sampling achieved-vs-max freq AFTER the boost window; under a 0.6 gate it returns the full inverse fraction (capped 4x), else 1.0; best-effort 1.0 on non-linux or any error, - add `cpu_perf_headroom()`: `max()` of the static scaling factor and the (session-cached) sustained probe, - inflate deadline budgets by it in `test_dynamic_pub_sub`, both `test_clustering` cases, the `test_multi_nested_subactors_error_through_nurseries` pexpect waits + `test_nested_multierrors`, - `xfail(strict=False)` `test_nested_multierrors` depth=3 under throttle: the deep tree trips tractor's INTERNAL reap deadlines (`soft_kill`/`hard_kill` `terminate_after=1.6`) minting a `Cancelled` inside the runtime — not fixable by test-budget inflation; auto-clears once the box un-throttles. 
(this patch was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- tests/conftest.py | 133 +++++++++++++++++++++++++++++++ tests/devx/test_debugger.py | 11 +++ tests/test_advanced_streaming.py | 9 ++- tests/test_cancellation.py | 42 ++++++++++ tests/test_clustering.py | 15 +++- 5 files changed, 208 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 7d9f01b8..bb527ae1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -120,6 +120,139 @@ def cpu_scaling_factor() -> float: return 1. +# session-cached sustained-load throttle multiplier — measured +# once (lazily) on the first `cpu_perf_headroom()` call. `None` +# = not-yet-measured. +_sustained_headroom: float|None = None + + +def _measure_sustained_headroom( + secs: float = 0.9, + # a healthy all-core sustained clock holds AT/ABOVE this + # fraction of the package single-core max ceiling (boost sags + # under full multi-core load even un-throttled, but not far); + # at/above it we assume no throttle and return 1.0. + throttle_gate: float = 0.6, + max_headroom: float = 4., +) -> float: + ''' + One-shot all-core burn returning a latency multiplier + (>= 1.0) that reflects *sustained-load* CPU throttle. + + Catches the firmware/EC power-cap clamp (AMD PPT/STAPM & + friends) that pins achieved `scaling_cur_freq` to a fraction + of the ceiling under multi-core load while EVERY static knob + (`governor`, `scaling_max_freq`, `EPP`, `platform_profile`) + still reads "full performance". That cap is INVISIBLE to + `cpu_scaling_factor()` and is the gremlin behind mass `trio` + deadline-miss failures on byte-identical code — see + `scripts/cpu-perf-check`. + + Best-effort: returns 1.0 on non-linux / missing sysfs / any + error so it can never break a test run. 
+ + ''' + import glob + import multiprocessing as mp + + def _read_mhz(path: str) -> int|None: + try: + return int(open(path).read()) // 1000 + except (OSError, ValueError): + return None + + try: + maxs: list[int] = [ + v for f in glob.glob( + '/sys/devices/system/cpu/cpu[0-9]*/cpufreq/scaling_max_freq' + ) + if (v := _read_mhz(f)) is not None + ] + pkg_max: int = max(maxs) if maxs else 0 + if not pkg_max: + return 1. + + def _burn(stop: float) -> None: + x: int = 1 + while time.perf_counter() < stop: + x = (x + (x * x ^ 0x5)) & 0xffff_ffff # keep `x` word-sized + + # explicit `fork` ctx so we're immune to whatever global + # mp start-method tractor/the suite may have set (`spawn` + # would re-exec + re-import 24x — slow and pointless here). + ctx = mp.get_context('fork') + ncpu: int = os.cpu_count() or 1 + stop: float = time.perf_counter() + secs + procs = [ + ctx.Process(target=_burn, args=(stop,), daemon=True) + for _ in range(ncpu) + ] + for p in procs: + p.start() + + # skip the ~0.4s boost window so we sample the steady + # state AFTER any power-cap has engaged. + samples: list[int] = [] + time.sleep(0.4) + while time.perf_counter() < stop - 0.1: + curs: list[int] = [ + v for f in glob.glob( + '/sys/devices/system/cpu/cpu[0-9]*/cpufreq/scaling_cur_freq' + ) + if (v := _read_mhz(f)) is not None + ] + if curs: + samples.append(sum(curs) // len(curs)) + time.sleep(0.15) + for p in procs: + p.join() + + if not samples: + return 1. + frac: float = (sum(samples) // len(samples)) / pkg_max + # below the gate we read it as a power-cap throttle. The + # spawn/IPC/fork-bound work these budgets guard slows ~1:1 + # with the achieved-vs-max freq ratio, so compensate by the + # FULL inverse fraction (a boost-discounted factor + # under-shoots and still trips the marginal cases). + if frac >= throttle_gate: + return 1. + return min(max_headroom, 1. / frac) + + except Exception: + return 1. 
+ + +def cpu_perf_headroom() -> float: + ''' + Latency-headroom multiplier (>= 1.0) covering BOTH cpu-perf + throttle classes — multiply a test's deadline by it, e.g. + `timeout *= cpu_perf_headroom()`: + + - static cpu-freq scaling — via `cpu_scaling_factor()` + (governor/policy lowered the `scaling_max_freq` ceiling). + + - sustained-load power-cap throttle — via + `_measure_sustained_headroom()` (firmware/EC PPT/STAPM + clamps achieved freq under load while every static knob + reads "performance"; INVISIBLE to the static check). This + is the gremlin behind mass `trio` deadline-miss failures + on unchanged code — see + `ai/conc-anal/trio_033_cancel_cascade_slowdown_depth3_issue.md`. + + The sustained probe runs ONCE per session (cached; skipped when + `_non_linux` is set); cost is a ~0.9s all-core burn on 1st call. + + ''' + global _sustained_headroom + static: float = cpu_scaling_factor() + if _non_linux: + return static + if _sustained_headroom is None: + _sustained_headroom = _measure_sustained_headroom() + return max(static, _sustained_headroom) + + # NOTE, the `--ll`/`--tl` CLI flags + the `loglevel`, `test_log` # and `testing_pkg_name` fixtures have been factored into the # `tractor._testing.pytest` plugin (loaded via the `-p` entry in diff --git a/tests/devx/test_debugger.py b/tests/devx/test_debugger.py index 1c3338c5..dfcf36d8 100644 --- a/tests/devx/test_debugger.py +++ b/tests/devx/test_debugger.py @@ -794,6 +794,14 @@ def test_multi_nested_subactors_error_through_nurseries( loglevel='pdb', ) last_send_char: str|None = None + + # inflate pexpect waits under CPU throttle — incl. the + # sustained-load power-cap invisible to static freq reads — so + # a slow-to-boot child REPL doesn't trip a false `TIMEOUT`. + # See `scripts/cpu-perf-check`. 
+ from ..conftest import cpu_perf_headroom + headroom: float = cpu_perf_headroom() + for ( i, send_char, @@ -817,6 +825,9 @@ def test_multi_nested_subactors_error_through_nurseries( if is_forking_spawner: timeout += 4 + if headroom != 1.: + timeout *= headroom + try: child.expect( PROMPT, diff --git a/tests/test_advanced_streaming.py b/tests/test_advanced_streaming.py index 9b1a476f..62fff897 100644 --- a/tests/test_advanced_streaming.py +++ b/tests/test_advanced_streaming.py @@ -188,11 +188,18 @@ def test_dynamic_pub_sub( # sits forever until external SIGINT. The `afk_alarm_w_trace` # outer guard below is the AFK-safety counterpart (SIGALRM # raises in the main thread regardless of trio scope state). - fail_after_s: int = ( + fail_after_s: float = ( 8 if is_forking_spawner else 20 ) + # inflate under CPU throttle — incl. the sustained-load + # power-cap invisible to static freq reads — so a slow box + # doesn't trip the deadline. See `scripts/cpu-perf-check`. + from .conftest import cpu_perf_headroom + headroom: float = cpu_perf_headroom() + if headroom != 1.: + fail_after_s *= headroom async def main(): # bug-class-3 breadcrumb: tag each level of the cancel path diff --git a/tests/test_cancellation.py b/tests/test_cancellation.py index f35d7bc0..86a073f9 100644 --- a/tests/test_cancellation.py +++ b/tests/test_cancellation.py @@ -592,6 +592,15 @@ async def test_nested_multierrors( # depth=3, BOTH variants will reliably `xpass` and # pytest will yell — our signal to drop the marker. See # `ai/conc-anal/cancel_cascade_too_slow_under_main_thread_forkserver_issue.md`. + # + # Probe CPU throttle ONCE up-front (folds in the sustained-load + # power-cap that static freq reads miss): used BOTH to inflate + # the deadline budget below AND to xfail depth=3, whose failure + # mode under throttle is a runtime-internal reap deadline — not + # a test-budget miss. See `scripts/cpu-perf-check`. 
+ from .conftest import cpu_perf_headroom + headroom: float = cpu_perf_headroom() + if start_method == 'main_thread_forkserver': request.node.add_marker( pytest.mark.xfail( @@ -605,6 +614,34 @@ async def test_nested_multierrors( ) ) + # Under CPU throttle (incl. the sustained-load power-cap that + # static freq reads miss) the DEEP depth=3 tree trips tractor's + # INTERNAL reap deadlines (`soft_kill`/`hard_kill` + # `move_on_after`/`terminate_after=1.6`) before slow subprocs + # exit, injecting a `Cancelled(source='deadline')` into the BEG + # — the SAME shape-mismatch class as the MTF xfail above, and + # NOT fixable by inflating the test-level budget (the Cancelled + # is minted inside the runtime, not by our `fail_after`). + # xfail(strict=False) so it auto-clears the moment the box is + # un-throttled (`headroom == 1.`); depth=1's shallow tree stays + # under those internal deadlines so it just rides the budget + # inflation below. See `scripts/cpu-perf-check`. + elif ( + depth == 3 + and + headroom != 1. + ): + request.node.add_marker( + pytest.mark.xfail( + strict=False, + reason=( + 'CPU throttled — tractor reap deadline injects ' + 'Cancelled into BEG; see conc-anal/' + 'trio_033_cancel_cascade_slowdown_depth3_issue.md' + ), + ) + ) + # Per-backend/-depth budgets: in the non-hang case the # whole spawn + cancel-cascade should complete in well # under these. On the borderline hang case the @@ -632,6 +669,11 @@ async def test_nested_multierrors( case ('main_thread_forkserver', 3): timeout = 30 + # inflate the budget by the throttle headroom probed above so + # a slow box doesn't masquerade as a deadline regression. 
+ if headroom != 1.: + timeout *= headroom + async with fail_after_w_trace(timeout): try: async with tractor.open_nursery() as nursery: diff --git a/tests/test_clustering.py b/tests/test_clustering.py index efb47d19..c426c746 100644 --- a/tests/test_clustering.py +++ b/tests/test_clustering.py @@ -24,8 +24,14 @@ def test_empty_mngrs_input_raises( 'actor-cluster teardown hangs intermittently on UDS' ) + # inflate under CPU throttle — incl. the sustained-load + # power-cap invisible to static freq reads. See + # `scripts/cpu-perf-check`. + from .conftest import cpu_perf_headroom + fail_after_s: float = 3 * cpu_perf_headroom() + async def main(): - with trio.fail_after(3): + with trio.fail_after(fail_after_s): async with ( open_actor_cluster( modules=[__name__], @@ -93,6 +99,13 @@ async def test_streaming_to_actor_cluster( 10 if is_forking_spawner else 6 ) + # inflate under CPU throttle — incl. the sustained-load + # power-cap invisible to static freq reads. See + # `scripts/cpu-perf-check`. + from .conftest import cpu_perf_headroom + headroom: float = cpu_perf_headroom() + if headroom != 1.: + delay *= headroom with trio.fail_after(delay): async with ( open_actor_cluster(modules=[__name__]) as portals,