Add `cpu_perf_headroom()` for throttle-aware deadlines

Mass `trio` deadline-miss failures on byte-identical code turned
out to be a firmware/EC power-cap (AMD PPT/STAPM) clamping the
all-core sustained clock while every static knob (`governor`,
`scaling_max_freq`, EPP, platform-profile) still read "performance"
— invisible to the existing `cpu_scaling_factor()` check. See
`scripts/cpu-perf-check` + the
`ai/conc-anal/trio_033_cancel_cascade_slowdown_depth3_issue.md`
notes.

Deats,
- add `_measure_sustained_headroom()` to `tests/conftest.py`: a
  one-shot ~0.9s all-core burn (explicit `fork`-ctx `mp` procs)
  sampling achieved-vs-max freq AFTER the boost window; when the
  achieved/max ratio falls below the 0.6 gate it returns the full
  inverse of that ratio (capped at 4x), else 1.0; best-effort 1.0
  on non-linux or any error,
- add `cpu_perf_headroom()`: `max()` of the static scaling factor
  and the (session-cached) sustained probe,
- inflate deadline budgets by it in `test_dynamic_pub_sub`, both
  `test_clustering` cases, the
  `test_multi_nested_subactors_error_through_nurseries` pexpect
  waits + `test_nested_multierrors`,
- `xfail(strict=False)` `test_nested_multierrors` depth=3 under
  throttle: the deep tree trips tractor's INTERNAL reap deadlines
  (`soft_kill`/`hard_kill` `terminate_after=1.6`) minting a
  `Cancelled` inside the runtime — not fixable by test-budget
  inflation; auto-clears once the box un-throttles.

(this patch was generated in some part by [`claude-code`][claude-code-gh])
[claude-code-gh]: https://github.com/anthropics/claude-code
test_cpu_throttling
Gud Boi 2026-06-12 13:37:05 -04:00
parent 8526985c97
commit 20cb99ecd4
5 changed files with 208 additions and 2 deletions

View File

@ -120,6 +120,139 @@ def cpu_scaling_factor() -> float:
return 1. return 1.
# Session-cached sustained-load throttle multiplier: measured
# once (lazily) on the first `cpu_perf_headroom()` call and
# reused by every later call. `None` = not-yet-measured.
_sustained_headroom: float|None = None
def _measure_sustained_headroom(
secs: float = 0.9,
# a healthy all-core sustained clock holds AT/ABOVE this
# fraction of the package single-core max ceiling (boost sags
# under full multi-core load even un-throttled, but not far);
# at/above it we assume no throttle and return 1.0.
throttle_gate: float = 0.6,
max_headroom: float = 4.,
) -> float:
'''
One-shot all-core burn returning a latency multiplier
(>= 1.0) that reflects *sustained-load* CPU throttle.
Catches the firmware/EC power-cap clamp (AMD PPT/STAPM &
friends) that pins achieved `scaling_cur_freq` to a fraction
of the ceiling under multi-core load while EVERY static knob
(`governor`, `scaling_max_freq`, `EPP`, `platform_profile`)
still reads "full performance". That cap is INVISIBLE to
`cpu_scaling_factor()` and is the gremlin behind mass `trio`
deadline-miss failures on byte-identical code see
`scripts/cpu-perf-check`.
Best-effort: returns 1.0 on non-linux / missing sysfs / any
error so it can never break a test run.
'''
import glob
import multiprocessing as mp
def _read_mhz(path: str) -> int|None:
try:
return int(open(path).read()) // 1000
except OSError:
return None
try:
maxs: list[int] = [
v for f in glob.glob(
'/sys/devices/system/cpu/cpu[0-9]*/cpufreq/scaling_max_freq'
)
if (v := _read_mhz(f)) is not None
]
pkg_max: int = max(maxs) if maxs else 0
if not pkg_max:
return 1.
def _burn(stop: float) -> None:
x: int = 1
while time.perf_counter() < stop:
x += x * x ^ 0x5
# explicit `fork` ctx so we're immune to whatever global
# mp start-method tractor/the suite may have set (`spawn`
# would re-exec + re-import 24x — slow and pointless here).
ctx = mp.get_context('fork')
ncpu: int = os.cpu_count() or 1
stop: float = time.perf_counter() + secs
procs = [
ctx.Process(target=_burn, args=(stop,), daemon=True)
for _ in range(ncpu)
]
for p in procs:
p.start()
# skip the ~0.4s boost window so we sample the steady
# state AFTER any power-cap has engaged.
samples: list[int] = []
time.sleep(0.4)
while time.perf_counter() < stop - 0.1:
curs: list[int] = [
v for f in glob.glob(
'/sys/devices/system/cpu/cpu[0-9]*/cpufreq/scaling_cur_freq'
)
if (v := _read_mhz(f)) is not None
]
if curs:
samples.append(sum(curs) // len(curs))
time.sleep(0.15)
for p in procs:
p.join()
if not samples:
return 1.
frac: float = (sum(samples) // len(samples)) / pkg_max
# below the gate we read it as a power-cap throttle. The
# spawn/IPC/fork-bound work these budgets guard slows ~1:1
# with the achieved-vs-max freq ratio, so compensate by the
# FULL inverse fraction (a boost-discounted factor
# under-shoots and still trips the marginal cases).
if frac >= throttle_gate:
return 1.
return min(max_headroom, 1. / frac)
except Exception:
return 1.
def cpu_perf_headroom() -> float:
    '''
    Deadline multiplier (>= 1.0) that folds together BOTH kinds
    of cpu-perf throttle; scale a test budget with it, e.g.
    `timeout *= cpu_perf_headroom()`:

    - static cpu-freq scaling, as reported by
      `cpu_scaling_factor()` (governor/policy lowered the
      `scaling_max_freq` ceiling).

    - sustained-load power-cap throttle, as reported by
      `_measure_sustained_headroom()` (firmware/EC PPT/STAPM
      clamps achieved freq under load while every static knob
      reads "performance"; INVISIBLE to the static check). This
      is the gremlin behind mass `trio` deadline-miss failures
      on unchanged code; see
      `ai/conc-anal/trio_033_cancel_cascade_slowdown_depth3_issue.md`.

    The larger of the two factors wins. When the module-level
    `_non_linux` flag is set only the static factor is returned
    and the sustained probe is never run.

    The sustained probe runs ONCE per session (its result is
    cached in `_sustained_headroom`); the cost is a ~0.9s
    all-core burn on the first call only.

    '''
    global _sustained_headroom

    static_factor: float = cpu_scaling_factor()
    if not _non_linux:
        # lazily fill the session cache on first use.
        if _sustained_headroom is None:
            _sustained_headroom = _measure_sustained_headroom()
        return max(static_factor, _sustained_headroom)

    return static_factor
# NOTE, the `--ll`/`--tl` CLI flags + the `loglevel`, `test_log` # NOTE, the `--ll`/`--tl` CLI flags + the `loglevel`, `test_log`
# and `testing_pkg_name` fixtures have been factored into the # and `testing_pkg_name` fixtures have been factored into the
# `tractor._testing.pytest` plugin (loaded via the `-p` entry in # `tractor._testing.pytest` plugin (loaded via the `-p` entry in

View File

@ -794,6 +794,14 @@ def test_multi_nested_subactors_error_through_nurseries(
loglevel='pdb', loglevel='pdb',
) )
last_send_char: str|None = None last_send_char: str|None = None
# inflate pexpect waits under CPU throttle — incl. the
# sustained-load power-cap invisible to static freq reads — so
# a slow-to-boot child REPL doesn't trip a false `TIMEOUT`.
# See `scripts/cpu-perf-check`.
from ..conftest import cpu_perf_headroom
headroom: float = cpu_perf_headroom()
for ( for (
i, i,
send_char, send_char,
@ -817,6 +825,9 @@ def test_multi_nested_subactors_error_through_nurseries(
if is_forking_spawner: if is_forking_spawner:
timeout += 4 timeout += 4
if headroom != 1.:
timeout *= headroom
try: try:
child.expect( child.expect(
PROMPT, PROMPT,

View File

@ -188,11 +188,18 @@ def test_dynamic_pub_sub(
# sits forever until external SIGINT. The `afk_alarm_w_trace` # sits forever until external SIGINT. The `afk_alarm_w_trace`
# outer guard below is the AFK-safety counterpart (SIGALRM # outer guard below is the AFK-safety counterpart (SIGALRM
# raises in the main thread regardless of trio scope state). # raises in the main thread regardless of trio scope state).
fail_after_s: int = ( fail_after_s: float = (
8 8
if is_forking_spawner if is_forking_spawner
else 20 else 20
) )
# inflate under CPU throttle — incl. the sustained-load
# power-cap invisible to static freq reads — so a slow box
# doesn't trip the deadline. See `scripts/cpu-perf-check`.
from .conftest import cpu_perf_headroom
headroom: float = cpu_perf_headroom()
if headroom != 1.:
fail_after_s *= headroom
async def main(): async def main():
# bug-class-3 breadcrumb: tag each level of the cancel path # bug-class-3 breadcrumb: tag each level of the cancel path

View File

@ -592,6 +592,15 @@ async def test_nested_multierrors(
# depth=3, BOTH variants will reliably `xpass` and # depth=3, BOTH variants will reliably `xpass` and
# pytest will yell — our signal to drop the marker. See # pytest will yell — our signal to drop the marker. See
# `ai/conc-anal/cancel_cascade_too_slow_under_main_thread_forkserver_issue.md`. # `ai/conc-anal/cancel_cascade_too_slow_under_main_thread_forkserver_issue.md`.
#
# Probe CPU throttle ONCE up-front (folds in the sustained-load
# power-cap that static freq reads miss): used BOTH to inflate
# the deadline budget below AND to xfail depth=3, whose failure
# mode under throttle is a runtime-internal reap deadline — not
# a test-budget miss. See `scripts/cpu-perf-check`.
from .conftest import cpu_perf_headroom
headroom: float = cpu_perf_headroom()
if start_method == 'main_thread_forkserver': if start_method == 'main_thread_forkserver':
request.node.add_marker( request.node.add_marker(
pytest.mark.xfail( pytest.mark.xfail(
@ -605,6 +614,34 @@ async def test_nested_multierrors(
) )
) )
# Under CPU throttle (incl. the sustained-load power-cap that
# static freq reads miss) the DEEP depth=3 tree trips tractor's
# INTERNAL reap deadlines (`soft_kill`/`hard_kill`
# `move_on_after`/`terminate_after=1.6`) before slow subprocs
# exit, injecting a `Cancelled(source='deadline')` into the BEG
# — the SAME shape-mismatch class as the MTF xfail above, and
# NOT fixable by inflating the test-level budget (the Cancelled
# is minted inside the runtime, not by our `fail_after`).
# xfail(strict=False) so it auto-clears the moment the box is
# un-throttled (`headroom == 1.`); depth=1's shallow tree stays
# under those internal deadlines so it just rides the budget
# inflation below. See `scripts/cpu-perf-check`.
elif (
depth == 3
and
headroom != 1.
):
request.node.add_marker(
pytest.mark.xfail(
strict=False,
reason=(
'CPU throttled — tractor reap deadline injects '
'Cancelled into BEG; see conc-anal/'
'trio_033_cancel_cascade_slowdown_depth3_issue.md'
),
)
)
# Per-backend/-depth budgets: in the non-hang case the # Per-backend/-depth budgets: in the non-hang case the
# whole spawn + cancel-cascade should complete in well # whole spawn + cancel-cascade should complete in well
# under these. On the borderline hang case the # under these. On the borderline hang case the
@ -632,6 +669,11 @@ async def test_nested_multierrors(
case ('main_thread_forkserver', 3): case ('main_thread_forkserver', 3):
timeout = 30 timeout = 30
# inflate the budget by the throttle headroom probed above so
# a slow box doesn't masquerade as a deadline regression.
if headroom != 1.:
timeout *= headroom
async with fail_after_w_trace(timeout): async with fail_after_w_trace(timeout):
try: try:
async with tractor.open_nursery() as nursery: async with tractor.open_nursery() as nursery:

View File

@ -24,8 +24,14 @@ def test_empty_mngrs_input_raises(
'actor-cluster teardown hangs intermittently on UDS' 'actor-cluster teardown hangs intermittently on UDS'
) )
# inflate under CPU throttle — incl. the sustained-load
# power-cap invisible to static freq reads. See
# `scripts/cpu-perf-check`.
from .conftest import cpu_perf_headroom
fail_after_s: float = 3 * cpu_perf_headroom()
async def main(): async def main():
with trio.fail_after(3): with trio.fail_after(fail_after_s):
async with ( async with (
open_actor_cluster( open_actor_cluster(
modules=[__name__], modules=[__name__],
@ -93,6 +99,13 @@ async def test_streaming_to_actor_cluster(
10 if is_forking_spawner 10 if is_forking_spawner
else 6 else 6
) )
# inflate under CPU throttle — incl. the sustained-load
# power-cap invisible to static freq reads. See
# `scripts/cpu-perf-check`.
from .conftest import cpu_perf_headroom
headroom: float = cpu_perf_headroom()
if headroom != 1.:
delay *= headroom
with trio.fail_after(delay): with trio.fail_after(delay):
async with ( async with (
open_actor_cluster(modules=[__name__]) as portals, open_actor_cluster(modules=[__name__]) as portals,