Add `cpu_perf_headroom()` for throttle-aware deadlines
Mass `trio` deadline-miss failures on byte-identical code turned out to be a firmware/EC power-cap (AMD PPT/STAPM) clamping the all-core sustained clock while every static knob (`governor`, `scaling_max_freq`, EPP, platform-profile) still read "performance" — invisible to the existing `cpu_scaling_factor()` check. See `scripts/cpu-perf-check` + the `ai/conc-anal/trio_033_cancel_cascade_slowdown_depth3_issue.md` notes.

Details:
- add `_measure_sustained_headroom()` to `tests/conftest.py`: a one-shot ~0.9s all-core burn (explicit `fork`-ctx `mp` procs) sampling achieved-vs-max freq AFTER the boost window; when the achieved/max fraction falls below the 0.6 gate it returns the full inverse fraction (capped at 4x), else 1.0; best-effort 1.0 on non-linux or any error,
- add `cpu_perf_headroom()`: `max()` of the static scaling factor and the (session-cached) sustained probe,
- inflate deadline budgets by it in `test_dynamic_pub_sub`, both `test_clustering` cases, the `test_multi_nested_subactors_error_through_nurseries` pexpect waits + `test_nested_multierrors`,
- `xfail(strict=False)` `test_nested_multierrors` depth=3 under throttle: the deep tree trips tractor's INTERNAL reap deadlines (`soft_kill`/`hard_kill` `terminate_after=1.6`) minting a `Cancelled` inside the runtime — not fixable by test-budget inflation; auto-clears once the box un-throttles.

(this patch was generated in some part by [`claude-code`][claude-code-gh])

[claude-code-gh]: https://github.com/anthropics/claude-code

test_cpu_throttling
parent
8526985c97
commit
20cb99ecd4
|
|
@ -120,6 +120,139 @@ def cpu_scaling_factor() -> float:
|
||||||
return 1.
|
return 1.
|
||||||
|
|
||||||
|
|
||||||
|
# Session-wide cache for the sustained-load throttle multiplier.
# It starts as `None` (= not yet measured) and is filled in lazily
# by the first `cpu_perf_headroom()` call, which stores the result
# of `_measure_sustained_headroom()` here so later calls can reuse
# it instead of re-running the probe.
_sustained_headroom: float|None = None
|
||||||
|
|
||||||
|
|
||||||
|
def _measure_sustained_headroom(
|
||||||
|
secs: float = 0.9,
|
||||||
|
# a healthy all-core sustained clock holds AT/ABOVE this
|
||||||
|
# fraction of the package single-core max ceiling (boost sags
|
||||||
|
# under full multi-core load even un-throttled, but not far);
|
||||||
|
# at/above it we assume no throttle and return 1.0.
|
||||||
|
throttle_gate: float = 0.6,
|
||||||
|
max_headroom: float = 4.,
|
||||||
|
) -> float:
|
||||||
|
'''
|
||||||
|
One-shot all-core burn returning a latency multiplier
|
||||||
|
(>= 1.0) that reflects *sustained-load* CPU throttle.
|
||||||
|
|
||||||
|
Catches the firmware/EC power-cap clamp (AMD PPT/STAPM &
|
||||||
|
friends) that pins achieved `scaling_cur_freq` to a fraction
|
||||||
|
of the ceiling under multi-core load while EVERY static knob
|
||||||
|
(`governor`, `scaling_max_freq`, `EPP`, `platform_profile`)
|
||||||
|
still reads "full performance". That cap is INVISIBLE to
|
||||||
|
`cpu_scaling_factor()` and is the gremlin behind mass `trio`
|
||||||
|
deadline-miss failures on byte-identical code — see
|
||||||
|
`scripts/cpu-perf-check`.
|
||||||
|
|
||||||
|
Best-effort: returns 1.0 on non-linux / missing sysfs / any
|
||||||
|
error so it can never break a test run.
|
||||||
|
|
||||||
|
'''
|
||||||
|
import glob
|
||||||
|
import multiprocessing as mp
|
||||||
|
|
||||||
|
def _read_mhz(path: str) -> int|None:
|
||||||
|
try:
|
||||||
|
return int(open(path).read()) // 1000
|
||||||
|
except OSError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
maxs: list[int] = [
|
||||||
|
v for f in glob.glob(
|
||||||
|
'/sys/devices/system/cpu/cpu[0-9]*/cpufreq/scaling_max_freq'
|
||||||
|
)
|
||||||
|
if (v := _read_mhz(f)) is not None
|
||||||
|
]
|
||||||
|
pkg_max: int = max(maxs) if maxs else 0
|
||||||
|
if not pkg_max:
|
||||||
|
return 1.
|
||||||
|
|
||||||
|
def _burn(stop: float) -> None:
|
||||||
|
x: int = 1
|
||||||
|
while time.perf_counter() < stop:
|
||||||
|
x += x * x ^ 0x5
|
||||||
|
|
||||||
|
# explicit `fork` ctx so we're immune to whatever global
|
||||||
|
# mp start-method tractor/the suite may have set (`spawn`
|
||||||
|
# would re-exec + re-import 24x — slow and pointless here).
|
||||||
|
ctx = mp.get_context('fork')
|
||||||
|
ncpu: int = os.cpu_count() or 1
|
||||||
|
stop: float = time.perf_counter() + secs
|
||||||
|
procs = [
|
||||||
|
ctx.Process(target=_burn, args=(stop,), daemon=True)
|
||||||
|
for _ in range(ncpu)
|
||||||
|
]
|
||||||
|
for p in procs:
|
||||||
|
p.start()
|
||||||
|
|
||||||
|
# skip the ~0.4s boost window so we sample the steady
|
||||||
|
# state AFTER any power-cap has engaged.
|
||||||
|
samples: list[int] = []
|
||||||
|
time.sleep(0.4)
|
||||||
|
while time.perf_counter() < stop - 0.1:
|
||||||
|
curs: list[int] = [
|
||||||
|
v for f in glob.glob(
|
||||||
|
'/sys/devices/system/cpu/cpu[0-9]*/cpufreq/scaling_cur_freq'
|
||||||
|
)
|
||||||
|
if (v := _read_mhz(f)) is not None
|
||||||
|
]
|
||||||
|
if curs:
|
||||||
|
samples.append(sum(curs) // len(curs))
|
||||||
|
time.sleep(0.15)
|
||||||
|
for p in procs:
|
||||||
|
p.join()
|
||||||
|
|
||||||
|
if not samples:
|
||||||
|
return 1.
|
||||||
|
frac: float = (sum(samples) // len(samples)) / pkg_max
|
||||||
|
# below the gate we read it as a power-cap throttle. The
|
||||||
|
# spawn/IPC/fork-bound work these budgets guard slows ~1:1
|
||||||
|
# with the achieved-vs-max freq ratio, so compensate by the
|
||||||
|
# FULL inverse fraction (a boost-discounted factor
|
||||||
|
# under-shoots and still trips the marginal cases).
|
||||||
|
if frac >= throttle_gate:
|
||||||
|
return 1.
|
||||||
|
return min(max_headroom, 1. / frac)
|
||||||
|
|
||||||
|
except Exception:
|
||||||
|
return 1.
|
||||||
|
|
||||||
|
|
||||||
|
def cpu_perf_headroom() -> float:
    '''
    Combined latency-headroom multiplier for test deadlines;
    typical use is `timeout *= cpu_perf_headroom()`.

    Two throttle sources are folded together by taking the
    larger of:

    - the static cpu-freq scaling factor reported by
      `cpu_scaling_factor()`,

    - the sustained-load power-cap factor reported by
      `_measure_sustained_headroom()`, which is probed on the
      first call only and then served from the module-level
      `_sustained_headroom` cache.

    When the module's `_non_linux` flag is set the sustained
    probe is skipped entirely and only the static factor is
    returned.

    Background on the power-cap case lives in
    `ai/conc-anal/trio_033_cancel_cascade_slowdown_depth3_issue.md`.

    '''
    global _sustained_headroom

    static_factor: float = cpu_scaling_factor()
    if not _non_linux:
        # lazily fill the session cache on first use.
        if _sustained_headroom is None:
            _sustained_headroom = _measure_sustained_headroom()
        return max(static_factor, _sustained_headroom)

    return static_factor
|
||||||
|
|
||||||
|
|
||||||
# NOTE, the `--ll`/`--tl` CLI flags + the `loglevel`, `test_log`
|
# NOTE, the `--ll`/`--tl` CLI flags + the `loglevel`, `test_log`
|
||||||
# and `testing_pkg_name` fixtures have been factored into the
|
# and `testing_pkg_name` fixtures have been factored into the
|
||||||
# `tractor._testing.pytest` plugin (loaded via the `-p` entry in
|
# `tractor._testing.pytest` plugin (loaded via the `-p` entry in
|
||||||
|
|
|
||||||
|
|
@ -794,6 +794,14 @@ def test_multi_nested_subactors_error_through_nurseries(
|
||||||
loglevel='pdb',
|
loglevel='pdb',
|
||||||
)
|
)
|
||||||
last_send_char: str|None = None
|
last_send_char: str|None = None
|
||||||
|
|
||||||
|
# inflate pexpect waits under CPU throttle — incl. the
|
||||||
|
# sustained-load power-cap invisible to static freq reads — so
|
||||||
|
# a slow-to-boot child REPL doesn't trip a false `TIMEOUT`.
|
||||||
|
# See `scripts/cpu-perf-check`.
|
||||||
|
from ..conftest import cpu_perf_headroom
|
||||||
|
headroom: float = cpu_perf_headroom()
|
||||||
|
|
||||||
for (
|
for (
|
||||||
i,
|
i,
|
||||||
send_char,
|
send_char,
|
||||||
|
|
@ -817,6 +825,9 @@ def test_multi_nested_subactors_error_through_nurseries(
|
||||||
if is_forking_spawner:
|
if is_forking_spawner:
|
||||||
timeout += 4
|
timeout += 4
|
||||||
|
|
||||||
|
if headroom != 1.:
|
||||||
|
timeout *= headroom
|
||||||
|
|
||||||
try:
|
try:
|
||||||
child.expect(
|
child.expect(
|
||||||
PROMPT,
|
PROMPT,
|
||||||
|
|
|
||||||
|
|
@ -188,11 +188,18 @@ def test_dynamic_pub_sub(
|
||||||
# sits forever until external SIGINT. The `afk_alarm_w_trace`
|
# sits forever until external SIGINT. The `afk_alarm_w_trace`
|
||||||
# outer guard below is the AFK-safety counterpart (SIGALRM
|
# outer guard below is the AFK-safety counterpart (SIGALRM
|
||||||
# raises in the main thread regardless of trio scope state).
|
# raises in the main thread regardless of trio scope state).
|
||||||
fail_after_s: int = (
|
fail_after_s: float = (
|
||||||
8
|
8
|
||||||
if is_forking_spawner
|
if is_forking_spawner
|
||||||
else 20
|
else 20
|
||||||
)
|
)
|
||||||
|
# inflate under CPU throttle — incl. the sustained-load
|
||||||
|
# power-cap invisible to static freq reads — so a slow box
|
||||||
|
# doesn't trip the deadline. See `scripts/cpu-perf-check`.
|
||||||
|
from .conftest import cpu_perf_headroom
|
||||||
|
headroom: float = cpu_perf_headroom()
|
||||||
|
if headroom != 1.:
|
||||||
|
fail_after_s *= headroom
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
# bug-class-3 breadcrumb: tag each level of the cancel path
|
# bug-class-3 breadcrumb: tag each level of the cancel path
|
||||||
|
|
|
||||||
|
|
@ -592,6 +592,15 @@ async def test_nested_multierrors(
|
||||||
# depth=3, BOTH variants will reliably `xpass` and
|
# depth=3, BOTH variants will reliably `xpass` and
|
||||||
# pytest will yell — our signal to drop the marker. See
|
# pytest will yell — our signal to drop the marker. See
|
||||||
# `ai/conc-anal/cancel_cascade_too_slow_under_main_thread_forkserver_issue.md`.
|
# `ai/conc-anal/cancel_cascade_too_slow_under_main_thread_forkserver_issue.md`.
|
||||||
|
#
|
||||||
|
# Probe CPU throttle ONCE up-front (folds in the sustained-load
|
||||||
|
# power-cap that static freq reads miss): used BOTH to inflate
|
||||||
|
# the deadline budget below AND to xfail depth=3, whose failure
|
||||||
|
# mode under throttle is a runtime-internal reap deadline — not
|
||||||
|
# a test-budget miss. See `scripts/cpu-perf-check`.
|
||||||
|
from .conftest import cpu_perf_headroom
|
||||||
|
headroom: float = cpu_perf_headroom()
|
||||||
|
|
||||||
if start_method == 'main_thread_forkserver':
|
if start_method == 'main_thread_forkserver':
|
||||||
request.node.add_marker(
|
request.node.add_marker(
|
||||||
pytest.mark.xfail(
|
pytest.mark.xfail(
|
||||||
|
|
@ -605,6 +614,34 @@ async def test_nested_multierrors(
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Under CPU throttle (incl. the sustained-load power-cap that
|
||||||
|
# static freq reads miss) the DEEP depth=3 tree trips tractor's
|
||||||
|
# INTERNAL reap deadlines (`soft_kill`/`hard_kill`
|
||||||
|
# `move_on_after`/`terminate_after=1.6`) before slow subprocs
|
||||||
|
# exit, injecting a `Cancelled(source='deadline')` into the BEG
|
||||||
|
# — the SAME shape-mismatch class as the MTF xfail above, and
|
||||||
|
# NOT fixable by inflating the test-level budget (the Cancelled
|
||||||
|
# is minted inside the runtime, not by our `fail_after`).
|
||||||
|
# xfail(strict=False) so it auto-clears the moment the box is
|
||||||
|
# un-throttled (`headroom == 1.`); depth=1's shallow tree stays
|
||||||
|
# under those internal deadlines so it just rides the budget
|
||||||
|
# inflation below. See `scripts/cpu-perf-check`.
|
||||||
|
elif (
|
||||||
|
depth == 3
|
||||||
|
and
|
||||||
|
headroom != 1.
|
||||||
|
):
|
||||||
|
request.node.add_marker(
|
||||||
|
pytest.mark.xfail(
|
||||||
|
strict=False,
|
||||||
|
reason=(
|
||||||
|
'CPU throttled — tractor reap deadline injects '
|
||||||
|
'Cancelled into BEG; see conc-anal/'
|
||||||
|
'trio_033_cancel_cascade_slowdown_depth3_issue.md'
|
||||||
|
),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
# Per-backend/-depth budgets: in the non-hang case the
|
# Per-backend/-depth budgets: in the non-hang case the
|
||||||
# whole spawn + cancel-cascade should complete in well
|
# whole spawn + cancel-cascade should complete in well
|
||||||
# under these. On the borderline hang case the
|
# under these. On the borderline hang case the
|
||||||
|
|
@ -632,6 +669,11 @@ async def test_nested_multierrors(
|
||||||
case ('main_thread_forkserver', 3):
|
case ('main_thread_forkserver', 3):
|
||||||
timeout = 30
|
timeout = 30
|
||||||
|
|
||||||
|
# inflate the budget by the throttle headroom probed above so
|
||||||
|
# a slow box doesn't masquerade as a deadline regression.
|
||||||
|
if headroom != 1.:
|
||||||
|
timeout *= headroom
|
||||||
|
|
||||||
async with fail_after_w_trace(timeout):
|
async with fail_after_w_trace(timeout):
|
||||||
try:
|
try:
|
||||||
async with tractor.open_nursery() as nursery:
|
async with tractor.open_nursery() as nursery:
|
||||||
|
|
|
||||||
|
|
@ -24,8 +24,14 @@ def test_empty_mngrs_input_raises(
|
||||||
'actor-cluster teardown hangs intermittently on UDS'
|
'actor-cluster teardown hangs intermittently on UDS'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# inflate under CPU throttle — incl. the sustained-load
|
||||||
|
# power-cap invisible to static freq reads. See
|
||||||
|
# `scripts/cpu-perf-check`.
|
||||||
|
from .conftest import cpu_perf_headroom
|
||||||
|
fail_after_s: float = 3 * cpu_perf_headroom()
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
with trio.fail_after(3):
|
with trio.fail_after(fail_after_s):
|
||||||
async with (
|
async with (
|
||||||
open_actor_cluster(
|
open_actor_cluster(
|
||||||
modules=[__name__],
|
modules=[__name__],
|
||||||
|
|
@ -93,6 +99,13 @@ async def test_streaming_to_actor_cluster(
|
||||||
10 if is_forking_spawner
|
10 if is_forking_spawner
|
||||||
else 6
|
else 6
|
||||||
)
|
)
|
||||||
|
# inflate under CPU throttle — incl. the sustained-load
|
||||||
|
# power-cap invisible to static freq reads. See
|
||||||
|
# `scripts/cpu-perf-check`.
|
||||||
|
from .conftest import cpu_perf_headroom
|
||||||
|
headroom: float = cpu_perf_headroom()
|
||||||
|
if headroom != 1.:
|
||||||
|
delay *= headroom
|
||||||
with trio.fail_after(delay):
|
with trio.fail_after(delay):
|
||||||
async with (
|
async with (
|
||||||
open_actor_cluster(modules=[__name__]) as portals,
|
open_actor_cluster(modules=[__name__]) as portals,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue