#!/usr/bin/env python3
# tractor: structured concurrent "actors".
# Copyright 2018-eternity Tyler Goodlet.
#
# SPDX-License-Identifier: AGPL-3.0-or-later
'''
`tractor-reap` — SC-polite zombie-subactor reaper +
optional `/dev/shm/` orphan-segment sweep.

Three cleanup phases (run in order; phases 2 and 3 are opt-in):

1. **process reap** — finds `tractor` subactor processes
   left alive after a `pytest` (or any tractor-app) run
   that failed to fully cancel its actor tree, then sends
   SIGINT with a bounded grace window before escalating
   to SIGKILL.

2. **shm sweep** (`--shm` / `--shm-only`) — unlinks
   `/dev/shm/<file>` entries owned by the current uid
   that no live process has open (mmap'd or fd-held).
   Needed because `tractor` disables
   `mp.resource_tracker` (see `tractor.ipc._mp_bs`), so a
   hard-crashing actor leaves leaked segments that
   nothing else GCs.

3. **UDS sweep** (`--uds` / `--uds-only`) — unlinks
   `${XDG_RUNTIME_DIR}/tractor/<name>@<pid>.sock` files
   whose binder pid is dead (or the `1616` registry
   sentinel). Needed because the IPC server's
   `os.unlink()` cleanup lives in a `finally:` block
   that doesn't always run on hard exits (SIGKILL,
   escaped `KeyboardInterrupt`, etc.) — see issue #452.

Process-reap detection modes (auto-selected):

    --parent <pid>  : descendant-mode — kill procs whose
                      PPid == <pid>. Use when a parent
                      is still alive and you want to
                      scope the sweep precisely (e.g.
                      CI wrapper calling in from outside
                      pytest).

    (default)       : orphan-mode — kill procs with
                      PPid==1 (init-reparented) whose
                      cwd matches the repo root AND
                      whose cmdline contains `python`.
                      The cwd filter is what prevents
                      sweeping unrelated init-children.

Usage:

    # process reap only (default)
    scripts/tractor-reap

    # process reap + shm sweep
    scripts/tractor-reap --shm

    # only the shm sweep, skip process reap
    scripts/tractor-reap --shm-only

    # process reap + shm + UDS sweep (the works)
    scripts/tractor-reap --shm --uds

    # only UDS sweep
    scripts/tractor-reap --uds-only

    # from inside a still-live supervisor
    scripts/tractor-reap --parent 12345

    # dry-run: list what would be reaped, don't act
    scripts/tractor-reap -n
    scripts/tractor-reap --shm --uds -n

'''
import argparse
import pathlib
import subprocess
import sys


def _repo_root() -> pathlib.Path:
    '''
    Resolve the repo root dir for this script.

    Ask `git rev-parse --show-toplevel` first (run from the
    current working dir). Fall back to the dir two levels up
    from this script file (the checkout root when invoked as
    `scripts/tractor-reap`) whenever `git`,

    - can't be executed at all (missing, not executable, or
      any other `OSError` raised while spawning it),
    - exits non-zero, e.g. cwd is not inside a work tree,
    - prints nothing on stdout.

    '''
    try:
        out: str = subprocess.check_output(
            ['git', 'rev-parse', '--show-toplevel'],
            stderr=subprocess.DEVNULL,
            text=True,
        ).strip()
    except (
        subprocess.CalledProcessError,
        # NOT only `FileNotFoundError`: a `git` entry on
        # `PATH` that exists but can't be exec-ed raises
        # `PermissionError`; any such spawn failure just
        # means "git is not available".
        OSError,
    ):
        out = ''

    # an empty answer would otherwise become `Path('.')`,
    # i.e. silently "whatever the cwd is".
    if out:
        return pathlib.Path(out)

    return pathlib.Path(__file__).resolve().parent.parent


def _build_parser() -> argparse.ArgumentParser:
    '''
    Construct the `tractor-reap` CLI parser; the module
    docstring doubles as the `--help` description.

    '''
    parser = argparse.ArgumentParser(
        prog='tractor-reap',
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        '--parent', '-p',
        type=int,
        default=None,
        help='descendant-mode: reap procs with PPid==<pid>',
    )
    parser.add_argument(
        '--grace', '-g',
        type=float,
        default=3.0,
        help='SIGINT grace window in seconds (default 3.0)',
    )
    parser.add_argument(
        '--dry-run', '-n',
        action='store_true',
        help='list matched pids/paths but do not signal/unlink',
    )
    parser.add_argument(
        '--shm',
        action='store_true',
        help=(
            'after process reap, also unlink orphaned '
            '/dev/shm segments owned by the current user '
            'that no live process is mapping or holding open'
        ),
    )
    parser.add_argument(
        '--shm-only',
        action='store_true',
        help='skip process reap; only do the shm sweep',
    )
    parser.add_argument(
        '--uds',
        action='store_true',
        help=(
            'after process reap, also unlink orphaned '
            '${XDG_RUNTIME_DIR}/tractor/*.sock files '
            'whose binder pid is dead (or the 1616 '
            'registry sentinel). See issue #452.'
        ),
    )
    parser.add_argument(
        '--uds-only',
        action='store_true',
        help='skip process reap + shm; only do the UDS sweep',
    )
    return parser


def main(argv: 'list[str] | None' = None) -> int:
    '''
    CLI entry point: parse flags then run the enabled cleanup
    phases in order (process reap, shm sweep, UDS sweep).

    `argv` is the list of CLI args to parse, program name
    excluded; the default `None` makes `argparse` read
    `sys.argv[1:]` so a plain `main()` call behaves as before.
    Passing an explicit list allows driving the tool from
    other code or tests.

    Returns the process exit status: `0` unless a non-dry-run
    phase reports leftovers (a non-empty 2nd item returned by
    `reap()`, `reap_shm()` or `reap_uds()`), in which case `1`.
    Dry-run and "nothing found" outcomes never flip it.

    Raises `SystemExit` via `argparse` on `--help` or invalid
    args.

    '''
    args = _build_parser().parse_args(argv)

    # any *-only flag also skips the process reap phase.
    #
    # NOTE: the *-only flags gate phase 1 ONLY; each sweep
    # below runs whenever either of its own flags is set, so
    # e.g. `--shm-only --uds` runs both sweeps.
    skip_proc_reap: bool = (
        args.shm_only
        or
        args.uds_only
    )

    # import lazily so `--help` doesn't require the tractor
    # package to be importable (e.g. when running from a
    # shell not inside a venv).
    repo = _repo_root()
    sys.path.insert(0, str(repo))
    from tractor._testing._reap import (
        find_descendants,
        find_orphans,
        find_orphaned_shm,
        find_orphaned_uds,
        reap,
        reap_shm,
        reap_uds,
    )

    rc: int = 0

    # --- phase 1: process reap (skipped under --*-only) ---
    if not skip_proc_reap:
        if args.parent is not None:
            pids: list[int] = find_descendants(args.parent)
            mode: str = f'descendants of PPid={args.parent}'
        else:
            pids = find_orphans(repo)
            mode = f'orphans (PPid=1, cwd={repo})'

        if not pids:
            print(f'[tractor-reap] no {mode} to reap')
        elif args.dry_run:
            print(
                f'[tractor-reap] dry-run — {mode}:\n  {pids}'
            )
        else:
            _, survivors = reap(pids, grace=args.grace)
            if survivors:
                rc = 1

    # --- phase 2: shm sweep (opt-in) ---
    if args.shm or args.shm_only:
        leaked: list[str] = find_orphaned_shm()
        if not leaked:
            print(
                '[tractor-reap] no orphaned /dev/shm '
                'segments to sweep'
            )
        elif args.dry_run:
            print(
                f'[tractor-reap] dry-run — {len(leaked)} '
                f'orphaned shm segment(s):\n  {leaked}'
            )
        else:
            _, errors = reap_shm(leaked)
            if errors:
                rc = 1

    # --- phase 3: UDS sweep (opt-in) ---
    if args.uds or args.uds_only:
        leaked_uds: list[str] = find_orphaned_uds()
        if not leaked_uds:
            print(
                '[tractor-reap] no orphaned UDS sock-files '
                'to sweep'
            )
        elif args.dry_run:
            print(
                f'[tractor-reap] dry-run — {len(leaked_uds)} '
                f'orphaned UDS sock-file(s):\n  {leaked_uds}'
            )
        else:
            _, errors = reap_uds(leaked_uds)
            if errors:
                rc = 1

    # exit 0 if everything cleaned cleanly, else 1 — useful
    # for CI health-check chaining.
    return rc


if __name__ == '__main__':
    # script entry: hand the int status returned by `main()`
    # to the interpreter as the process exit code.
    sys.exit(main())
