#!/usr/bin/env python3
"""Collect deterministic environment manifests for Python/UV, ObjC, and C++ stacks.

This script is local-only and does not attempt to alter build outputs.
It is intentionally resilient: all command failures are captured as structured
errors so probing works even on partial toolchain setups.
"""

from __future__ import annotations

import argparse
import hashlib
import json
import os
import platform
import shlex
import shutil
import subprocess
import sys
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence

REPO_ROOT = Path(__file__).resolve().parent.parent
DEFAULT_JSON = REPO_ROOT / "training" / "build_env" / "toolchain_env.json"
DEFAULT_MARKDOWN = REPO_ROOT / "training" / "build_env" / "toolchain_env.md"
# Maximum number of characters of captured stdout/stderr kept per command.
TEXT_LIMIT = 4000

# Minimal Objective-C translation unit used by the ObjC sanity probe.  It only
# needs to pull in the two framework umbrella headers the probe links against.
_OBJC_PROBE_SOURCE = (
    "#import <Foundation/Foundation.h>\n"
    "#import <CoreML/CoreML.h>\n"
    "int main(void) { return 0; }"
)


@dataclass
class CommandProbe:
    """Outcome of running one external command.

    ``return_code`` stays ``None`` when the process never produced an exit
    status (binary missing, timeout, or an unexpected launch error).
    """

    command: List[str]
    return_code: Optional[int] = None
    stdout: str = ""
    stderr: str = ""
    timed_out: bool = False
    missing: bool = False

    def as_dict(self) -> Dict[str, Any]:
        """Return a JSON-serialisable view with the command shell-quoted."""
        return {
            "command": " ".join(shlex.quote(p) for p in self.command),
            "return_code": self.return_code,
            "stdout": self.stdout,
            "stderr": self.stderr,
            "timed_out": self.timed_out,
            "missing": self.missing,
        }

    @property
    def ok(self) -> bool:
        """True only when the command was found, finished in time, and exited 0."""
        return (not self.missing) and (not self.timed_out) and (self.return_code == 0)


def _truncate(text: str, max_len: int = TEXT_LIMIT) -> str:
    """Clip *text* to at most *max_len* characters, marking cuts with ``...``."""
    if len(text) <= max_len:
        return text
    if max_len <= 3:
        # No room for the ellipsis; a negative slice bound would otherwise
        # keep the wrong part of the string.
        return text[: max(max_len, 0)]
    return text[: max_len - 3] + "..."


def _coerce_text(value: Any) -> str:
    """Turn captured process output (``str``, ``bytes`` or ``None``) into ``str``."""
    if value is None:
        return ""
    if isinstance(value, (bytes, bytearray)):
        return bytes(value).decode("utf-8", errors="replace")
    return str(value)


def _run_command(
    command: Sequence[str],
    cwd: Optional[Path] = None,
    timeout: int = 8,
    env: Optional[Dict[str, str]] = None,
) -> CommandProbe:
    """Run *command* and capture the outcome without ever raising.

    Args:
        command: Argument vector; executed without a shell.
        cwd: Optional working directory for the child process.
        timeout: Seconds to wait before the run is treated as timed out.
        env: Optional replacement environment for the child process.

    Returns:
        A ``CommandProbe`` describing the exit status, truncated output, and
        whether the binary was missing or the run timed out.
    """
    result = CommandProbe(list(command))
    try:
        proc = subprocess.run(
            list(command),
            cwd=str(cwd) if cwd else None,
            env=env,
            check=False,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        result.return_code = int(proc.returncode)
        result.stdout = _truncate((proc.stdout or "").strip())
        result.stderr = _truncate((proc.stderr or "").strip())
    except FileNotFoundError:
        result.missing = True
        result.return_code = None
        result.stderr = f"command not found: {command[0] if command else ''}"
    except subprocess.TimeoutExpired as exc:
        result.timed_out = True
        # TimeoutExpired carries raw bytes even when text=True was requested,
        # so decode before truncating/serialising.
        result.stdout = _truncate(_coerce_text(exc.stdout))
        result.stderr = _truncate(_coerce_text(exc.stderr))
    except Exception as exc:  # defensive: collect diagnostics, never crash.
        result.stderr = f"{type(exc).__name__}: {exc}"
    return result


def _command_matrix(command: Sequence[str], cwd: Optional[Path] = None) -> Dict[str, Any]:
    """Run *command* and return a flat summary dict that includes an ``ok`` flag."""
    probe = _run_command(command, cwd=cwd)
    return {
        "ok": probe.ok,
        "code": probe.return_code,
        "missing": probe.missing,
        "timed_out": probe.timed_out,
        "stdout": probe.stdout,
        "stderr": probe.stderr,
        "command": probe.as_dict()["command"],
    }


def _system_profiler_payload() -> Dict[str, Any]:
    """Query ``system_profiler SPHardwareDataType -json`` and extract key fields.

    Returns a dict with ``captured: False`` plus diagnostics when the command
    fails or its output cannot be interpreted as a JSON object.
    """
    result = _run_command(["system_profiler", "SPHardwareDataType", "-json"], timeout=12)
    if not result.ok:
        return {"captured": False, "command": result.as_dict()}

    try:
        parsed = json.loads(result.stdout or "{}")
    except Exception as exc:  # defensive: a probe must never crash the run.
        return {
            "captured": False,
            "command": result.as_dict(),
            "parse_error": f"{type(exc).__name__}: {exc}",
        }
    if not isinstance(parsed, dict):
        # Valid JSON but not an object: .get() below would raise.
        return {
            "captured": False,
            "command": result.as_dict(),
            "parse_error": f"unexpected top-level JSON type: {type(parsed).__name__}",
        }

    hardware = parsed.get("SPHardwareDataType", [])
    first = hardware[0] if isinstance(hardware, list) and hardware else {}
    if not isinstance(first, dict):
        first = {}

    # NOTE(review): chip_model, model and identifier all read "machine_model";
    # kept as-is for manifest compatibility -- confirm whether that is intended.
    return {
        "captured": True,
        "hardware": {
            "chip_model": first.get("machine_model", ""),
            "chip_type": first.get("chip_type", ""),
            "model": first.get("machine_model", ""),
            "physical_cores": first.get("number_processors", ""),
            "memory_gb": first.get("physical_memory", ""),
            "boot_rom": first.get("boot_rom_version", ""),
            "os_version": first.get("os_version", ""),
            "serial_number": first.get("serial_number", ""),
            "identifier": first.get("machine_model", ""),
        },
        "raw": result.as_dict(),
    }


def _probe_uv() -> Dict[str, Any]:
    """Report the uv/pip versions and the interpreter prefixes in use."""
    payload: Dict[str, Any] = {
        "uv_path": shutil.which("uv"),
        "uv_version": _command_matrix(["uv", "--version"]),
        "pip_version": _command_matrix([sys.executable, "-m", "pip", "--version"]),
        "venv": {
            "active_prefix": sys.prefix,
            "executable": sys.executable,
            "pyhome": sys.base_prefix,
        },
    }
    return payload


def _probe_python_env() -> Dict[str, Any]:
    """Describe the running interpreter, selected env vars, and torch availability."""
    torch_payload: Dict[str, Any] = {"available": False}
    try:
        import torch  # local import only

        torch_payload = {
            "available": True,
            "version": str(torch.__version__),
            "mps_available": bool(torch.backends.mps.is_available()),
            "cuda_available": bool(torch.cuda.is_available()),
            "cpu_available": True,
        }
    except Exception as exc:  # torch missing or partially broken: record, don't fail.
        torch_payload["error"] = f"{type(exc).__name__}: {exc}"

    env_keys = {
        "ANEPATH": os.environ.get("ANE_SRAM_MLPACKAGE_PATH", ""),
        "ANEDIR": os.environ.get("ANE_SRAM_MLPACKAGE_DIR", ""),
        "ANEDIRS": os.environ.get("ANE_SRAM_MLPACKAGE_DIRS", ""),
        "ANE_REPORT": os.environ.get("ANE_ARTIFACT_REPORT", ""),
    }
    return {
        "python": sys.version,
        "platform": platform.platform(),
        "platform_machine": platform.machine(),
        "implementation": platform.python_implementation(),
        "env": dict(env_keys),
        "torch": torch_payload,
    }


def _probe_hardware() -> Dict[str, Any]:
    """Collect uname/sw_vers/sysctl output plus the system_profiler summary."""
    sysctl_keys = [
        "hw.model",
        "hw.ncpu",
        "hw.memsize",
        "hw.optional.arm.FEAT_SME",
        "hw.optional.arm.FEAT_SME2",
        "hw.optional.arm.FEAT_BF16",
        "hw.optional.arm.FEAT_I8MM",
        "hw.optional.arm.FEAT_FP16",
    ]
    return {
        "uname": {
            "all": _command_matrix(["uname", "-a"]),
            "mach": _command_matrix(["uname", "-m"]),
        },
        "os": {
            "sw_vers": _command_matrix(["sw_vers"]),
        },
        "sysctl": {key: _command_matrix(["sysctl", "-n", key]) for key in sysctl_keys},
        "hardware_profile": _system_profiler_payload(),
        "host_name": platform.node(),
        "user": os.environ.get("USER", ""),
    }


def _probe_objc() -> Dict[str, Any]:
    """Probe the Xcode/clang toolchain and syntax-check a tiny ObjC source file.

    Returns a dict with ``commands`` (one summary per lookup command) and
    ``sanity`` (result of the ``clang -fsyntax-only`` probe, or a synthetic
    "skipped" record when clang could not be located through xcrun).
    """
    clang_path = _command_matrix(["xcrun", "--find", "clang"])
    coreml_compiler = _command_matrix(["xcrun", "-f", "coremlcompiler"])
    probe_commands: Dict[str, Any] = {
        "xcrun_clang": clang_path,
        "xcrun_sdk_path": _command_matrix(["xcrun", "--show-sdk-path"]),
        "xcrun_sdk_version": _command_matrix(["xcrun", "--show-sdk-version"]),
        "xcrun_sdk_platform_version": _command_matrix(
            ["xcrun", "--show-sdk-platform-version"]
        ),
        "xcode_select_path": _command_matrix(["xcode-select", "-p"]),
        "xcodebuild_version": _command_matrix(["xcodebuild", "-version"]),
        "coremlcompiler_lookup": coreml_compiler,
        "clang_version": _command_matrix(["clang", "--version"]),
    }

    objc_sanity: Dict[str, Any] = {"compiled": False}
    if clang_path.get("ok"):
        with tempfile_for_objc_probe() as tempdir:
            temp_path = Path(tempdir)
            source = temp_path / "ane_objc_probe.m"
            source.write_text(_OBJC_PROBE_SOURCE, encoding="utf-8")
            object_file = temp_path / "ane_objc_probe.o"
            compile_result = _run_command(
                [
                    "xcrun",
                    "clang",
                    "-fsyntax-only",
                    "-x",
                    "objective-c",
                    "-fobjc-arc",
                    "-framework",
                    "Foundation",
                    "-framework",
                    "CoreML",
                    str(source),
                ],
                cwd=temp_path,
            )
            objc_sanity["probe_compile"] = compile_result.as_dict()
            objc_sanity["compiled"] = compile_result.ok
            if compile_result.ok:
                # The path is recorded for manifest compatibility only: the
                # command above passes -fsyntax-only and the directory is
                # removed when this block exits.
                objc_sanity["object_file"] = str(object_file)
    else:
        objc_sanity["probe_compile"] = {
            "command": "xcrun clang -fsyntax-only ...",
            "return_code": None,
            "stdout": "",
            "stderr": "clang probe skipped: xcrun --find clang failed",
            "missing": True,
            "timed_out": False,
        }

    return {"commands": probe_commands, "sanity": objc_sanity}


def tempfile_for_objc_probe():
    """Return a fresh ``TemporaryDirectory`` context manager for probe scratch files."""
    from tempfile import TemporaryDirectory

    return TemporaryDirectory()


def _probe_cpp() -> Dict[str, Any]:
    """Check for cmake and, if present, run a configure step for ``ane-main-cpp``.

    The build tree is created inside a temporary directory that is discarded
    afterwards; ``configure`` stays ``None`` when cmake is unavailable.
    """
    cmake_bin = _command_matrix(["cmake", "--version"])
    results: Dict[str, Any] = {
        "cmake": cmake_bin,
        "configure": None,
    }
    if cmake_bin.get("ok"):
        with tempfile_for_objc_probe() as tmp:
            build_dir = Path(tmp) / "ane-main-cpp-build"
            configure = _run_command(
                [
                    "cmake",
                    "-S",
                    str(REPO_ROOT / "ane-main-cpp"),
                    "-B",
                    str(build_dir),
                    "-DANE_SKIP_SME2_RUNTIME_CHECK=ON",
                    "-DCMAKE_BUILD_TYPE=Release",
                    "-Wno-dev",
                ],
                timeout=30,
            )
            results["configure"] = configure.as_dict()
    return results


def _compute_signature(payload: Dict[str, Any]) -> str:
    """Return the first 16 hex digits of the SHA-256 of *payload* as sorted JSON."""
    digest_source = json.dumps(payload, sort_keys=True, ensure_ascii=True).encode("utf-8")
    return hashlib.sha256(digest_source).hexdigest()[:16]


def _detail_ok(detail: Dict[str, Any]) -> bool:
    """Decide whether a command record represents success.

    Records built by ``_command_matrix`` carry an explicit ``ok`` flag; records
    built by ``CommandProbe.as_dict`` do not, so success is derived from the
    exit status and the missing/timed-out markers instead.
    """
    if "ok" in detail:
        return bool(detail["ok"])
    return (
        detail.get("return_code") == 0
        and not detail.get("missing")
        and not detail.get("timed_out")
    )


def _build_markdown(manifest: Dict[str, Any]) -> str:
    """Render a short Markdown summary of *manifest*.

    Sections that were not collected for the chosen scope (python/uv,
    commands, hardware) are tolerated rather than raising ``KeyError``.
    """
    sections: List[str] = []
    sections.append(f"# ANE Toolchain Probe Report\n\nGenerated: {manifest['generated_utc']}\n")
    sections.append("## Host\n")
    sections.append(f"- Platform: {manifest['host']['platform']}\n")
    sections.append(f"- Machine: {manifest['host']['machine']}\n")
    sections.append(f"- Hostname: {manifest['host']['hostname']}\n\n")
    sections.append("## Scope\n")
    sections.append(f"- Scope: `{manifest['scope']}`\n")
    sections.append(f"- Signature: `{manifest['signature']}`\n\n")

    # "python" and "uv" exist only when the python scope was probed.
    python_info = manifest.get("python")
    uv_info = manifest.get("uv")
    python_text = (
        python_info.get("python", "") if isinstance(python_info, dict) else "not probed"
    )
    uv_text = (
        str(bool(uv_info.get("uv_path"))) if isinstance(uv_info, dict) else "not probed"
    )
    sections.append("## Python + UV\n")
    sections.append(f"- Python: {python_text}\n")
    sections.append(f"- UV available: {uv_text}\n\n")

    sections.append("## Toolchain Commands\n")
    # "commands" is absent for the python-only scope.
    for section, values in manifest.get("commands", {}).items():
        if not isinstance(values, dict):
            continue
        sections.append(f"### {section}\n")
        # The objc payload nests its command records under "commands".
        details = values.get("commands", {}) if section == "objc" else values
        for cmd_name, detail in details.items():
            if isinstance(detail, dict):
                sections.append(f"- {cmd_name}: {'ok' if _detail_ok(detail) else 'fail'}\n")
        sections.append("\n")

    hardware = manifest.get("hardware", {})
    sections.append("## Hardware Probe\n")
    for key, value in hardware.get("sysctl", {}).items():
        if isinstance(value, dict):
            sections.append(f"- {key}: {value.get('stdout', '')}\n")
    sections.append("\n")
    return "".join(sections)


def collect_manifest(scope: str, probe_cpp: bool = False) -> Dict[str, Any]:
    """Build the environment manifest for *scope*.

    Args:
        scope: One of ``"full"``, ``"python"``, ``"objc"`` or ``"cpp"``.
        probe_cpp: When the cpp scope is included, actually run the CMake
            configure probe instead of recording ``probe-disabled``.

    Returns:
        The manifest dict, including a ``signature`` computed over every other
        key.
    """
    include_python = scope in {"full", "python"}
    include_objc = scope in {"full", "objc"}
    include_cpp = scope in {"full", "cpp"}

    manifest: Dict[str, Any] = {
        "schema_version": 1,
        "generated_utc": datetime.now(timezone.utc).isoformat(timespec="seconds"),
        "scope": scope,
        "repository": str(REPO_ROOT),
        # The git HEAD is only looked up when the python scope is included.
        "repo_git_head": _run_command(["git", "rev-parse", "HEAD"], cwd=REPO_ROOT).stdout
        if include_python
        else "",
        "host": {
            "hostname": platform.node(),
            "platform": platform.platform(),
            "machine": platform.machine(),
        },
    }

    if include_python:
        manifest["python"] = _probe_python_env()
        manifest["uv"] = _probe_uv()
        manifest["hardware"] = _probe_hardware()

    if include_objc:
        manifest.setdefault("commands", {})["objc"] = _probe_objc()

    if include_cpp:
        commands = manifest.setdefault("commands", {})
        if probe_cpp:
            commands["cpp"] = _probe_cpp()
        else:
            commands["cpp"] = {"status": "probe-disabled"}

    manifest["signature"] = _compute_signature(
        {k: v for k, v in manifest.items() if k != "signature"}
    )
    return manifest


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Command-line entry point; returns the process exit code.

    When ``--out-json -`` is used the JSON document is the only thing written
    to stdout; status lines are sent to stderr so the stream stays parseable.
    """
    parser = argparse.ArgumentParser(
        description="Collect local Python/UV/ObjC/C++ environment and build metadata."
    )
    parser.add_argument(
        "--scope",
        default="full",
        choices=["full", "python", "objc", "cpp"],
        help="How deep the probe should run.",
    )
    parser.add_argument(
        "--probe-cpp",
        action="store_true",
        help="Run CMake configure probe for ane-main-cpp.",
    )
    parser.add_argument(
        "--out-json",
        default=str(DEFAULT_JSON),
        help="Where to write JSON output. Use '-' for stdout only.",
    )
    parser.add_argument(
        "--out-markdown",
        default="",
        help="Optional markdown summary output file.",
    )
    parser.add_argument(
        "--pretty",
        action="store_true",
        help="Pretty-print JSON in four-space indentation.",
    )
    args = parser.parse_args(list(argv) if argv is not None else None)

    manifest = collect_manifest(args.scope, probe_cpp=args.probe_cpp)
    json_text = json.dumps(
        manifest, indent=4 if args.pretty else None, ensure_ascii=True
    )

    json_to_stdout = args.out_json == "-"
    if json_to_stdout:
        print(json_text)
    else:
        out_json = Path(args.out_json)
        out_json.parent.mkdir(parents=True, exist_ok=True)
        out_json.write_text(json_text, encoding="utf-8")

    if args.out_markdown:
        out_markdown = Path(args.out_markdown)
        out_markdown.parent.mkdir(parents=True, exist_ok=True)
        out_markdown.write_text(_build_markdown(manifest), encoding="utf-8")

    # Keep stdout clean for consumers that pipe the JSON document.
    status_stream = sys.stderr if json_to_stdout else sys.stdout
    if not json_to_stdout:
        print(f"manifest_written={args.out_json}", file=status_stream)
    if args.out_markdown:
        print(f"markdown_written={args.out_markdown}", file=status_stream)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())