Spaces:
Sleeping
Sleeping
| import platform | |
| import subprocess | |
| import sys | |
| import threading | |
| import warnings | |
| from typing import TYPE_CHECKING, Any | |
| if TYPE_CHECKING: | |
| from trackio.run import Run | |
# psutil is imported lazily by _ensure_psutil(); both names below are rebound
# there once the import succeeds.
PSUTIL_AVAILABLE = False  # True only after psutil has been imported
psutil: Any = None  # placeholder until the real module is stored here

# Module-level lock.
# NOTE(review): nothing in this section of the file acquires it — confirm
# whether other code relies on it before removing.
_monitor_lock = threading.Lock()
def _ensure_psutil() -> Any:
    """
    Import psutil on first use and cache it in the module-level globals.

    Once the import has succeeded, later calls return the cached module
    without importing again.

    Returns:
        The imported psutil module.

    Raises:
        ImportError: If psutil cannot be imported. The error carries an
            installation hint and is chained to the original import failure.
    """
    global PSUTIL_AVAILABLE, psutil
    if PSUTIL_AVAILABLE:
        return psutil
    # Keep the try body down to the one statement that can fail.
    try:
        import psutil as _psutil
    except ImportError as err:
        # Chain explicitly so the traceback shows the real import failure as
        # the direct cause rather than as an error raised "during handling".
        raise ImportError(
            "psutil is required for Apple Silicon monitoring. "
            "Install it with: pip install psutil"
        ) from err
    psutil = _psutil
    PSUTIL_AVAILABLE = True
    return psutil
def is_apple_silicon() -> bool:
    """
    Report whether this machine looks like an Apple Silicon Mac.

    Returns:
        True when the platform is Darwin and the CPU brand string reported by
        ``sysctl -n machdep.cpu.brand_string`` contains "Apple". False on any
        other platform, or when querying sysctl raises for any reason.
    """
    running_macos = platform.system() == "Darwin"
    if not running_macos:
        return False

    sysctl_cmd = ["sysctl", "-n", "machdep.cpu.brand_string"]
    try:
        completed = subprocess.run(
            sysctl_cmd, capture_output=True, text=True, timeout=1
        )
        return "Apple" in completed.stdout.strip()
    except Exception:
        # Best effort: a missing binary, a timeout or anything else simply
        # means "not detected".
        return False
def get_gpu_info() -> dict[str, Any]:
    """
    Probe the system for an Apple GPU.

    ``ioreg`` is queried first for IOAccelerator entries. When that does not
    produce a match, the "Chipset Model:" line of
    ``system_profiler SPDisplaysDataType`` is used as a fallback.

    Returns:
        ``{"detected": True, "type": <str>}`` when a GPU is found, otherwise
        ``{"detected": False}``. Any exception raised while probing also
        results in ``{"detected": False}``.
    """
    try:
        ioreg = subprocess.run(
            ["ioreg", "-r", "-d", "1", "-w", "0", "-c", "IOAccelerator"],
            capture_output=True,
            text=True,
            timeout=2,
        )
        if ioreg.returncode != 0 or not ioreg.stdout:
            # ioreg failed or printed nothing: dump what it produced to stderr
            # and carry on with the system_profiler fallback.
            for chunk in (
                "Error collecting Apple GPU info. ioreg stdout was:",
                ioreg.stdout,
                "ioreg stderr was:",
                ioreg.stderr,
            ):
                print(chunk, file=sys.stderr)
        elif any(
            "IOAccelerator" in entry and "class" in entry
            for entry in ioreg.stdout.strip().split("\n")
        ):
            return {"detected": True, "type": "Apple GPU"}

        profiler = subprocess.run(
            ["system_profiler", "SPDisplaysDataType"],
            capture_output=True,
            text=True,
            timeout=3,
        )
        if profiler.returncode == 0 and "Apple" in profiler.stdout:
            chipset_row = next(
                (row for row in profiler.stdout.split("\n") if "Chipset Model:" in row),
                None,
            )
            if chipset_row is not None:
                # The model name is whatever follows the last colon.
                return {"detected": True, "type": chipset_row.split(":")[-1].strip()}
    except Exception:
        # Best effort: probing must never raise into the caller.
        pass
    return {"detected": False}
def apple_gpu_available() -> bool:
    """
    Tell whether Apple GPU monitoring can be used on this machine.

    Returns:
        True when psutil can be imported and is_apple_silicon() reports True.
        False otherwise, including when either check raises.
    """
    try:
        _ensure_psutil()
        available = is_apple_silicon()
    except Exception:
        # Covers ImportError from the psutil import as well as any other
        # failure: availability checks never raise.
        available = False
    return available
def _sample_cpu_utilization(ps: Any, metrics: dict) -> None:
    """Record overall CPU utilisation, sampled over a blocking 0.1 s window."""
    metrics["cpu/utilization"] = ps.cpu_percent(interval=0.1, percpu=False)


def _sample_cpu_per_core(ps: Any, metrics: dict) -> None:
    """Record per-CPU utilisation, sampled over a second blocking 0.1 s window."""
    for index, percent in enumerate(ps.cpu_percent(interval=0.1, percpu=True)):
        metrics[f"cpu/{index}/utilization"] = percent


def _sample_cpu_frequency(ps: Any, metrics: dict) -> None:
    """Record the current CPU frequency and, when positive, the maximum."""
    freq = ps.cpu_freq()
    if not freq:
        return
    metrics["cpu/frequency"] = freq.current
    if freq.max > 0:
        metrics["cpu/frequency_max"] = freq.max


def _sample_memory(ps: Any, metrics: dict) -> None:
    """Record virtual memory usage, with sizes divided by 1024**3."""
    mem = ps.virtual_memory()
    metrics["memory/used"] = mem.used / (1024**3)
    metrics["memory/total"] = mem.total / (1024**3)
    metrics["memory/available"] = mem.available / (1024**3)
    metrics["memory/percent"] = mem.percent


def _sample_swap(ps: Any, metrics: dict) -> None:
    """Record swap usage, with sizes divided by 1024**3."""
    swap = ps.swap_memory()
    metrics["swap/used"] = swap.used / (1024**3)
    metrics["swap/total"] = swap.total / (1024**3)
    metrics["swap/percent"] = swap.percent


def _sample_temperatures(ps: Any, metrics: dict) -> None:
    """Record temperature sensor readings when psutil exposes them."""
    # psutil documents sensors_temperatures() for a subset of platforms only;
    # check for it explicitly instead of relying on a swallowed AttributeError.
    read_temperatures = getattr(ps, "sensors_temperatures", None)
    if read_temperatures is None:
        return
    readings = read_temperatures()
    if not readings:
        return
    for name, entries in readings.items():
        for index, entry in enumerate(entries):
            # Unlabelled sensors get a name derived from their group and position.
            label = entry.label or f"{name}_{index}"
            metrics[f"temp/{label}"] = entry.current


# Order matters only for the key order of the resulting dict.
_METRIC_SAMPLERS = (
    _sample_cpu_utilization,
    _sample_cpu_per_core,
    _sample_cpu_frequency,
    _sample_memory,
    _sample_swap,
    _sample_temperatures,
)


def collect_apple_metrics() -> dict:
    """
    Collect system metrics for Apple Silicon.

    Each metric group (overall CPU, per-CPU, CPU frequency, memory, swap,
    temperatures) is sampled independently and on a best-effort basis: a group
    that raises is skipped, keeping whatever it had already recorded, and the
    remaining groups are still sampled. The two CPU utilisation samples each
    block for 0.1 s.

    Returns:
        Dictionary of system metrics including CPU, memory, and GPU info.
        ``"gpu/detected"`` is set to 1 when get_gpu_info() reports a GPU; the
        GPU type string is not recorded. Empty when psutil cannot be imported.
    """
    if not PSUTIL_AVAILABLE:
        try:
            _ensure_psutil()
        except ImportError:
            return {}

    metrics: dict = {}
    for sampler in _METRIC_SAMPLERS:
        try:
            sampler(psutil, metrics)
        except Exception:
            # Deliberate best effort: one unavailable metric group must not
            # prevent the others from being reported.
            continue

    if get_gpu_info().get("detected"):
        metrics["gpu/detected"] = 1
    return metrics
class AppleGpuMonitor:
    """
    Background worker that periodically logs Apple Silicon system metrics.

    A daemon thread calls collect_apple_metrics() once per interval and, when
    the result is non-empty, passes it to ``run.log_system``.
    """

    def __init__(self, run: "Run", interval: float = 10.0):
        """
        Args:
            run: Run whose ``log_system`` method receives each metrics dict.
            interval: Seconds to wait between two collections.
        """
        self._run = run
        self._interval = interval
        self._stop_flag = threading.Event()
        self._thread: "threading.Thread | None" = None

    def start(self) -> None:
        """
        Start the background collection thread.

        Emits a warning and starts nothing when the machine is not Apple
        Silicon or psutil cannot be imported. Calling start() while a worker
        is still alive is a no-op (this includes a worker that is still
        winding down after a stop() whose join timed out). Calling it after
        the previous worker has finished launches a fresh one.
        """
        if self._thread is not None and self._thread.is_alive():
            # A second worker would log every sample twice, and overwriting
            # the handle would leave the first thread impossible to join.
            return
        if not is_apple_silicon():
            warnings.warn(
                "auto_log_gpu=True but not running on Apple Silicon. "
                "Apple GPU logging disabled."
            )
            return
        if not PSUTIL_AVAILABLE:
            try:
                _ensure_psutil()
            except ImportError:
                warnings.warn(
                    "auto_log_gpu=True but psutil not installed. "
                    "Install with: pip install psutil"
                )
                return
        # stop() leaves the event set; clear it so a restarted worker does not
        # exit on its very first loop check.
        self._stop_flag.clear()
        self._thread = threading.Thread(target=self._monitor_loop, daemon=True)
        self._thread.start()

    def stop(self) -> None:
        """Signal the worker to stop and wait up to 2 seconds for it to exit."""
        self._stop_flag.set()
        if self._thread is not None:
            self._thread.join(timeout=2.0)

    def _monitor_loop(self) -> None:
        """Collect and log metrics until the stop flag is set."""
        while not self._stop_flag.is_set():
            try:
                metrics = collect_apple_metrics()
                if metrics:
                    self._run.log_system(metrics)
            except Exception:
                # Deliberate best effort: a failed sample must not kill the
                # monitoring thread.
                pass
            # Doubles as the sleep between samples and wakes immediately when
            # stop() sets the flag.
            self._stop_flag.wait(timeout=self._interval)
def log_apple_gpu(run: "Run | None" = None) -> dict:
    """
    Collect Apple Silicon system metrics once and log them to a run.

    Args:
        run: Run to log to. When None, the run stored in
            ``context_vars.current_run`` is used.

    Returns:
        dict: The metrics returned by collect_apple_metrics(). They are passed
        to ``run.log_system`` only when non-empty.

    Raises:
        RuntimeError: If no run was given and there is no current run.

    Example:
        ```python
        import trackio

        run = trackio.init(project="my-project")
        trackio.log({"loss": 0.5})
        trackio.log_apple_gpu()
        ```
    """
    from trackio import context_vars

    target_run = context_vars.current_run.get() if run is None else run
    if target_run is None:
        raise RuntimeError("Call trackio.init() before trackio.log_apple_gpu().")

    collected = collect_apple_metrics()
    if collected:
        target_run.log_system(collected)
    return collected