Spaces:
Running
Running
| import os | |
| import sys | |
| import json | |
| import time | |
| import socket | |
| import platform | |
| import subprocess | |
| import traceback | |
| import datetime | |
| from typing import Any, Dict | |
| from fastapi import APIRouter | |
# Router object for this module's endpoints.
# NOTE(review): no route is attached to it in the visible code — presumably
# registration happens elsewhere; confirm.
router = APIRouter()
| # ========================================================= | |
| # SAFE EXECUTION WRAPPER (NEVER FAIL) | |
| # ========================================================= | |
def safe_run(name, func):
    """Run ``func`` and report its outcome as a dict instead of raising.

    Args:
        name: Label of the section being collected. Kept for call-site
            readability; it is not included in the returned dict.
        func: Zero-argument callable that produces the section's data.

    Returns:
        On success: ``{"status": "ok", "duration_sec": float, "data": ...}``.
        On failure: ``{"status": "error", "duration_sec": float,
        "error": str, "traceback": str}``.
    """
    # perf_counter is monotonic, so the measured duration cannot go negative
    # or jump if the wall clock is adjusted while func() is running.
    start = time.perf_counter()
    try:
        # func() must run BEFORE the duration is computed. Building the
        # result dict in one literal would evaluate "duration_sec" first
        # and always report ~0 seconds for successful runs.
        data = func()
    except Exception as e:
        return {
            "status": "error",
            "duration_sec": round(time.perf_counter() - start, 3),
            "error": str(e),
            # Only the innermost frames are kept to keep the report compact.
            "traceback": traceback.format_exc(limit=2),
        }
    return {
        "status": "ok",
        "duration_sec": round(time.perf_counter() - start, 3),
        "data": data,
    }
| # ========================================================= | |
| # COMMAND RUNNER | |
| # ========================================================= | |
def run_cmd(cmd):
    """Run an external command and return its result as a dict; never raise
    for ordinary failures.

    Args:
        cmd: Command as an argument sequence (e.g. ``["df", "-h"]``). Parts
            may be any objects ``subprocess.run`` accepts, such as
            ``pathlib.Path``.

    Returns:
        ``{"cmd", "returncode", "stdout", "stderr"}`` when the process ran,
        or ``{"cmd", "error"}`` when it could not be started or timed out
        (25 second limit).
    """
    # Build the display string once, up front, and tolerate non-str parts.
    # A plain " ".join(cmd) raises TypeError for e.g. Path arguments, and
    # doing that inside the except handler would let the error escape.
    try:
        cmd_str = cmd if isinstance(cmd, str) else " ".join(map(str, cmd))
    except TypeError:
        cmd_str = repr(cmd)

    try:
        r = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=25,
        )
    except Exception as e:
        return {"cmd": cmd_str, "error": str(e)}
    return {
        "cmd": cmd_str,
        "returncode": r.returncode,
        "stdout": r.stdout.strip(),
        "stderr": r.stderr.strip(),
    }
| # ========================================================= | |
| # SYSTEM INFO | |
| # ========================================================= | |
def system_info():
    """Collect basic facts about the host and the running interpreter.

    Returns:
        Dict with the current UTC time (naive ISO-8601 string), hostname,
        platform string, Python version, interpreter path, working
        directory, process id and CPU count.
    """
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware "now"
    # and strip the tzinfo so the emitted string keeps its original naive
    # format (no "+00:00" suffix); the key name already states it is UTC.
    now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    return {
        "time_utc": now_utc.isoformat(),
        "hostname": socket.gethostname(),
        "platform": platform.platform(),
        "python": sys.version,
        "executable": sys.executable,
        "cwd": os.getcwd(),
        "pid": os.getpid(),
        "cpu_count": os.cpu_count(),
    }
| # ========================================================= | |
| # ENVIRONMENT VARIABLES (MASK SECRETS) | |
| # ========================================================= | |
def env_info():
    """Return a copy of the process environment with secret-looking values hidden.

    A variable is treated as sensitive when its lower-cased name contains
    "token", "secret", "password" or "key"; its value is replaced by the
    placeholder ``"***hidden***"``. All other variables are returned as-is.
    """
    sensitive_markers = ("token", "secret", "password", "key")

    def _looks_sensitive(var_name):
        lowered = var_name.lower()
        return any(marker in lowered for marker in sensitive_markers)

    return {
        var_name: "***hidden***" if _looks_sensitive(var_name) else value
        for var_name, value in os.environ.items()
    }
| # ========================================================= | |
| # GPU / TORCH INFO | |
| # ========================================================= | |
def gpu_info():
    """Describe the torch / CUDA setup, or report why that was not possible.

    Returns:
        Dict with ``cuda_available``, ``device_count`` and ``torch_version``;
        when CUDA is available, also the name and memory figures of device 0.
        If torch cannot be imported or any query fails, returns
        ``{"torch_error": <message>}`` instead.
    """
    try:
        # Imported lazily so the module still loads when torch is missing.
        import torch

        cuda = torch.cuda
        has_cuda = cuda.is_available()
        report = {
            "cuda_available": has_cuda,
            "device_count": cuda.device_count(),
            "torch_version": torch.__version__,
        }
        if not has_cuda:
            return report

        # Only device index 0 is inspected.
        report["device_name"] = cuda.get_device_name(0)
        report["memory_allocated"] = cuda.memory_allocated(0)
        report["memory_reserved"] = cuda.memory_reserved(0)
        report["memory_total"] = cuda.get_device_properties(0).total_memory
        return report
    except Exception as exc:
        return {"torch_error": str(exc)}
| # ========================================================= | |
| # HUGGING FACE CACHE | |
| # ========================================================= | |
def hf_cache_info():
    """Report where the Hugging Face cache lives and sample its contents.

    The directory is read from the ``HF_HOME`` environment variable, falling
    back to ``/tmp/huggingface``.

    Returns:
        Dict with ``cache_dir``, ``exists`` and ``files_sample`` (up to 30
        directory entries, or an empty list when the path does not exist).
    """
    cache_dir = os.environ.get("HF_HOME", "/tmp/huggingface")
    present = os.path.exists(cache_dir)

    if present:
        sample = os.listdir(cache_dir)[:30]
    else:
        sample = []

    return {
        "cache_dir": cache_dir,
        "exists": present,
        "files_sample": sample,
    }
| # ========================================================= | |
| # INSTALLED PACKAGES | |
| # ========================================================= | |
def packages_info():
    """List installed distributions as sorted ``name==version`` strings.

    Uses the standard-library ``importlib.metadata`` rather than the
    deprecated ``pkg_resources``, which is missing whenever setuptools is
    not installed and would make this report degrade to an error.

    Returns:
        Sorted list of ``"name==version"`` strings, or ``{"error": <message>}``
        if the installed distributions cannot be enumerated.
    """
    try:
        from importlib import metadata

        pins = set()  # set: the same distribution can be found on several path entries
        for dist in metadata.distributions():
            dist_name = dist.metadata.get("Name")
            if dist_name:  # skip broken installs that carry no name metadata
                pins.add(f"{dist_name}=={dist.version}")
        return sorted(pins)
    except Exception as e:
        return {"error": str(e)}
| # ========================================================= | |
| # DISK + MEMORY | |
| # ========================================================= | |
def disk_info():
    """Capture disk and memory usage by running ``df -h`` and ``free -h``.

    Returns:
        Dict with keys ``disk_usage`` and ``memory``, each holding the result
        dict produced by ``run_cmd`` for the corresponding command.
    """
    commands = {
        "disk_usage": ["df", "-h"],
        "memory": ["free", "-h"],
    }
    return {label: run_cmd(argv) for label, argv in commands.items()}
| # ========================================================= | |
| # NETWORK INFO | |
| # ========================================================= | |
def network_info():
    """Report the local hostname and the IP address it resolves to.

    Returns:
        ``{"hostname": str, "ip": str}`` when resolution succeeds. When the
        hostname cannot be resolved, ``"ip"`` is ``None`` and an extra
        ``"ip_error"`` key carries the failure message, so the hostname is
        still reported.
    """
    host = socket.gethostname()  # looked up once and reused for resolution
    try:
        ip = socket.gethostbyname(host)
    except (OSError, UnicodeError) as e:
        # socket.gaierror / socket.herror are OSError subclasses; UnicodeError
        # can surface when the hostname cannot be IDNA-encoded.
        return {"hostname": host, "ip": None, "ip_error": str(e)}
    return {"hostname": host, "ip": ip}
| # ========================================================= | |
| # HF CLI CHECKS | |
| # ========================================================= | |
def hf_cli_info():
    """Run the ``huggingface-cli`` diagnostics and collect their output.

    Returns:
        Dict with keys ``whoami`` and ``scan_cache``, each holding the result
        dict produced by ``run_cmd`` for the matching CLI subcommand.
    """
    subcommands = {
        "whoami": "whoami",
        "scan_cache": "scan-cache",
    }
    return {
        label: run_cmd(["huggingface-cli", subcommand])
        for label, subcommand in subcommands.items()
    }
| # ========================================================= | |
| # PYTHON RUNTIME STATE | |
| # ========================================================= | |
def runtime_info():
    """Summarise the interpreter's live state.

    Returns:
        Dict with ``active_threads`` (names of all threads currently alive)
        and ``loaded_modules_count`` (number of entries in ``sys.modules``).
    """
    import threading

    thread_names = [thread.name for thread in threading.enumerate()]
    module_total = len(sys.modules)
    return {
        "active_threads": thread_names,
        "loaded_modules_count": module_total,
    }
| # ========================================================= | |
| # SPACE DETECTION (HF SPECIFIC) | |
| # ========================================================= | |
def hf_space_info():
    """Report the Space-related environment variables of this process.

    Returns:
        Dict with ``is_space`` (whether ``SPACE_ID`` is set) plus the values
        of ``SPACE_ID``, ``SPACE_HARDWARE`` and ``SPACE_SDK`` (``None`` when
        a variable is unset).
    """
    env = os.environ
    running_in_space = "SPACE_ID" in env
    return {
        "is_space": running_in_space,
        "space_id": env.get("SPACE_ID"),
        "hardware": env.get("SPACE_HARDWARE"),
        "sdk": env.get("SPACE_SDK"),
    }
| # ========================================================= | |
| # MAIN UNIVERSAL DEBUG ENDPOINT | |
| # ========================================================= | |
def full_debug() -> Dict[str, Any]:
    """Build the complete diagnostics report.

    Each section is collected through ``safe_run``, so a failing collector
    yields an error entry for that section instead of aborting the report.

    Returns:
        ``{"status": "ok", "generated_at": <naive UTC ISO-8601 string>,
        "report": {<section>: <safe_run result>, ...}}``.
    """
    # NOTE(review): no route decorator is visible on this function here —
    # presumably it is registered on a router elsewhere; confirm.
    # (report key, label passed to safe_run, collector), in output order.
    sections = (
        ("system", "system", system_info),
        ("environment", "env", env_info),
        ("gpu", "gpu", gpu_info),
        ("disk", "disk", disk_info),
        ("network", "network", network_info),
        ("hf_space", "hf_space", hf_space_info),
        ("hf_cache", "hf_cache", hf_cache_info),
        ("hf_cli", "hf_cli", hf_cli_info),
        ("runtime", "runtime", runtime_info),
        ("packages", "packages", packages_info),
    )
    report = {key: safe_run(label, collector) for key, label, collector in sections}

    # datetime.utcnow() is deprecated since Python 3.12; strip tzinfo from an
    # aware "now" so the timestamp keeps its original naive ISO format.
    generated_at = (
        datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat()
    )
    return {
        "status": "ok",
        "generated_at": generated_at,
        "report": report,
    }