Spaces:
Sleeping
Sleeping
| """Tier 5 — deploy safety. READ-ONLY vs production (HTTP GET + git ls-remote | |
| + local git only). NEVER pushes or mutates anything.""" | |
| from __future__ import annotations | |
| import fnmatch, json, urllib.request | |
| from audit.core import register, Result, Status, REPO, git, sh | |
# Hugging Face Hub API endpoint for the Space; t5_3 reads its "runtime"
# object (the "stage" and "sha" fields) from the JSON response.
SPACE_API = "https://huggingface.co/api/spaces/rohitsar567/InsuranceBot"
# Base URL of the live Space; t5_3 requests /api/health and one logo image
# under it as a smoke test.
LIVE = "https://rohitsar567-insurancebot.hf.space"
def t5_1() -> Result:
    """Check that every tracked file matching an LFS pattern is LFS-managed.

    Collects the ``filter=lfs`` patterns from ``.gitattributes``, then compares
    the files listed by ``git ls-files`` against those listed by
    ``git lfs ls-files -n``.

    Returns:
        A ``T5.1`` FAIL result naming up to six tracked paths that match an
        LFS pattern but are not LFS-managed, otherwise a PASS result.
    """
    attrs = (REPO / ".gitattributes").read_text(encoding="utf-8", errors="replace")
    # gitattributes ignores leading whitespace, so an indented "#" line is
    # still a comment and must not contribute a pattern.
    patterns = [
        line.split()[0]
        for line in attrs.splitlines()
        if "filter=lfs" in line and line.strip() and not line.lstrip().startswith("#")
    ]

    def matches_lfs_pattern(path: str) -> bool:
        """Return True when *path* matches any collected LFS pattern."""
        basename = path.rsplit("/", 1)[-1]
        for pattern in patterns:
            if fnmatch.fnmatch(path, pattern):
                return True
            # A slash-free gitattributes pattern applies at any depth, so
            # "model.bin" must also match "weights/model.bin".
            if "/" not in pattern and fnmatch.fnmatch(basename, pattern):
                return True
        return False

    # One path per line: splitting on arbitrary whitespace would tear apart
    # paths containing spaces and report them as non-LFS.
    lfs_paths = set(sh(["git", "lfs", "ls-files", "-n"]).stdout.splitlines())
    bad = [
        path
        for path in git("ls-files").splitlines()
        if matches_lfs_pattern(path) and path not in lfs_paths
    ]
    if bad:
        return Result("T5.1", Status.FAIL, f"non-LFS but LFS-globbed (HF would reject): {bad[:6]}",
                      "git rm --cached + re-add so they store as LFS pointers")
    return Result("T5.1", Status.PASS, "all LFS-pattern files are pointers")
def t5_2() -> Result:
    """Check Dockerfile COPY sources exist and hydrated rag paths are untracked.

    Every ``COPY`` instruction that copies from the build context (i.e. has no
    ``--from`` option) has each of its source operands checked for existence
    under ``REPO``. Sources equal to ``.`` or containing ``*`` are not checked.
    Separately, the tracked-file list is checked for the exact paths
    ``rag/corpus``, ``rag/extracted`` and ``rag/vectors``.

    Returns:
        A ``T5.2`` FAIL result listing missing sources and/or colliding paths,
        otherwise a PASS result.
    """
    dockerfile = (REPO / "Dockerfile").read_text(encoding="utf-8", errors="replace")
    missing = []
    for raw_line in dockerfile.splitlines():
        tokens = raw_line.split()
        if len(tokens) < 3 or tokens[0] != "COPY":
            continue
        # COPY syntax is "COPY [--flag ...] <src>... <dest>": peel off leading
        # options so e.g. "--chown=user" is never mistaken for a source path.
        args = tokens[1:]
        first_operand = 0
        while first_operand < len(args) and args[first_operand].startswith("--"):
            first_operand += 1
        options, operands = args[:first_operand], args[first_operand:]
        if any(opt.startswith("--from") for opt in options):
            # Copies from another build stage/image, not from the repo.
            continue
        if len(operands) < 2:
            continue
        # The last operand is the destination; everything before it is a source.
        for src in operands[:-1]:
            if src != "." and "*" not in src and not (REPO / src).exists():
                missing.append(src)
    # Exact-path match against tracked entries.
    # NOTE(review): `git ls-files` lists files, so this only fires when one of
    # these paths is itself a tracked entry (not for files beneath it) —
    # confirm that is the intended collision check.
    collide = [p for p in git("ls-files").splitlines()
               if p in ("rag/corpus", "rag/extracted", "rag/vectors")]
    if missing or collide:
        return Result("T5.2", Status.FAIL,
                      f"COPY src missing: {missing}; dataset-hydration collide: {collide}",
                      "fix the COPY source path / untrack rag/corpus|extracted|vectors")
    return Result("T5.2", Status.PASS, "COPY paths exist; no hydration collision")
def t5_3() -> Result:
    """Compare local HEAD, origin/main and the live Space, then smoke-test it.

    Uses only ``git rev-parse``, ``git ls-remote`` and HTTP GET requests.
    Checks run in this order, returning at the first that applies:

    1. Space API unreachable or unparsable -> SKIP.
    2. Space runtime stage is not ``RUNNING`` -> WARN.
    3. Live sha differs from origin/main -> WARN.
    4. Local HEAD differs from origin/main -> WARN.
    5. Live smoke test: ``/api/health`` must contain ``"status":"ok"`` and the
       logo must be served as ``image/png`` -> PASS, else FAIL; a request
       error during the smoke test -> WARN.

    Returns:
        A ``T5.3`` result with the status described above.
    """
    head = git("rev-parse", "HEAD")[:12]
    remote = sh(["git", "ls-remote", "origin", "-h", "refs/heads/main"]).stdout.split()
    # Empty output (e.g. remote unreachable) disables the sha comparisons below.
    origin_sha = remote[0][:12] if remote else ""
    try:
        # Context manager closes the HTTP response instead of leaking it.
        with urllib.request.urlopen(SPACE_API, timeout=20) as resp:
            # "or {}" also covers an explicit `"runtime": null` in the payload,
            # which would otherwise crash on rt.get() below.
            rt = json.load(resp).get("runtime") or {}
    except Exception as e:
        return Result("T5.3", Status.SKIP, f"no network/HF API: {e}", "run with network to verify deploy")
    stage = rt.get("stage")
    live_sha = (rt.get("sha") or "")[:12]
    if stage != "RUNNING":
        return Result("T5.3", Status.WARN, f"Space stage={stage} sha={live_sha}",
                      "wait for build / inspect HF build log")
    if origin_sha and live_sha and live_sha != origin_sha:
        return Result("T5.3", Status.WARN,
                      f"live sha {live_sha} != origin/main {origin_sha} (Space lagging build / LFS silent-fail?)",
                      "verify the Space actually rebuilt; never trust 'RUNNING' alone")
    if origin_sha and origin_sha != head:
        return Result("T5.3", Status.WARN,
                      f"local HEAD {head} not pushed (origin/main={origin_sha} is live)",
                      "expected if deploy is deliberately deferred; push when ready")
    try:
        with urllib.request.urlopen(f"{LIVE}/api/health", timeout=20) as resp:
            # Only the first 200 bytes are needed for the status probe.
            health = resp.read(200).decode()
        with urllib.request.urlopen(f"{LIVE}/insurer-logos/oriental-insurance.png", timeout=20) as resp:
            logo_type = resp.headers.get_content_type()
    except Exception as e:
        return Result("T5.3", Status.WARN, f"live smoke unreachable: {e}", "retry with network")
    if '"status":"ok"' in health and logo_type == "image/png":
        return Result("T5.3", Status.PASS, f"sha {live_sha} live; health ok; LFS logo image/png")
    return Result("T5.3", Status.FAIL, f"live unhealthy: {health[:80]}", "investigate live deploy")
def t5_4() -> Result:
    """Run three local tripwire probes and report any that fire.

    Probes, each executed through ``bash -lc``:

    * bloat: ``link_lists.bin`` files over 200M under ``rag``, or a
      ``rag/_hf_dataset_backup`` directory over 20000 MB;
    * disk: fewer than 2000 MB free according to ``df -m .``;
    * stale docs: files under ``70-docs`` containing ``Status | Live`` that
      also mention ``orchestrator.py``.

    Returns:
        A ``T5.4`` WARN result joining all findings with `` | ``, or a PASS
        result when no probe fired.
    """

    def shell_out(script: str) -> str:
        """Run *script* via ``bash -lc`` and return its stripped stdout."""
        return sh(["bash", "-lc", script]).stdout.strip()

    findings = []

    oversized = shell_out(
        "find rag -name link_lists.bin -size +200M 2>/dev/null | head -3; "
        "du -sm rag/_hf_dataset_backup 2>/dev/null | awk '$1>20000{print $0\" MB\"}'"
    )
    if oversized:
        findings.append(f"chroma/backup bloat: {oversized[:160]}")

    free_mb = shell_out("df -m . | tail -1 | awk '{print $4}'")
    # Non-numeric output (e.g. df failed) is ignored rather than treated as low.
    if free_mb.isdigit() and int(free_mb) < 2000:
        findings.append(f"low disk: {free_mb} MB free")

    stale_docs = shell_out(
        "grep -rl 'Status | Live' 70-docs 2>/dev/null | xargs -r grep -l 'orchestrator.py' 2>/dev/null | head -3"
    )
    if stale_docs:
        findings.append(f"stale present-state docs: {stale_docs.splitlines()[:3]}")

    if not findings:
        return Result("T5.4", Status.PASS, "no bloat/disk/stale-doc tripwire")
    return Result("T5.4", Status.WARN, " | ".join(findings), "address the flagged tripwire")