Spaces:

build-small-hackathon
/

figment

Running on Zero

App Files Files Community

figment / scripts /evidence_gate_status.py

ThomsenDrake

docs: explicitly claim all merit badges

8046443 verified 19 days ago

Raw

History Blame Contribute Delete

13.4 kB

	#!/usr/bin/env python3
	"""Report Figment evidence gates without upgrading unsupported claims."""

	from __future__ import annotations

	import argparse
	import json
	from pathlib import Path
	import sys
	from typing import Any

	# Directory two levels above this file; it is put on sys.path so the
	# `from scripts import ...` import that follows can resolve.
	PROJECT_ROOT = Path(__file__).resolve().parents[1]
	if str(PROJECT_ROOT) not in sys.path:
	    sys.path.insert(0, str(PROJECT_ROOT))

	from scripts import audit_submission_claims # noqa: E402


	# Default repository root used by build_report() and the --repo-root CLI option.
	REPO_ROOT = PROJECT_ROOT


	def build_report(repo_root: Path = REPO_ROOT) -> dict[str, Any]:
	    """Evaluate every evidence gate and summarise which ones are still open.

	    Args:
	        repo_root: Repository checkout to inspect. It is resolved to an
	            absolute path before any gate is evaluated.

	    Returns:
	        A dict with these keys:

	        * ``status``: ``"complete"`` when every gate passed, else
	          ``"incomplete"``.
	        * ``ready_for_badge_claims``: ``True`` when every gate in the
	          badge-relevant subset passed.
	        * ``repo_root``: the resolved root as a string.
	        * ``gates``: mapping of gate key to the gate dict built by
	          ``_simple_gate``.
	        * ``missing_gate_keys`` / ``missing_badge_gate_keys``: keys of the
	          gates that did not pass, in gate declaration order.
	    """
	    repo_root = repo_root.resolve()
	    claim_audit = audit_submission_claims.audit_claims(repo_root)
	    gate_status = claim_audit["gate_status"]

	    # Insertion order matters: it drives the row order of the markdown
	    # report and the order of ``missing_gate_keys``.
	    gates = {
	        "public_space_no_secret": _public_space_gate(repo_root),
	        "hosted_omni_eval": _hosted_eval_gate(repo_root),
	        "local_4b_50_case_eval": _local_4b_eval_gate(repo_root),
	        "no_cloud_route": _no_cloud_route_gate(repo_root),
	        "llama_champion_route": _llama_champion_gate(repo_root),
	        "local_asr_provider_proof": _local_asr_gate(repo_root),
	        "trained_responder_user_test": _simple_gate(
	            passed=gate_status.get("backyard_user_use", False),
	            label="Trained-responder user test",
	            required_evidence="Completed user-test notes from a real trained responder.",
	            evidence_paths=_existing_paths(repo_root, [Path("docs/user_test_notes.md")]),
	            next_action="Fill docs/user_test_notes.md from a real trained-responder session.",
	        ),
	        "demo_video": _simple_gate(
	            passed=gate_status.get("demo_video", False),
	            label="Demo video",
	            required_evidence="Final demo video link.",
	            evidence_paths=_existing_paths(repo_root, [Path("docs/submission_checklist.md")]),
	            next_action="Add the final demo video link after recording a route-supported demo.",
	        ),
	        "social_post": _simple_gate(
	            passed=gate_status.get("social_post", False),
	            label="Social post",
	            required_evidence="Final social post link with achieved-versus-targeted wording.",
	            evidence_paths=_existing_paths(repo_root, [Path("docs/submission_checklist.md")]),
	            next_action="Add the final social post link after proof-sensitive copy is ready.",
	        ),
	        "well_tuned_adapter": _simple_gate(
	            passed=gate_status.get("well_tuned", False),
	            label="Well-Tuned adapter",
	            required_evidence="Published tuned model or adapter used by the app and measured.",
	            evidence_paths=_existing_paths(
	                repo_root, [Path("docs/model_parameter_evidence_ledger.md")]
	            ),
	            next_action="Leave Well-Tuned as stretch until a published measured adapter exists.",
	        ),
	        "all_six_merit_badges_claimed": _merit_badges_gate(repo_root),
	        "claim_audit": _simple_gate(
	            passed=claim_audit["status"] == "passed",
	            label="Submission claim audit",
	            required_evidence="No premature achieved/proven/used/tested claims in submission-facing copy.",
	            evidence_paths=_existing_paths(repo_root, audit_submission_claims.AUDITED_FILES),
	            next_action="Run make audit-claims and fix any overclaiming lines.",
	            extra={"violation_count": len(claim_audit["violations"])},
	        ),
	    }
	    missing_gate_keys = [key for key, gate in gates.items() if not gate["passed"]]

	    # Subset of gates that decides ``ready_for_badge_claims``.
	    badge_gate_keys = (
	        "public_space_no_secret",
	        "hosted_omni_eval",
	        "demo_video",
	        "well_tuned_adapter",
	        "all_six_merit_badges_claimed",
	        "claim_audit",
	    )
	    missing_badge_gate_keys = [key for key in badge_gate_keys if not gates[key]["passed"]]

	    return {
	        "status": "complete" if not missing_gate_keys else "incomplete",
	        "ready_for_badge_claims": not missing_badge_gate_keys,
	        "repo_root": str(repo_root),
	        "gates": gates,
	        "missing_gate_keys": missing_gate_keys,
	        "missing_badge_gate_keys": missing_badge_gate_keys,
	    }


	def _public_space_gate(repo_root: Path) -> dict[str, Any]:
	    """Gate on the submission checklist recording a runnable, cold-boot-verified Space.

	    The gate passes only when ``docs/submission_checklist.md`` contains both
	    table-row fragments listed in ``required_markers``. A missing or
	    unreadable checklist is read as empty text, so the gate fails.
	    """
	    checklist = _read_text(repo_root / "docs/submission_checklist.md")
	    # Plain "|" separators: these must match the markdown table cells
	    # verbatim, so no backslash escaping belongs inside the literals.
	    required_markers = (
	        "Public Hugging Face Space | Runnable",
	        "Space cold boot with app files present | Verified",
	    )
	    passed = all(marker in checklist for marker in required_markers)
	    return _simple_gate(
	        passed=passed,
	        label="Public Hugging Face Space",
	        required_evidence="Public Space URL plus cold-boot evidence with app files present.",
	        evidence_paths=_existing_paths(repo_root, [Path("docs/submission_checklist.md")]),
	        next_action="Re-verify public Space cold boot and record the current Space commit.",
	    )


	def _hosted_eval_gate(repo_root: Path) -> dict[str, Any]:
	    """Gate on hosted Omni eval evidence: a JSONL trace or a populated scorecard.

	    The gate passes when at least one ``traces/hosted_omni_eval*.jsonl`` file
	    exists, or when ``docs/hosted_omni_eval_results.md`` contains every one of
	    the hard-coded score markers below.
	    """
	    scorecard_relpath = Path("docs/hosted_omni_eval_results.md")
	    traces = sorted(repo_root.glob("traces/hosted_omni_eval*.jsonl"))
	    scorecard = _read_text(repo_root / scorecard_relpath)
	    # NOTE(review): these markers are pinned to specific scorecard numbers and
	    # need updating whenever the committed scorecard is refreshed.
	    expected_markers = ("31/50", "8/50", "480/650", "170/650", "50/50")
	    scorecard_has_current_metrics = all(marker in scorecard for marker in expected_markers)
	    return _simple_gate(
	        passed=bool(traces) or scorecard_has_current_metrics,
	        label="Hosted Omni eval",
	        required_evidence="Hosted Omni eval JSONL trace or committed scorecard.",
	        evidence_paths=[str(path) for path in traces]
	        + _existing_paths(repo_root, [scorecard_relpath]),
	        next_action="Run or refresh the hosted Omni eval and update docs/hosted_omni_eval_results.md.",
	    )


	def _local_4b_eval_gate(repo_root: Path) -> dict[str, Any]:
	    """Gate on a local 4B evidence summary that covers at least 50 cases.

	    A summary qualifies when ``counts_as_50_case_local_llm_competence`` is
	    exactly ``True`` and ``total_cases`` is at least 50. When nothing
	    qualifies, all discovered summaries are still listed as evidence paths so
	    the reader can see what was inspected.
	    """
	    summaries = _local_4b_summaries(repo_root)
	    passing_paths: list[Path] = []
	    for path, summary in summaries:
	        if summary.get("counts_as_50_case_local_llm_competence") is not True:
	            continue
	        try:
	            total_cases = int(summary.get("total_cases") or 0)
	        except (TypeError, ValueError):
	            # A malformed count is not evidence; skip it instead of crashing
	            # the whole report, matching _read_json's tolerance.
	            continue
	        if total_cases >= 50:
	            passing_paths.append(path)
	    evidence_paths = _local_4b_evidence_paths(
	        passing_paths or [path for path, _summary in summaries]
	    )
	    return _simple_gate(
	        passed=bool(passing_paths),
	        label="Local 4B 50-case eval",
	        required_evidence="50-case local OpenAI-compatible eval with configured-model competence.",
	        evidence_paths=evidence_paths,
	        next_action="Run scripts/run_local_4b_evidence.py against the local full-weight endpoint.",
	    )


	def _no_cloud_route_gate(repo_root: Path) -> dict[str, Any]:
	    """Gate on a local 4B evidence summary flagged as no-cloud route proof.

	    A summary qualifies when ``counts_as_no_cloud_route_proof`` is exactly
	    ``True``. When nothing qualifies, all discovered summaries are listed as
	    evidence paths instead.
	    """
	    summaries = _local_4b_summaries(repo_root)
	    passing_paths = [
	        path
	        for path, summary in summaries
	        if summary.get("counts_as_no_cloud_route_proof") is True
	    ]
	    return _simple_gate(
	        passed=bool(passing_paths),
	        label="No-cloud/off-grid route measurement",
	        required_evidence="Recorded no-cloud route proof from a local or self-hosted endpoint.",
	        evidence_paths=_local_4b_evidence_paths(
	            passing_paths or [path for path, _summary in summaries]
	        ),
	        next_action="Capture an additional no-cloud local route smoke or eval bundle.",
	    )


	def _llama_champion_gate(repo_root: Path) -> dict[str, Any]:
	    """Gate on a qualifying local-route summary for the Llama Champion badge.

	    Uses the same qualification rule as the local 4B 50-case gate:
	    ``counts_as_50_case_local_llm_competence`` is exactly ``True`` and
	    ``total_cases`` is at least 50. When nothing qualifies, all discovered
	    summaries are listed as evidence paths instead.
	    """
	    summaries = _local_4b_summaries(repo_root)
	    passing_paths: list[Path] = []
	    for path, summary in summaries:
	        if summary.get("counts_as_50_case_local_llm_competence") is not True:
	            continue
	        try:
	            total_cases = int(summary.get("total_cases") or 0)
	        except (TypeError, ValueError):
	            # A malformed count is not evidence; skip it instead of crashing
	            # the whole report, matching _read_json's tolerance.
	            continue
	        if total_cases >= 50:
	            passing_paths.append(path)
	    return _simple_gate(
	        passed=bool(passing_paths),
	        label="Llama Champion route measurement",
	        required_evidence="Eligible local llama.cpp/OpenAI-compatible route with trace or eval evidence.",
	        evidence_paths=_local_4b_evidence_paths(
	            passing_paths or [path for path, _summary in summaries]
	        ),
	        next_action="Record an additional qualifying local model route if a live endpoint evidence bundle is needed.",
	    )


	def _local_asr_gate(repo_root: Path) -> dict[str, Any]:
	    """Gate on a local ASR evidence summary flagged as real provider proof.

	    A summary qualifies when ``counts_as_local_asr_proof`` is exactly
	    ``True``. When nothing qualifies, all discovered summaries are listed as
	    evidence paths instead.
	    """
	    summaries = _local_asr_summaries(repo_root)
	    passing_paths = [
	        path
	        for path, summary in summaries
	        if summary.get("counts_as_local_asr_proof") is True
	    ]
	    evidence_paths = _local_asr_evidence_paths(
	        passing_paths or [path for path, _summary in summaries]
	    )
	    return _simple_gate(
	        passed=bool(passing_paths),
	        label="Local Parakeet ASR provider proof",
	        required_evidence="Real local ASR provider payload with counts_as_local_asr_proof=true.",
	        evidence_paths=evidence_paths,
	        next_action="Run scripts/run_local_asr_evidence.py with a real local Parakeet provider payload.",
	    )


	def _merit_badges_gate(repo_root: Path) -> dict[str, Any]:
	    """Gate on the README and checklist explicitly claiming all six merit badges.

	    The text of ``README.md`` and ``docs/submission_checklist.md`` is
	    concatenated, and every marker below must appear somewhere in the
	    combined text: six badge table rows plus the Field Notes blog URL and the
	    launch video asset path.
	    """
	    readme = _read_text(repo_root / "README.md")
	    checklist = _read_text(repo_root / "docs/submission_checklist.md")
	    combined = readme + "\n" + checklist
	    # Plain "|" separators: these must match the markdown table cells
	    # verbatim, so no backslash escaping belongs inside the literals.
	    required_markers = (
	        "| Off the Grid | Claimed.",
	        "| Well-Tuned | Claimed.",
	        "| Off-Brand | Claimed.",
	        "| Llama Champion | Claimed.",
	        "| Sharing is Caring | Claimed.",
	        "| Field Notes | Claimed.",
	        "https://huggingface.co/blog/build-small-hackathon/figment-build-blog",
	        "assets/figment-live-space-launch-final.mp4",
	    )
	    passed = all(marker in combined for marker in required_markers)
	    return _simple_gate(
	        passed=passed,
	        label="All six merit badges claimed",
	        required_evidence="README and submission checklist explicitly claim all six merit badges with video and field-notes links.",
	        evidence_paths=_existing_paths(
	            repo_root,
	            [Path("README.md"), Path("docs/submission_checklist.md")],
	        ),
	        next_action="Mark all six merit badges as claimed and include the launch video plus Field Notes blog links.",
	    )


	def _simple_gate(
	    *,
	    passed: bool,
	    label: str,
	    required_evidence: str,
	    evidence_paths: list[str],
	    next_action: str,
	    extra: dict[str, Any] | None = None,
	) -> dict[str, Any]:
	    """Build the uniform dict that represents one evidence gate.

	    Args:
	        passed: Whether the gate is satisfied; coerced to ``bool``.
	        label: Human-readable gate name.
	        required_evidence: Description of what evidence satisfies the gate.
	        evidence_paths: Paths of the evidence files that were found.
	        next_action: What to do next; stored only when the gate has not passed.
	        extra: Optional additional keys merged into the gate dict. They are
	            applied last, so they override the standard keys on collision.

	    Returns:
	        The gate dict with ``passed``, ``label``, ``required_evidence``,
	        ``evidence_paths`` and ``next_action``, plus any ``extra`` keys.
	    """
	    gate: dict[str, Any] = {
	        "passed": bool(passed),
	        "label": label,
	        "required_evidence": required_evidence,
	        "evidence_paths": evidence_paths,
	        # A passed gate has nothing left to do, so the hint is blanked.
	        "next_action": "" if passed else next_action,
	    }
	    if extra:
	        gate.update(extra)
	    return gate


	def _local_4b_summaries(repo_root: Path) -> list[tuple[Path, dict[str, Any]]]:
	    """Load every ``traces/local_4b_evidence_*/summary.json`` under *repo_root*.

	    Returns:
	        ``(path, payload)`` pairs sorted by path. ``payload`` is whatever
	        ``_read_json`` yields for that file.
	    """
	    summary_paths = sorted(repo_root.glob("traces/local_4b_evidence_*/summary.json"))
	    return [(path, _read_json(path)) for path in summary_paths]


	def _local_asr_summaries(repo_root: Path) -> list[tuple[Path, dict[str, Any]]]:
	    """Load every ``traces/local_asr_parakeet_evidence_*/summary.json`` under *repo_root*.

	    Returns:
	        ``(path, payload)`` pairs sorted by path. ``payload`` is whatever
	        ``_read_json`` yields for that file.
	    """
	    summary_paths = sorted(
	        repo_root.glob("traces/local_asr_parakeet_evidence_*/summary.json")
	    )
	    return [(path, _read_json(path)) for path in summary_paths]


	def _local_4b_evidence_paths(summary_paths: list[Path]) -> list[str]:
	    """Expand local 4B summary paths into the evidence file list.

	    Each summary path is emitted as a string, immediately followed by its
	    sibling ``eval_evidence_manifest.json`` when that file exists.
	    """
	    paths: list[str] = []
	    for summary_path in summary_paths:
	        paths.append(str(summary_path))
	        manifest_path = summary_path.parent / "eval_evidence_manifest.json"
	        if manifest_path.exists():
	            paths.append(str(manifest_path))
	    return paths


	def _local_asr_evidence_paths(summary_paths: list[Path]) -> list[str]:
	    """Expand local ASR summary paths into the evidence file list.

	    Each summary path is emitted as a string, immediately followed by its
	    sibling ``asr_evidence_manifest.json`` when that file exists.
	    """
	    paths: list[str] = []
	    for summary_path in summary_paths:
	        paths.append(str(summary_path))
	        manifest_path = summary_path.parent / "asr_evidence_manifest.json"
	        if manifest_path.exists():
	            paths.append(str(manifest_path))
	    return paths


	def _existing_paths(repo_root: Path, relative_paths: tuple[Path, ...] | list[Path]) -> list[str]:
	    """Return the *relative_paths* that exist under *repo_root*, as joined strings.

	    Input order is preserved; paths that do not exist are dropped.
	    """
	    candidates = (repo_root / path for path in relative_paths)
	    return [str(candidate) for candidate in candidates if candidate.exists()]


	def _read_json(path: Path) -> dict[str, Any]:
	    """Best-effort load of a JSON object from *path*.

	    Returns:
	        The decoded object when the file holds a JSON object. An empty dict
	        is returned when the file cannot be read, is not valid UTF-8, is not
	        valid JSON, or holds a non-object value such as a list.
	    """
	    try:
	        payload = json.loads(path.read_text(encoding="utf-8"))
	    except (OSError, ValueError):
	        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError,
	        # so a corrupt evidence file cannot crash the whole report.
	        return {}
	    return payload if isinstance(payload, dict) else {}


	def _read_text(path: Path) -> str:
	    """Best-effort read of *path* as UTF-8 text.

	    Returns:
	        The file contents, or an empty string when the file cannot be read
	        or is not valid UTF-8. Callers treat the empty string as "no
	        markers found", so gates fail closed on unreadable files.
	    """
	    try:
	        return path.read_text(encoding="utf-8")
	    except (OSError, UnicodeDecodeError):
	        return ""


	def _markdown_report(report: dict[str, Any]) -> str:
	    """Render a report from ``build_report`` as a markdown summary table.

	    Args:
	        report: Mapping with ``status``, ``ready_for_badge_claims`` and
	            ``gates``; each gate needs ``passed`` and ``next_action``.

	    Returns:
	        Markdown text ending with a newline: a heading, two status bullets
	        and one table row per gate. Gates with an empty ``next_action`` show
	        "Evidence recorded." in the last column.
	    """
	    lines = [
	        "# Figment Evidence Gate Status",
	        "",
	        f"- Status: `{report['status']}`",
	        f"- Ready for badge claims: `{str(report['ready_for_badge_claims']).lower()}`",
	        "",
	        # Plain "|" cell separators; a backslash-escaped pipe would be
	        # emitted literally and stop the table from rendering.
	        "| Gate | Passed | Next action |",
	        "| ---- | ------ | ----------- |",
	    ]
	    for key, gate in report["gates"].items():
	        next_action = gate["next_action"] or "Evidence recorded."
	        lines.append(f"| `{key}` | `{str(gate['passed']).lower()}` | {next_action} |")
	    # The trailing empty element makes the joined text end with a newline.
	    lines.append("")
	    return "\n".join(lines)


	def main(argv: list[str] | None = None) -> int:
	    """Command-line entry point: build the gate report and print it.

	    Args:
	        argv: Argument list to parse; ``None`` lets argparse read
	            ``sys.argv``.

	    Returns:
	        ``0`` when the report status is ``"complete"``, otherwise ``2``.
	    """
	    parser = argparse.ArgumentParser(description=__doc__)
	    parser.add_argument("--repo-root", type=Path, default=REPO_ROOT)
	    parser.add_argument("--json", action="store_true")
	    parser.add_argument("--markdown", action="store_true")
	    args = parser.parse_args(argv)

	    report = build_report(args.repo_root)
	    # --json takes precedence when both output flags are given.
	    if args.json:
	        print(json.dumps(report, indent=2, sort_keys=True))
	    elif args.markdown:
	        print(_markdown_report(report))
	    else:
	        print(f"evidence gate status: {report['status']}")
	        for key in report["missing_gate_keys"]:
	            gate = report["gates"][key]
	            print(f"- {key}: {gate['next_action']}")
	    return 0 if report["status"] == "complete" else 2


	# Script entry point: main()'s return value becomes the process exit status.
	if __name__ == "__main__":
	    raise SystemExit(main())