Buckets:
bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff /bundle /evaluation /ai_devtools.py
| """TinyMind AI DevTools Suite. | |
| Project-native developer tools for evidence-driven model development. The | |
| suite scans source assets, hashes critical files, runs allowlisted checks, and | |
| builds a training/readiness report for TinyMind without making unsupported | |
| world-best claims. | |
| """ | |
| from __future__ import annotations | |
| from datetime import datetime, timezone | |
| import hashlib | |
| import json | |
| from pathlib import Path | |
| import subprocess | |
| from typing import Iterable | |
# Lower-cased file suffixes (leading dot included) that the project scan records.
SOURCE_EXTENSIONS = {".py", ".rs", ".toml", ".md", ".json", ".jsonl", ".ps1", ".cu", ".h", ".cpp"}

# Path component names that cause a file to be skipped by the project scan.
EXCLUDED_DIRS = {".git", "__pycache__", ".pytest_cache", "target", "reports", "checkpoints", ".venv", "venv"}

# Commands executed when smoke checks are requested.
# Test paths use forward slashes: pytest accepts them on Windows as well, whereas
# backslash-separated paths are not resolved as directories on POSIX hosts.
DEFAULT_SMOKE_COMMANDS = [
    ["python", "-m", "pytest", "tests/test_grounded_answer.py", "tests/test_universal_context.py", "-q"],
]
def _sha256(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is opened in binary mode and consumed in 1 MiB pieces, so the
    whole content is never held in memory at once.
    """
    piece_size = 1024 * 1024
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            piece = stream.read(piece_size)
            if not piece:
                # An empty read marks end of file.
                break
            digest.update(piece)
    return digest.hexdigest()
def _is_excluded(path: Path) -> bool:
    """Return True when at least one component of *path* is named in ``EXCLUDED_DIRS``."""
    return not EXCLUDED_DIRS.isdisjoint(path.parts)
def scan_project(root: str | Path) -> dict:
    """Inventory and hash the source files found under *root*.

    Every regular file below the resolved root whose lower-cased suffix is in
    ``SOURCE_EXTENSIONS`` is recorded, unless one of its path components
    *relative to the root* is listed in ``EXCLUDED_DIRS``.

    Args:
        root: Directory to scan; it is resolved to an absolute path first.

    Returns:
        A dict with the keys ``root`` (resolved root as a string),
        ``file_count``, ``extension_counts`` (suffix -> number of files),
        ``files`` (one row per file with ``path``, ``extension``, ``bytes``
        and ``sha256``) and ``project_sha256`` (SHA-256 of the newline-joined
        ``"<path>:<sha256>"`` lines, in sorted path order).
    """
    root_path = Path(root).resolve()
    files = []
    counts: dict[str, int] = {}
    for path in sorted(root_path.rglob("*")):
        ext = path.suffix.lower()
        if ext not in SOURCE_EXTENSIONS:
            continue
        rel_path = path.relative_to(root_path)
        # Test the root-relative path only: checking the absolute path would
        # exclude every file whenever the root itself lives below a directory
        # named e.g. "target", "reports" or "venv".
        if _is_excluded(rel_path) or not path.is_file():
            continue
        # NOTE(review): str() keeps the platform-native separator, so "path"
        # values and project_sha256 differ between Windows and POSIX hosts.
        rel = str(rel_path)
        counts[ext] = counts.get(ext, 0) + 1
        files.append(
            {
                "path": rel,
                "extension": ext,
                "bytes": path.stat().st_size,
                "sha256": _sha256(path),
            }
        )
    manifest = "\n".join(f"{row['path']}:{row['sha256']}" for row in files)
    return {
        "root": str(root_path),
        "file_count": len(files),
        "extension_counts": counts,
        "files": files,
        "project_sha256": hashlib.sha256(manifest.encode("utf-8")).hexdigest(),
    }
def _allowed_command(argv: list[str]) -> bool:
    """Return True when *argv* is one of the command shapes this suite may run.

    Allowed shapes (the program name is matched case-insensitively):

    * ``python -m pytest [args...]``
    * ``cargo test [args...]`` and ``cargo build [args...]``

    Both lists and tuples are accepted. An empty *argv* or a program name that
    is not a string is rejected rather than raising.
    """
    # Normalise the leading items to a list so that a tuple argv compares
    # equal to the list literal below instead of being silently rejected.
    head = list(argv[:3])
    if not head or not isinstance(head[0], str):
        return False
    program = head[0].lower()
    if program == "python":
        return head[1:3] == ["-m", "pytest"]
    if program == "cargo":
        return len(head) >= 2 and head[1] in {"test", "build"}
    return False
def _tail_text(data: str | bytes | None, limit: int = 4000) -> str:
    """Return the last *limit* characters of captured output as ``str``."""
    if data is None:
        return ""
    if isinstance(data, bytes):
        # Output attached to TimeoutExpired can be raw bytes even when the
        # process was started with text=True.
        data = data.decode("utf-8", errors="replace")
    return data[-limit:]


def run_allowlisted_command(argv: list[str], cwd: str | Path, timeout_s: int = 180) -> dict:
    """Run *argv* in *cwd* if it is allowlisted and summarise the outcome.

    Args:
        argv: Command and arguments; executed without a shell.
        cwd: Working directory for the child process.
        timeout_s: Seconds to wait before the run is abandoned.

    Returns:
        A JSON-serialisable dict that always contains ``argv`` and ``ok``.

        * Not allowlisted: ``skipped`` is True and ``reason`` is
          ``"command_not_allowlisted"``; nothing is executed.
        * Completed: ``returncode`` plus ``stdout_tail`` / ``stderr_tail``
          (last 4000 characters each); ``ok`` is True for return code 0.
        * Timed out: ``returncode`` is None and ``stderr_tail`` is
          ``"timeout"``.
        * Could not be started (``OSError``, e.g. executable or *cwd*
          missing): ``returncode`` is None and ``stderr_tail`` describes the
          error.
    """
    if not _allowed_command(argv):
        return {"argv": argv, "ok": False, "skipped": True, "reason": "command_not_allowlisted"}
    try:
        proc = subprocess.run(argv, cwd=cwd, capture_output=True, text=True, shell=False, timeout=timeout_s)
    except subprocess.TimeoutExpired as exc:
        return {
            "argv": argv,
            "ok": False,
            "returncode": None,
            # Decode and truncate so the row stays JSON-serialisable and bounded.
            "stdout_tail": _tail_text(exc.stdout),
            "stderr_tail": "timeout",
        }
    except OSError as exc:
        # A launch failure becomes a failed row instead of aborting the report.
        return {
            "argv": argv,
            "ok": False,
            "returncode": None,
            "stdout_tail": "",
            "stderr_tail": f"{type(exc).__name__}: {exc}",
        }
    return {
        "argv": argv,
        "ok": proc.returncode == 0,
        "returncode": proc.returncode,
        "stdout_tail": _tail_text(proc.stdout),
        "stderr_tail": _tail_text(proc.stderr),
    }
def training_readiness(root: str | Path) -> dict:
    """Check that the artifacts this suite requires are present under *root*.

    Each required artifact must be a regular file. A directory (or an
    unreadable entry) at the expected location does not count, because such
    an entry cannot be hashed.

    Args:
        root: Project root that the fixed artifact paths are joined onto.

    Returns:
        A dict with ``required_count``, ``passed_count``, ``coverage_percent``
        (0.0-100.0), ``passed`` (True only when every artifact is present),
        ``checks`` (one row per artifact with ``name``, ``path``, ``exists``
        and ``bytes``) and a human-readable ``definition`` string.
    """
    root_path = Path(root)
    required = {
        "knowledge_full_cycle": root_path / "reports" / "knowledge_full_cycle" / "knowledge_full_cycle_report.json",
        "context_ledger": root_path / "reports" / "context_ledger_tinymind_clean" / "universal_context_manifest.json",
        "ai_tools_dll": root_path / "runtime" / "ai_tools_dll" / "target" / "release" / "tinymind_ai_tools.dll",
        "grounded_answer_guard": root_path / "evaluation" / "grounded_answer.py",
        "universal_context": root_path / "data" / "universal_context.py",
    }
    rows = []
    for name, path in required.items():
        try:
            size = path.stat().st_size if path.is_file() else None
        except OSError:
            # The entry vanished or is inaccessible: treat it as missing
            # rather than aborting the whole readiness report.
            size = None
        rows.append({"name": name, "path": str(path), "exists": size is not None, "bytes": size or 0})
    passed = sum(1 for row in rows if row["exists"])
    return {
        "required_count": len(rows),
        "passed_count": passed,
        "coverage_percent": 100.0 * passed / max(len(rows), 1),
        "passed": passed == len(rows),
        "checks": rows,
        "definition": "100% means all local DevTools/training support artifacts required by this suite exist and are hashable.",
    }
def run_ai_devtools(
    root: str | Path,
    out_dir: str | Path,
    run_smoke: bool = False,
    extra_commands: Iterable[list[str]] = (),
) -> dict:
    """Build the DevTools report for *root* and write it into *out_dir*.

    The report combines the project scan, the training-readiness checks and,
    when *run_smoke* is true, the results of the default smoke commands
    followed by *extra_commands*. It is written as
    ``ai_devtools_report.json`` and ``ai_devtools_report.md`` inside
    *out_dir* (created if missing) and also returned.

    Args:
        root: Project root; resolved before use.
        out_dir: Directory that receives the two report files.
        run_smoke: When false, no command is executed at all.
        extra_commands: Additional argv lists, only used when *run_smoke* is true.

    Returns:
        The report dict, including ``json_path`` and ``markdown_path``.
    """
    project_root = Path(root).resolve()
    report_dir = Path(out_dir)
    report_dir.mkdir(parents=True, exist_ok=True)

    scan = scan_project(project_root)
    readiness = training_readiness(project_root)

    results = []
    if run_smoke:
        results = [
            run_allowlisted_command(argv, project_root)
            for argv in [*DEFAULT_SMOKE_COMMANDS, *extra_commands]
        ]
    # all() over an empty list is True, so the gate passes when nothing ran.
    command_gate = {"passed": all(row.get("ok", False) for row in results), "commands": results}

    json_path = report_dir / "ai_devtools_report.json"
    md_path = report_dir / "ai_devtools_report.md"
    report = {
        "schema_version": "tinymind-ai-devtools-suite-v1",
        "created_at": datetime.now(timezone.utc).isoformat(),
        "claim": "AI-native DevTools for TinyMind evidence, training readiness, and source-grounded tool use.",
        "world_best_claim_allowed": False,
        "scan": scan,
        "training_readiness": readiness,
        "command_gate": command_gate,
        "devtools_gate": {
            "passed": readiness["passed"] and command_gate["passed"] and scan["file_count"] > 0,
            "notes": "This is a local completeness gate, not an official global ranking.",
        },
        "json_path": str(json_path),
        "markdown_path": str(md_path),
    }

    serialized = json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True)
    json_path.write_text(serialized, encoding="utf-8")
    md_path.write_text(_markdown(report), encoding="utf-8")
    return report
def _markdown(report: dict) -> str:
    """Render *report* as a short Markdown summary ending in a newline.

    The output lists the scan totals, the readiness percentage and both gate
    results, followed by one bullet per readiness check.
    """
    scan = report["scan"]
    readiness = report["training_readiness"]
    summary = [
        "# TinyMind AI DevTools Suite",
        "",
        f"- Files scanned: {scan['file_count']}",
        f"- Project hash: `{scan['project_sha256']}`",
        f"- Training readiness: {readiness['coverage_percent']:.1f}%",
        f"- Command gate: {report['command_gate']['passed']}",
        f"- DevTools gate: {report['devtools_gate']['passed']}",
        "- World-best claim allowed: false",
        "",
        "## Readiness Checks",
        "",
    ]
    check_bullets = [
        f"- {check['name']}: {check['exists']} ({check['path']})"
        for check in readiness["checks"]
    ]
    return "\n".join(summary + check_bullets) + "\n"
Xet Storage Details
- Size:
- 6.64 kB
- Xet hash:
- 4b1aed4902191aa4a1954d1bc4701b96ccfbbc7ef2bff5adec8f450871b083c5
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.