Buckets:

bbkdevops
/

unicosys-hypergraph-bucket

Files

xet

bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff /bundle /evaluation /ai_devtools.py

bbkdevops

about 1 month ago

download

raw

6.64 kB

	"""TinyMind AI DevTools Suite.

	Project-native developer tools for evidence-driven model development. The
	suite scans source assets, hashes critical files, runs allowlisted checks, and
	builds a training/readiness report for TinyMind without making unsupported
	world-best claims.
	"""

	from __future__ import annotations

	from datetime import datetime, timezone
	import hashlib
	import json
	from pathlib import Path
	import subprocess
	from typing import Iterable


	# File suffixes (lower-case, with the leading dot) that scan_project accepts.
	SOURCE_EXTENSIONS = {".py", ".rs", ".toml", ".md", ".json", ".jsonl", ".ps1", ".cu", ".h", ".cpp"}
	# Path component names that make _is_excluded reject a path.
	EXCLUDED_DIRS = {".git", "__pycache__", ".pytest_cache", "target", "reports", "checkpoints", ".venv", "venv"}
	# Commands run by run_ai_devtools when run_smoke is true. Test paths use
	# forward slashes so the same argv resolves on Windows and on POSIX hosts;
	# a backslash is an ordinary filename character on POSIX, not a separator.
	DEFAULT_SMOKE_COMMANDS = [
	    ["python", "-m", "pytest", "tests/test_grounded_answer.py", "tests/test_universal_context.py", "-q"],
	]


	def _sha256(path: Path) -> str:
	    """Return the hexadecimal SHA-256 digest of the file at ``path``.

	    The file is opened in binary mode and consumed in 1 MiB reads, so the
	    whole file is never held in memory at once.
	    """
	    read_size = 1024 * 1024
	    digest = hashlib.sha256()
	    with path.open("rb") as stream:
	        while True:
	            piece = stream.read(read_size)
	            if not piece:
	                # An empty read marks end of file.
	                break
	            digest.update(piece)
	    return digest.hexdigest()


	def _is_excluded(path: Path) -> bool:
	    """Report whether any component of ``path`` is named in ``EXCLUDED_DIRS``."""
	    return not EXCLUDED_DIRS.isdisjoint(path.parts)


	def scan_project(root: str | Path) -> dict:
	    """Inventory and hash every source file under ``root``.

	    A file is included when its lower-cased suffix is in ``SOURCE_EXTENSIONS``
	    and no component of its path below ``root`` is rejected by
	    ``_is_excluded``.

	    Args:
	        root: Directory to scan; it is resolved to an absolute path first.

	    Returns:
	        A dict with the resolved ``root``, ``file_count``, per-suffix
	        ``extension_counts``, the ``files`` rows (``path`` relative to the
	        root, ``extension``, ``bytes``, ``sha256``) in sorted path order, and
	        ``project_sha256``, a digest over the ``path:sha256`` lines of those
	        rows.
	    """
	    root_path = Path(root).resolve()
	    files = []
	    counts: dict[str, int] = {}
	    for path in sorted(root_path.rglob("*")):
	        if not path.is_file():
	            continue
	        suffix = path.suffix.lower()
	        if suffix not in SOURCE_EXTENSIONS:
	            continue
	        rel_path = path.relative_to(root_path)
	        # Match exclusions against the path below the root only. Testing the
	        # absolute path would drop every file whenever the project itself
	        # lives under a directory named e.g. "target", "reports" or "venv".
	        if _is_excluded(rel_path):
	            continue
	        # The "<none>" label only applies if "" is ever added to
	        # SOURCE_EXTENSIONS; with the current set the suffix is never empty.
	        ext = suffix or "<none>"
	        counts[ext] = counts.get(ext, 0) + 1
	        files.append(
	            {
	                # NOTE: str() keeps the OS-native separator, so the rows (and
	                # project_sha256) for nested files differ between Windows
	                # and POSIX hosts.
	                "path": str(rel_path),
	                "extension": ext,
	                "bytes": path.stat().st_size,
	                "sha256": _sha256(path),
	            }
	        )
	    manifest = "\n".join(f"{row['path']}:{row['sha256']}" for row in files)
	    return {
	        "root": str(root_path),
	        "file_count": len(files),
	        "extension_counts": counts,
	        "files": files,
	        "project_sha256": hashlib.sha256(manifest.encode("utf-8")).hexdigest(),
	    }


	def _allowed_command(argv: list[str]) -> bool:
	    """Decide whether ``argv`` is a command this suite is willing to execute.

	    Only two shapes are accepted: ``python -m pytest ...`` and
	    ``cargo test ...`` / ``cargo build ...``. The program name is compared
	    case-insensitively; the tokens after it are compared exactly.

	    Args:
	        argv: Command and arguments as a list (a tuple is accepted too).

	    Returns:
	        True for an allowlisted command. Input that is too short, or whose
	        leading tokens are not strings, is rejected instead of raising, so
	        the gate fails closed.
	    """
	    # Only the first three tokens take part in the decision.
	    head = list(argv[:3])
	    if len(head) < 2 or not all(isinstance(token, str) for token in head):
	        return False
	    program = head[0].lower()
	    if program == "python":
	        # Needs exactly "-m", "pytest" in positions 1 and 2.
	        return head[1:] == ["-m", "pytest"]
	    if program == "cargo":
	        return head[1] in {"test", "build"}
	    return False


	def run_allowlisted_command(argv: list[str], cwd: str | Path, timeout_s: int = 180) -> dict:
	    """Run ``argv`` in ``cwd`` if it is allowlisted and summarise the outcome.

	    Args:
	        argv: Command and arguments; executed without a shell.
	        cwd: Working directory for the child process.
	        timeout_s: Seconds to wait before the run is abandoned.

	    Returns:
	        A dict that always carries ``argv`` and ``ok``. A command rejected by
	        ``_allowed_command`` adds ``skipped`` and ``reason`` and is not run.
	        Otherwise the dict has ``returncode`` plus ``stdout_tail`` and
	        ``stderr_tail`` (at most the last 4000 characters each). On timeout
	        ``returncode`` is ``None`` and ``stderr_tail`` is ``"timeout"``; if
	        the program cannot be started ``returncode`` is ``None`` and
	        ``stderr_tail`` holds the OS error text.
	    """
	    tail_chars = 4000

	    def _tail(output) -> str:
	        # TimeoutExpired can carry bytes (or None) even when text=True was
	        # requested; normalise to str so the result stays JSON-serialisable.
	        if output is None:
	            return ""
	        if isinstance(output, bytes):
	            output = output.decode("utf-8", errors="replace")
	        return output[-tail_chars:]

	    if not _allowed_command(argv):
	        return {"argv": argv, "ok": False, "skipped": True, "reason": "command_not_allowlisted"}
	    try:
	        proc = subprocess.run(argv, cwd=cwd, capture_output=True, text=True, shell=False, timeout=timeout_s)
	    except subprocess.TimeoutExpired as exc:
	        return {
	            "argv": argv,
	            "ok": False,
	            "returncode": None,
	            "stdout_tail": _tail(exc.stdout),
	            "stderr_tail": "timeout",
	        }
	    except OSError as exc:
	        # Missing executable or unusable cwd: report a failed row like any
	        # other failure instead of aborting the whole run.
	        return {"argv": argv, "ok": False, "returncode": None, "stdout_tail": "", "stderr_tail": str(exc)}
	    return {
	        "argv": argv,
	        "ok": proc.returncode == 0,
	        "returncode": proc.returncode,
	        "stdout_tail": _tail(proc.stdout),
	        "stderr_tail": _tail(proc.stderr),
	    }


	def training_readiness(root: str | Path) -> dict:
	    """Check that the artifacts this suite requires are present under ``root``.

	    Each required artifact is a fixed path below ``root``. It passes only when
	    that path is a file; a directory with the same name cannot be hashed and
	    therefore does not count.

	    Args:
	        root: Project directory the required paths are joined onto.

	    Returns:
	        A dict with ``required_count``, ``passed_count``, ``coverage_percent``
	        (0 to 100), ``passed`` (True only when every check passes), the
	        per-artifact ``checks`` rows (``name``, ``path``, ``exists``,
	        ``bytes``) and a ``definition`` string explaining the score.
	    """
	    root_path = Path(root)
	    required = {
	        "knowledge_full_cycle": root_path / "reports" / "knowledge_full_cycle" / "knowledge_full_cycle_report.json",
	        "context_ledger": root_path / "reports" / "context_ledger_tinymind_clean" / "universal_context_manifest.json",
	        "ai_tools_dll": root_path / "runtime" / "ai_tools_dll" / "target" / "release" / "tinymind_ai_tools.dll",
	        "grounded_answer_guard": root_path / "evaluation" / "grounded_answer.py",
	        "universal_context": root_path / "data" / "universal_context.py",
	    }
	    rows = []
	    for name, path in required.items():
	        present = path.is_file()
	        rows.append(
	            {
	                "name": name,
	                "path": str(path),
	                "exists": present,
	                # Size is only read for files that are present; 0 otherwise.
	                "bytes": path.stat().st_size if present else 0,
	            }
	        )
	    passed = sum(1 for row in rows if row["exists"])
	    return {
	        "required_count": len(rows),
	        "passed_count": passed,
	        # max(..., 1) guards the division if the required table is ever empty.
	        "coverage_percent": 100.0 * passed / max(len(rows), 1),
	        "passed": passed == len(rows),
	        "checks": rows,
	        "definition": "100% means all local DevTools/training support artifacts required by this suite exist and are hashable.",
	    }


	def run_ai_devtools(
	    root: str | Path,
	    out_dir: str | Path,
	    run_smoke: bool = False,
	    extra_commands: Iterable[list[str]] = (),
	) -> dict:
	    """Build the DevTools report for ``root`` and write it into ``out_dir``.

	    The report combines the project scan, the training-readiness checks and,
	    optionally, the results of allowlisted commands. It is written as
	    ``ai_devtools_report.json`` and ``ai_devtools_report.md``.

	    Args:
	        root: Project directory to analyse; resolved to an absolute path.
	        out_dir: Directory for the two report files; created if missing.
	        run_smoke: When true, run ``DEFAULT_SMOKE_COMMANDS`` followed by
	            ``extra_commands`` with ``root`` as the working directory.
	        extra_commands: Additional argv lists. They are only run when
	            ``run_smoke`` is true and are ignored otherwise.

	    Returns:
	        The report dict, including ``json_path`` and ``markdown_path``.
	    """
	    root_path = Path(root).resolve()
	    out = Path(out_dir)
	    out.mkdir(parents=True, exist_ok=True)
	    scan = scan_project(root_path)
	    readiness = training_readiness(root_path)
	    commands = []
	    if run_smoke:
	        commands = [
	            run_allowlisted_command(argv, root_path)
	            for argv in [*DEFAULT_SMOKE_COMMANDS, *extra_commands]
	        ]
	    # all() over an empty list is True, so a run without commands passes.
	    command_gate = {"passed": all(row.get("ok", False) for row in commands), "commands": commands}
	    report = {
	        "schema_version": "tinymind-ai-devtools-suite-v1",
	        "created_at": datetime.now(timezone.utc).isoformat(),
	        "claim": "AI-native DevTools for TinyMind evidence, training readiness, and source-grounded tool use.",
	        "world_best_claim_allowed": False,
	        "scan": scan,
	        "training_readiness": readiness,
	        "command_gate": command_gate,
	        "devtools_gate": {
	            # Passes only with full readiness, a passing command gate and at
	            # least one scanned file.
	            "passed": readiness["passed"] and command_gate["passed"] and scan["file_count"] > 0,
	            "notes": "This is a local completeness gate, not an official global ranking.",
	        },
	    }
	    json_path = out / "ai_devtools_report.json"
	    md_path = out / "ai_devtools_report.md"
	    # The output locations are recorded before writing so that both files
	    # and the returned dict carry them.
	    report["json_path"] = str(json_path)
	    report["markdown_path"] = str(md_path)
	    json_path.write_text(json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True), encoding="utf-8")
	    md_path.write_text(_markdown(report), encoding="utf-8")
	    return report


	def _markdown(report: dict) -> str:
	    """Render ``report`` as a short Markdown summary.

	    The output has a title, a bullet list of headline figures, and one bullet
	    per readiness check. It always ends with a newline.
	    """
	    summary = (
	        "# TinyMind AI DevTools Suite\n"
	        "\n"
	        f"- Files scanned: {report['scan']['file_count']}\n"
	        f"- Project hash: `{report['scan']['project_sha256']}`\n"
	        f"- Training readiness: {report['training_readiness']['coverage_percent']:.1f}%\n"
	        f"- Command gate: {report['command_gate']['passed']}\n"
	        f"- DevTools gate: {report['devtools_gate']['passed']}\n"
	        "- World-best claim allowed: false\n"
	        "\n"
	        "## Readiness Checks\n"
	        "\n"
	    )
	    check_lines = "".join(
	        f"- {check['name']}: {check['exists']} ({check['path']})\n"
	        for check in report["training_readiness"]["checks"]
	    )
	    return summary + check_lines

Xet Storage Details

Size: 6.64 kB
Xet hash: 4b1aed4902191aa4a1954d1bc4701b96ccfbbc7ef2bff5adec8f450871b083c5

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.