bbkdevops's picture
download
raw
6.64 kB
"""TinyMind AI DevTools Suite.
Project-native developer tools for evidence-driven model development. The
suite scans source assets, hashes critical files, runs allowlisted checks, and
builds a training/readiness report for TinyMind without making unsupported
world-best claims.
"""
from __future__ import annotations
from datetime import datetime, timezone
import hashlib
import json
from pathlib import Path
import subprocess
from typing import Iterable
# File suffixes (lower-case, including the leading dot) that the project scan
# treats as source assets.
SOURCE_EXTENSIONS = {".py", ".rs", ".toml", ".md", ".json", ".jsonl", ".ps1", ".cu", ".h", ".cpp"}

# Path components that exclude a file from the project scan (VCS data, caches,
# build output, generated reports, checkpoints, virtual environments).
EXCLUDED_DIRS = {".git", "__pycache__", ".pytest_cache", "target", "reports", "checkpoints", ".venv", "venv"}

# Commands executed by the smoke gate.  The test paths use forward slashes:
# pytest accepts them on Windows and POSIX alike, whereas a backslash is only
# a path separator on Windows and would name a non-existent file elsewhere.
DEFAULT_SMOKE_COMMANDS = [
    ["python", "-m", "pytest", "tests/test_grounded_answer.py", "tests/test_universal_context.py", "-q"],
]
def _sha256(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is consumed in 1 MiB pieces so that large files are never held
    in memory as a whole.
    """
    piece_size = 1024 * 1024
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            piece = stream.read(piece_size)
            if not piece:
                # An empty read marks end of file.
                break
            digest.update(piece)
    return digest.hexdigest()
def _is_excluded(path: Path) -> bool:
    """Return True when any component of *path* is listed in EXCLUDED_DIRS."""
    return not EXCLUDED_DIRS.isdisjoint(path.parts)
def scan_project(root: str | Path) -> dict:
    """Inventory and hash the source assets found under *root*.

    Every regular file below *root* whose lower-cased suffix is in
    ``SOURCE_EXTENSIONS`` is recorded, unless one of the path components
    between *root* and the file is excluded by ``_is_excluded``.

    Args:
        root: Directory to scan. It is resolved to an absolute path first.

    Returns:
        A dict with:
        ``root`` - the resolved root as a string;
        ``file_count`` - number of recorded files;
        ``extension_counts`` - recorded files per lower-cased suffix;
        ``files`` - one entry per file (root-relative ``path`` using the
        platform's separator, ``extension``, ``bytes``, ``sha256``), in
        sorted path order;
        ``project_sha256`` - SHA-256 over the newline-joined
        ``path:sha256`` lines of ``files``.
    """
    root_path = Path(root).resolve()
    files = []
    counts: dict[str, int] = {}
    for path in sorted(root_path.rglob("*")):
        rel_path = path.relative_to(root_path)
        # Match exclusions against the part of the path that lies inside the
        # project only.  Testing the absolute path would discard every file
        # whenever the project itself lives below a directory that happens to
        # be called e.g. "target", "reports" or "venv".  The name test runs
        # first because it needs no filesystem access.
        if _is_excluded(rel_path) or not path.is_file():
            continue
        suffix = path.suffix.lower()
        if suffix not in SOURCE_EXTENSIONS:
            continue
        ext = suffix or "<none>"
        counts[ext] = counts.get(ext, 0) + 1
        files.append(
            {
                "path": str(rel_path),
                "extension": ext,
                "bytes": path.stat().st_size,
                "sha256": _sha256(path),
            }
        )
    # The project digest covers both the relative paths and the content
    # hashes, so renames as well as edits change it.
    manifest = "\n".join(f"{row['path']}:{row['sha256']}" for row in files)
    return {
        "root": str(root_path),
        "file_count": len(files),
        "extension_counts": counts,
        "files": files,
        "project_sha256": hashlib.sha256(manifest.encode("utf-8")).hexdigest(),
    }
def _allowed_command(argv: list[str]) -> bool:
    """Report whether *argv* has one of the command shapes the suite may run.

    Accepted shapes are ``python -m pytest ...`` and ``cargo test ...`` /
    ``cargo build ...``.  The executable name is compared case-insensitively;
    arguments after the recognised prefix are not inspected.
    """
    if len(argv) < 2:
        # Neither accepted shape fits in fewer than two arguments.
        return False
    program = argv[0].lower()
    if program == "cargo":
        return argv[1] in {"test", "build"}
    if program == "python":
        return len(argv) >= 3 and argv[1:3] == ["-m", "pytest"]
    return False
def _tail_text(data: str | bytes | None, limit: int = 4000) -> str:
    """Return at most the last *limit* characters of captured output as text."""
    if not data:
        return ""
    if isinstance(data, bytes):
        # Undecodable bytes are replaced rather than raising, so the result
        # is always a JSON-serialisable string.
        data = data.decode("utf-8", errors="replace")
    return data[-limit:]


def run_allowlisted_command(argv: list[str], cwd: str | Path, timeout_s: int = 180) -> dict:
    """Run *argv* in *cwd* if it is allowlisted and summarise the outcome.

    Args:
        argv: Command and arguments; executed without a shell.
        cwd: Working directory for the child process.
        timeout_s: Seconds to wait before the command is treated as failed.

    Returns:
        A dict that always contains ``argv`` and ``ok``.  A command rejected
        by the allowlist additionally has ``skipped`` and ``reason``; an
        executed (or attempted) command has ``returncode``, ``stdout_tail``
        and ``stderr_tail``.  ``returncode`` is ``None`` when the command
        timed out or could not be started.
    """
    if not _allowed_command(argv):
        return {"argv": argv, "ok": False, "skipped": True, "reason": "command_not_allowlisted"}
    try:
        proc = subprocess.run(argv, cwd=cwd, capture_output=True, text=True, shell=False, timeout=timeout_s)
    except subprocess.TimeoutExpired as exc:
        # The output attached to TimeoutExpired may be bytes even though
        # text=True was requested, and may be None; normalise it to a bounded
        # string so the result stays serialisable.
        return {
            "argv": argv,
            "ok": False,
            "returncode": None,
            "stdout_tail": _tail_text(exc.stdout),
            "stderr_tail": "timeout",
        }
    except OSError as exc:
        # A missing executable or unusable working directory is reported as a
        # failed command instead of aborting the caller.
        return {
            "argv": argv,
            "ok": False,
            "returncode": None,
            "stdout_tail": "",
            "stderr_tail": f"{type(exc).__name__}: {exc}",
        }
    return {
        "argv": argv,
        "ok": proc.returncode == 0,
        "returncode": proc.returncode,
        "stdout_tail": _tail_text(proc.stdout),
        "stderr_tail": _tail_text(proc.stderr),
    }
def training_readiness(root: str | Path) -> dict:
    """Check which of the suite's required artifacts exist under *root*.

    Returns:
        A dict with ``required_count``, ``passed_count``,
        ``coverage_percent``, an overall ``passed`` flag, a ``checks`` list
        (``name``, ``path``, ``exists``, ``bytes`` per artifact) and a
        ``definition`` string describing what full coverage means.
    """
    base = Path(root)
    artifacts = {
        "knowledge_full_cycle": base.joinpath("reports", "knowledge_full_cycle", "knowledge_full_cycle_report.json"),
        "context_ledger": base.joinpath("reports", "context_ledger_tinymind_clean", "universal_context_manifest.json"),
        "ai_tools_dll": base.joinpath("runtime", "ai_tools_dll", "target", "release", "tinymind_ai_tools.dll"),
        "grounded_answer_guard": base.joinpath("evaluation", "grounded_answer.py"),
        "universal_context": base.joinpath("data", "universal_context.py"),
    }

    checks = []
    for label, location in artifacts.items():
        found = location.exists()
        # Missing artifacts are reported with a size of zero.
        size = location.stat().st_size if found else 0
        checks.append({"name": label, "path": str(location), "exists": found, "bytes": size})

    total = len(checks)
    present = sum(entry["exists"] for entry in checks)
    return {
        "required_count": total,
        "passed_count": present,
        "coverage_percent": 100.0 * present / max(total, 1),
        "passed": present == total,
        "checks": checks,
        "definition": "100% means all local DevTools/training support artifacts required by this suite exist and are hashable.",
    }
def run_ai_devtools(
    root: str | Path,
    out_dir: str | Path,
    run_smoke: bool = False,
    extra_commands: Iterable[list[str]] = (),
) -> dict:
    """Build the DevTools report for *root* and write it into *out_dir*.

    The project is scanned and checked for training readiness.  When
    *run_smoke* is true, the default smoke commands followed by
    *extra_commands* are handed to the allowlisted runner; otherwise no
    command is run at all.  The report is written to
    ``ai_devtools_report.json`` and ``ai_devtools_report.md`` inside
    *out_dir* (created if missing) and is also returned.
    """
    project_root = Path(root).resolve()
    target_dir = Path(out_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    scan = scan_project(project_root)
    readiness = training_readiness(project_root)

    results = []
    if run_smoke:
        queued = [*DEFAULT_SMOKE_COMMANDS, *extra_commands]
        results = [run_allowlisted_command(argv, project_root) for argv in queued]

    # all() over an empty sequence is True, so a run without commands passes.
    command_gate = {"passed": all(entry.get("ok", False) for entry in results), "commands": results}
    gate_passed = readiness["passed"] and command_gate["passed"] and scan["file_count"] > 0
    created_at = datetime.now(timezone.utc).isoformat()

    json_path = target_dir / "ai_devtools_report.json"
    md_path = target_dir / "ai_devtools_report.md"
    report = {
        "schema_version": "tinymind-ai-devtools-suite-v1",
        "created_at": created_at,
        "claim": "AI-native DevTools for TinyMind evidence, training readiness, and source-grounded tool use.",
        "world_best_claim_allowed": False,
        "scan": scan,
        "training_readiness": readiness,
        "command_gate": command_gate,
        "devtools_gate": {
            "passed": gate_passed,
            "notes": "This is a local completeness gate, not an official global ranking.",
        },
        "json_path": str(json_path),
        "markdown_path": str(md_path),
    }

    serialized = json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True)
    json_path.write_text(serialized, encoding="utf-8")
    md_path.write_text(_markdown(report), encoding="utf-8")
    return report
def _markdown(report: dict) -> str:
    """Render *report* as a short Markdown summary that ends with a newline.

    The summary lists the scan totals, the readiness percentage, both gate
    results and one bullet per readiness check.
    """
    scan = report["scan"]
    readiness = report["training_readiness"]
    summary = [
        "# TinyMind AI DevTools Suite",
        "",
        f"- Files scanned: {scan['file_count']}",
        f"- Project hash: `{scan['project_sha256']}`",
        f"- Training readiness: {readiness['coverage_percent']:.1f}%",
        f"- Command gate: {report['command_gate']['passed']}",
        f"- DevTools gate: {report['devtools_gate']['passed']}",
        "- World-best claim allowed: false",
        "",
        "## Readiness Checks",
        "",
    ]
    bullets = [f"- {check['name']}: {check['exists']} ({check['path']})" for check in readiness["checks"]]
    return "\n".join(summary + bullets) + "\n"

Xet Storage Details

Size:
6.64 kB
·
Xet hash:
4b1aed4902191aa4a1954d1bc4701b96ccfbbc7ef2bff5adec8f450871b083c5

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.