tinymind-native-8b-remote-handoff/bundle/data/alignment_tool_sft_forge.py

bbkdevops/unicosys-hypergraph-bucket/tinymind-native-8b-remote-handoff/bundle/data/alignment_tool_sft_forge.py

5.9 kB

	from __future__ import annotations

	from collections import Counter
	from datetime import datetime, timezone
	import hashlib
	import json
	from pathlib import Path
	from typing import Any


	# Identifier stamped into every row's metadata and into the manifest.
	SCHEMA_VERSION = "tinymind-alignment-tool-sft-v1"

	# System message placed first in every generated conversation.
	SYSTEM = (
	    "You are TinyMind Alignment Tutor. "
	    "Obey exact user constraints and emit valid tool schemas when requested."
	)

	# Domain labels; these are the values the row builders store under metadata.domain.
	DOMAINS = (
	    "alignment_constraint_following",
	    "alignment_tool_calling",
	)

	# Output-format constraints cycled through by the constraint-following rows.
	CONSTRAINTS = (
	    "answer exactly three bullets",
	    "return only valid JSON with keys action,args,verification",
	    "summarize in one sentence then provide a checklist",
	    "refuse unsafe scope and provide a safe alternative",
	    "extract parameters and ask one clarifying question only if required",
	    "follow the latest user instruction unless it conflicts with system policy",
	)

	# (tool name, example arguments) pairs cycled through by the tool-calling rows.
	TOOLS = (
	    ("sandbox.run_code", {"language": "lua", "code": "return 2 + 2"}),
	    ("sandbox.proxy.http_get", {"url": "http://127.0.0.1:8000/health"}),
	    (
	        "sandbox.env.run",
	        {"name": "build-a", "argv": ["python", "-m", "pytest", "-q"]},
	    ),
	    (
	        "fs.read",
	        {"path": "reports/current_model_results/current_model_results.json"},
	    ),
	    (
	        "fs.write",
	        {"path": "reports/alignment/manifest.json", "content": '{"ok": true}'},
	    ),
	    ("cmd.run", {"argv": ["python", "-m", "train.cli", "current-model-results"]}),
	)


	def _sha(text: str) -> str:
	    """Return the hex SHA-256 digest of ``text`` encoded as UTF-8."""
	    return hashlib.sha256(text.encode("utf-8")).hexdigest()


	def _constraint_row(idx: int) -> dict[str, Any]:
	    """Build one constraint-following training row.

	    The constraint is picked from ``CONSTRAINTS`` by ``idx`` modulo the table
	    length, so any integer index is accepted. The assistant turn is a
	    pretty-printed JSON object with the keys ``action``, ``args`` and
	    ``verification``.

	    Args:
	        idx: Index used both to select the constraint and as the row variant.

	    Returns:
	        The row dictionary produced by ``_row``.
	    """
	    constraint = CONSTRAINTS[idx % len(CONSTRAINTS)]
	    payload = {
	        "action": "respond",
	        "args": {"format": constraint, "safety": "policy_checked"},
	        "verification": ["constraint_satisfied", "no_extra_keys", "no_unverified_claim"],
	    }
	    return _row(
	        domain="alignment_constraint_following",
	        # Thai prompt; roughly "Follow this constraint exactly: <constraint>.
	        # Do not add unnecessary preamble."
	        user=f"ทำตาม constraint นี้ให้เป๊ะ: {constraint}. ห้ามเติมคำเกริ่นที่ไม่จำเป็น.",
	        # ensure_ascii=False keeps any non-ASCII text readable in the target.
	        assistant=json.dumps(payload, ensure_ascii=False, indent=2),
	        idx=idx,
	    )


	def _tool_row(idx: int) -> dict[str, Any]:
	    """Build one tool-calling training row.

	    The ``(tool, arguments)`` pair is picked from ``TOOLS`` by ``idx`` modulo
	    the table length. The assistant turn is a pretty-printed JSON object with
	    the keys ``tool``, ``arguments`` and ``audit``.

	    Args:
	        idx: Index used both to select the tool and as the row variant.

	    Returns:
	        The row dictionary produced by ``_row``.
	    """
	    tool, args = TOOLS[idx % len(TOOLS)]
	    payload = {
	        "tool": tool,
	        # The shared dict from TOOLS is only serialised below, never mutated.
	        "arguments": args,
	        "audit": {"requires_sandbox": True, "network_scope": "local_only", "verify_after": True},
	    }
	    return _row(
	        domain="alignment_tool_calling",
	        # Thai prompt; roughly "Choose the correct tool for this task and
	        # answer in JSON only: <tool>".
	        user=f"เลือก tool ที่ถูกต้องสำหรับงานนี้และตอบเป็น JSON เท่านั้น: {tool}",
	        assistant=json.dumps(payload, ensure_ascii=False, indent=2),
	        idx=idx,
	    )


	def _row(domain: str, user: str, assistant: str, idx: int) -> dict[str, Any]:
	    """Assemble a chat-format SFT record with its metadata.

	    Args:
	        domain: Domain label stored under ``metadata.domain``.
	        user: Content of the user turn.
	        assistant: Content of the assistant turn.
	        idx: Variant number stored under ``metadata.variant``.

	    Returns:
	        A dictionary with ``messages`` (system, user, assistant), ``source``
	        and ``metadata``. ``metadata.fingerprint_sha256`` is the SHA-256 of
	        the domain, index, user text and assistant text joined by ``|``.
	    """
	    # Plain "|" separators: a backslash before the pipe is not a valid Python
	    # escape sequence and would also leak a literal backslash into the hash.
	    fingerprint = _sha(f"{domain}|{idx}|{user}|{assistant}")
	    return {
	        "messages": [
	            {"role": "system", "content": SYSTEM},
	            {"role": "user", "content": user},
	            {"role": "assistant", "content": assistant},
	        ],
	        "source": "alignment_tool_sft_surgery",
	        "metadata": {
	            "schema_version": SCHEMA_VERSION,
	            "domain": domain,
	            "variant": idx,
	            "fingerprint_sha256": fingerprint,
	            "loss_weight": 1.45,
	            "quality_tags": [
	                "alignment_tool_sft",
	                "constraint_following",
	                "tool_calling",
	                "json_schema",
	                "response_only_loss_target",
	            ],
	        },
	    }


	def _rows(target_records: int):
	    """Yield ``target_records`` rows, alternating between the two domains.

	    Even positions yield constraint-following rows and odd positions yield
	    tool-calling rows. Each builder receives its per-domain ordinal
	    (``idx // 2``) instead of the global position. The builders select their
	    template with ``index % len(table)`` and both tables have an even length,
	    so feeding them only even (or only odd) global positions would reach just
	    every other template and leave half of ``CONSTRAINTS`` and ``TOOLS``
	    unused. As a consequence ``metadata.variant`` counts rows within a domain.

	    Args:
	        target_records: Total number of rows to generate; values below one
	            yield nothing.

	    Yields:
	        Row dictionaries as produced by ``_constraint_row`` / ``_tool_row``.
	    """
	    for idx in range(target_records):
	        build = _constraint_row if idx % 2 == 0 else _tool_row
	        yield build(idx // 2)


	def _write_jsonl(path: Path, rows) -> int:
	    """Write ``rows`` to ``path`` as JSON Lines and return how many were written.

	    Missing parent directories are created. The file is opened in write mode
	    (existing content is replaced), encoded as UTF-8 with ``\\n`` line endings
	    on every platform, and non-ASCII characters are written unescaped.

	    Args:
	        path: Destination file.
	        rows: Iterable of JSON-serialisable objects; it is consumed once, so a
	            generator is accepted.

	    Returns:
	        The number of lines written.
	    """
	    path.parent.mkdir(parents=True, exist_ok=True)
	    count = 0
	    # newline="\n" disables platform newline translation so file hashes are
	    # identical across operating systems.
	    with path.open("w", encoding="utf-8", newline="\n") as f:
	        for row in rows:
	            f.write(json.dumps(row, ensure_ascii=False) + "\n")
	            count += 1
	    return count


	def _file_sha(path: Path) -> str:
	    """Return the hex SHA-256 digest of the file at ``path``.

	    The file is read in binary mode in 1 MiB chunks so large files are hashed
	    without being loaded into memory at once.
	    """
	    chunk_size = 1024 * 1024
	    h = hashlib.sha256()
	    with path.open("rb") as f:
	        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
	        for chunk in iter(lambda: f.read(chunk_size), b""):
	            h.update(chunk)
	    return h.hexdigest()


	def build_alignment_tool_sft_dataset(
	    out_dir: str | Path,
	    *,
	    target_records: int = 30_000,
	    eval_fraction: float = 0.02,
	) -> dict[str, Any]:
	    """Generate the alignment/tool SFT dataset and write it with a manifest.

	    Three files are written into ``out_dir``:
	    ``alignment_tool_sft_train.jsonl``, ``alignment_tool_sft_eval.jsonl`` and
	    ``alignment_tool_sft_manifest.json``.

	    The eval split is stratified by domain: within each domain every
	    ``eval_mod``-th row (starting with the first) is held out. Selecting on
	    the global row position instead would interact with the alternating
	    domain order of ``_rows`` and, for any even ``eval_mod`` (the default
	    yields 50), place rows of a single domain in the eval file.

	    Args:
	        out_dir: Directory that receives the output files.
	        target_records: Number of rows to generate.
	        eval_fraction: Approximate share of rows held out for eval. It is
	            clamped to the range 0.001-0.5 before being converted into the
	            hold-out interval.

	    Returns:
	        The manifest as a dictionary: schema version, creation time (UTC,
	        ISO 8601), record counts, output paths with SHA-256 digests, the
	        claim gate flags and the manifest path.
	    """
	    out = Path(out_dir)
	    train_path = out / "alignment_tool_sft_train.jsonl"
	    eval_path = out / "alignment_tool_sft_eval.jsonl"
	    manifest_path = out / "alignment_tool_sft_manifest.json"

	    # Convert the clamped fraction into "hold out one row in every eval_mod".
	    eval_mod = max(1, round(1 / max(0.001, min(eval_fraction, 0.5))))

	    train_rows = []
	    eval_rows = []
	    domain_counts: Counter[str] = Counter()
	    for row in _rows(target_records):
	        domain = row["metadata"]["domain"]
	        # Counter returns 0 for unseen keys, so the first row of each domain
	        # always goes to eval and both domains are represented there.
	        if domain_counts[domain] % eval_mod == 0:
	            eval_rows.append(row)
	        else:
	            train_rows.append(row)
	        domain_counts[domain] += 1

	    train_count = _write_jsonl(train_path, train_rows)
	    eval_count = _write_jsonl(eval_path, eval_rows)

	    report: dict[str, Any] = {
	        "schema_version": SCHEMA_VERSION,
	        "created_at": datetime.now(timezone.utc).isoformat(),
	        "summary": {
	            "records_written": train_count + eval_count,
	            "train_records": train_count,
	            "eval_records": eval_count,
	            "domain_counts": dict(sorted(domain_counts.items())),
	            "loss_weight": 1.45,
	        },
	        "outputs": {
	            "train_jsonl": str(train_path),
	            "eval_jsonl": str(eval_path),
	            # Digests are taken from the files on disk, after writing.
	            "train_sha256": _file_sha(train_path),
	            "eval_sha256": _file_sha(eval_path),
	        },
	        "claim_gate": {
	            "alignment_tool_sft_ready": target_records >= 100,
	            "world_best_claim_allowed": False,
	        },
	        "manifest_path": str(manifest_path),
	    }
	    manifest_path.write_text(
	        json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True),
	        encoding="utf-8",
	    )
	    return report

Buckets:

bbkdevops
/

unicosys-hypergraph-bucket

Xet Storage Details