# File-viewer page header captured with the source (not part of the module):
# bbkdevops's picture · download · raw · 6.63 kB
"""Command intensity governor and SFT forge."""
from __future__ import annotations
from datetime import datetime, timezone
import json
from pathlib import Path
from typing import Any
from model.command_intensity_core import CommandIntensityCore
# Built-in Thai-language commands used by build_command_intensity_governor
# when the caller does not supply any commands. They cover: natural Thai
# replies backed by measured results, code/bug explanation with tests,
# source-backed lookups, sandbox tool use that waits for observations,
# resource efficiency with runtime metrics, data cleaning/deduplication
# before training, and long multi-part instructions.
SEED_COMMANDS = [
"พัฒนาโมเดลให้ตอบภาษาไทยเป็นธรรมชาติ ห้ามตอบฟิก ต้องวัดผลจริงและแสดง log หลักฐาน",
"เทรนให้เข้าใจโค้ดและอธิบายบักพร้อม test โดยไม่เดา",
"ค้นหาข้อมูลปัจจุบันก่อนตอบ และแนบแหล่งที่มาเมื่อเป็นข้อมูลภายนอก",
"ใช้เครื่องมือ sandbox แล้วรอ observation ก่อนสรุปว่าทำสำเร็จ",
"ลดการใช้ทรัพยากรแต่คงคุณภาพด้วย runtime metric จริง",
"ขัดกรองข้อมูลเพียวบริสุทธิ์และกันข้อมูลซ้ำหรือข้อมูลขยะก่อนเข้าเทรน",
"ตอบตามคำสั่งยาวซับซ้อนโดยแยกเจตนา ข้อห้าม หลักฐาน และผลลัพธ์",
]
def _sft_row(command: str, analysis: dict[str, Any], idx: int, split: str) -> dict[str, Any]:
return {
"id": f"command-intensity-{split}-{idx:04d}",
"domain": "command_intensity",
"messages": [
{
"role": "system",
"content": "You are TinyMind Command Intensity Core. Follow the user intent precisely, avoid canned answers, and report only evidence-backed results.",
},
{"role": "user", "content": command},
{
"role": "assistant",
"content": (
f"เจตนา: {analysis['intent']}\n"
f"ข้อกำกับ: {', '.join(analysis['constraints']) or 'none'}\n"
f"หลักฐานที่ต้องมี: {', '.join(analysis['evidence_required'])}\n"
"แผนทำงาน: แยกเกณฑ์สำเร็จ -> เลือกระบบที่จำเป็นที่สุด -> เก็บหลักฐาน -> ตอบผลพร้อมข้อจำกัด"
),
},
],
"analysis": analysis,
}
def _percent(hits: int, total: int) -> float:
    """Return ``hits`` as a percentage of ``total``; a zero total counts as one."""
    return 100.0 * hits / max(total, 1)


def _eval_row(command: str, idx: int) -> dict[str, Any]:
    """Build one evaluation record: the command plus a request to summarise required evidence."""
    return {
        "id": f"command-intensity-eval-{idx:04d}",
        "messages": [
            {"role": "system", "content": "Evaluate command obedience without fixed answers."},
            {"role": "user", "content": command + " สรุปหลักฐานที่ต้องมีด้วย"},
        ],
        "rubric": ["intent match", "constraints respected", "evidence named", "no canned response"],
    }


def _write_jsonl(path: Path, rows: list[dict[str, Any]]) -> None:
    """Write ``rows`` to ``path`` as UTF-8 JSON Lines with sorted keys, one object per line."""
    with path.open("w", encoding="utf-8") as handle:
        handle.writelines(
            json.dumps(row, ensure_ascii=False, sort_keys=True) + "\n" for row in rows
        )


def build_command_intensity_governor(out_dir: str | Path, commands: list[str] | None = None) -> dict[str, Any]:
    """Analyze commands, write SFT/eval datasets and a scored report.

    Creates ``out_dir`` if needed and writes four files into it:
    ``command_intensity_sft.jsonl``, ``command_intensity_eval.jsonl``,
    ``command_intensity_governor_report.json`` and
    ``command_intensity_governor_report.md``.

    Args:
        out_dir: Output directory; created with parents if missing.
        commands: Commands to analyze. ``None`` or an empty list falls
            back to a copy of ``SEED_COMMANDS``.

    Returns:
        The report dict that is also serialized to the JSON report file.
    """
    # Any falsy value (None or []) selects the built-in seed commands.
    commands = commands or list(SEED_COMMANDS)
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)

    core = CommandIntensityCore()
    analyses = [core.analyze(command) for command in commands]
    total = len(analyses)

    # A command passes when the anti-template gate passes, or when it has
    # at least two constraints together with some required evidence.
    passed_count = sum(
        1
        for item in analyses
        if item["anti_template_gate"]["passed"]
        or (len(item["constraints"]) >= 2 and item["evidence_required"])
    )
    command_intensity_score = _percent(passed_count, total)
    evidence_score = _percent(sum(1 for item in analyses if item["evidence_required"]), total)
    efficiency_score = _percent(sum(1 for item in analyses if item["efficiency_plan"]), total)

    sft_path = out / "command_intensity_sft.jsonl"
    eval_path = out / "command_intensity_eval.jsonl"
    report_path = out / "command_intensity_governor_report.json"
    md_path = out / "command_intensity_governor_report.md"

    _write_jsonl(
        sft_path,
        [_sft_row(command, analysis, idx, "sft") for idx, (command, analysis) in enumerate(zip(commands, analyses))],
    )
    _write_jsonl(eval_path, [_eval_row(command, idx) for idx, command in enumerate(commands)])

    report = {
        "schema_version": "tinymind-command-intensity-governor-v1",
        "created_at": datetime.now(timezone.utc).isoformat(),
        "json_path": str(report_path),
        "markdown_path": str(md_path),
        "sft_jsonl": str(sft_path),
        "eval_jsonl": str(eval_path),
        "command_count": len(commands),
        "analyses": analyses,
        "scores": {
            "command_intensity_score": command_intensity_score,
            "evidence_score": evidence_score,
            "efficiency_score": efficiency_score,
            "overall": (command_intensity_score + evidence_score + efficiency_score) / 3.0,
        },
        "claim_gate": {
            "command_intensity_ready": command_intensity_score >= 85.0 and evidence_score >= 95.0,
            # Hard-coded: this builder never authorizes a "perfect" claim.
            "perfect_instruction_following_claim_allowed": False,
            "reason": "Ready means the parser/SFT forge is evidence-backed; perfect obedience requires external eval.",
        },
    }
    report_path.write_text(json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True), encoding="utf-8")
    md_path.write_text(_markdown(report), encoding="utf-8")
    return report
def _markdown(report: dict[str, Any]) -> str:
lines = [
"# TinyMind Command Intensity Governor",
"",
f"- Commands: {report['command_count']}",
f"- Overall: {report['scores']['overall']:.2f}",
f"- Command intensity ready: {report['claim_gate']['command_intensity_ready']}",
f"- Perfect instruction following claim allowed: {report['claim_gate']['perfect_instruction_following_claim_allowed']}",
"",
"## Analyses",
"",
]
for item in report["analyses"]:
lines.append(f"- {item['intent']}: {', '.join(item['constraints'])}")
return "\n".join(lines) + "\n"

# File-viewer page footer captured with the source (not part of the module):
# Xet Storage Details
# Size: 6.63 kB · Xet hash: 2880b93c4d734147641d583ab92ba17963e98a2a48fbd92ddab028dddc445479
# Xet efficiently stores files, intelligently splitting them into unique chunks
# and accelerating uploads and downloads. More info.