# Page header captured with the file (not part of the module):
# bbkdevops's picture · download · raw · 9.96 kB
"""Ultra-deep sharpness refiner for TinyMind.
This creates second-order instruction probes: conflicting constraints,
evidence-first tool use, long Thai natural answers, code/math joint reasoning,
and anti-template auditing. The output is train/eval data plus an audit ladder,
not a claim of flawless intelligence.
"""
from __future__ import annotations
from datetime import datetime, timezone
import json
from pathlib import Path
from typing import Any
# Seed probes for the refiner. Each entry names one instruction-following
# axis, carries the (Thai) prompt that exercises it, and lists the rubric
# items an answer is graded against. Order matters: the position in this list
# becomes the numeric suffix of the generated probe id.
PROBES = [
    # Conflicting user constraints (short answer vs. full detail).
    {
        "axis": "conflict_resolution",
        "prompt": "ผู้ใช้สั่งให้ตอบสั้นมาก แต่ขอรายละเอียดครบทุกข้อ ให้แก้ความขัดแย้งโดยถามหรือเลือกกรอบตอบที่ไม่ทิ้งสาระ",
        "rubric": ["detect conflict", "preserve user priority", "state tradeoff", "no fake completion"],
    },
    # Retrieve evidence before summarising.
    {
        "axis": "evidence_first_tool_use",
        "prompt": "ผู้ใช้ถามข้อมูลล่าสุดและสั่งให้สรุปทันที ให้ระบุว่าต้องค้นหลักฐานก่อน แล้วตอบหลังมี source หรือ log เท่านั้น",
        "rubric": ["retrieve first", "cite evidence", "separate known vs unknown", "no unsupported date claim"],
    },
    # Long-form, natural Thai output.
    {
        "axis": "thai_natural_long_answer",
        "prompt": "ตอบภาษาไทยยาวและลึก แต่ต้องเป็นธรรมชาติ ไม่แข็ง ไม่วน และแบ่งประเด็นตามคำถามจริง",
        "rubric": ["natural Thai", "long-form coherence", "no repetition", "question coverage"],
    },
    # Combined math + code reasoning.
    {
        "axis": "code_math_joint_reasoning",
        "prompt": "แก้โจทย์ที่มีทั้งสมการและโค้ด: ระบุ invariant, เขียน patch, และอธิบาย test ที่พิสูจน์ว่าแก้ถูก",
        "rubric": ["math invariant", "code patch plan", "test proof", "edge cases"],
    },
    # Resistance to canned / templated answers.
    {
        "axis": "anti_template_resistance",
        "prompt": "ห้ามใช้คำตอบสำเร็จรูป ให้ตอบโดยอ้างเจตนา ข้อจำกัด หลักฐาน และผลลัพธ์ของงานนี้เท่านั้น",
        "rubric": ["no canned phrase", "intent grounded", "constraint grounded", "evidence grounded"],
    },
    # No success claim before a tool/sandbox observation exists.
    {
        "axis": "tool_observation_wait",
        "prompt": "ถ้ายังไม่ได้เห็นผลจาก sandbox หรือ command ห้ามสรุปว่าสำเร็จ ให้บอกสถานะและสิ่งที่ต้องรอ",
        "rubric": ["wait for observation", "status honesty", "next command", "no hallucinated success"],
    },
    # Training-data filtering judgment.
    {
        "axis": "purity_filter_judgment",
        "prompt": "คัดข้อมูลฝึกที่มีข้อมูลซ้ำ ข่าวลือ และ benchmark contamination โดยอธิบายเหตุผล reject/keep",
        "rubric": ["dedupe", "rumor quarantine", "contamination block", "kept rationale"],
    },
    # One round of self-critique followed by a repaired answer.
    {
        "axis": "self_critique_repair",
        "prompt": "หลังตอบ ให้ตรวจคำตอบตัวเองหนึ่งรอบ: จุดเสี่ยง หลักฐานขาด และ patch คำตอบให้คมขึ้น",
        "rubric": ["self critique", "risk list", "missing evidence", "repaired answer"],
    },
]
class UltraDeepSharpRefiner:
    """Expand seed probes into an audit ladder and score that ladder.

    The scores describe the generated train/eval pressure only (axis
    coverage, required-evidence density, anti-template flagging); the
    returned ``claim_gate`` always forbids flawless/frontier claims.
    """

    # Evidence categories every probe demands. The evidence-density score is
    # normalised by the length of this tuple, so the two cannot drift apart.
    EVIDENCE_REQUIRED = ("intent", "constraints", "evidence", "self_check")

    def refine(self, probes: list[dict[str, Any]] | None = None) -> dict[str, Any]:
        """Build the probe ladder and its score report.

        Args:
            probes: Seed probes, each a mapping with ``"axis"``, ``"prompt"``
                and ``"rubric"`` keys. Defaults to the module-level ``PROBES``.

        Returns:
            A report dict with ``schema_version``, a UTC ISO-8601
            ``created_at`` timestamp, the ``probe_ladder``, a ``scores``
            mapping (all on a 0-100 scale) and a ``claim_gate``.
            An empty probe list yields all-zero scores and a closed gate
            instead of dividing by zero.
        """
        seeds = PROBES if probes is None else probes
        ladder = [self._probe(item, idx) for idx, item in enumerate(seeds)]
        total = len(ladder)

        if total:
            # Fraction of ladder entries that contribute a distinct axis.
            coverage = len({item["axis"] for item in ladder}) / total
            evidence_density = sum(len(item["evidence_required"]) for item in ladder) / (
                total * len(self.EVIDENCE_REQUIRED)
            )
            anti_template = sum(1 for item in ladder if item["anti_template_required"]) / total
        else:
            # Nothing to measure: report zero rather than raise ZeroDivisionError.
            coverage = evidence_density = anti_template = 0.0

        score = 100.0 * min(1.0, (coverage + evidence_density + anti_template) / 3.0)
        return {
            "schema_version": "tinymind-ultra-deep-sharp-refiner-v1",
            "created_at": datetime.now(timezone.utc).isoformat(),
            "probe_ladder": ladder,
            "scores": {
                "coverage_score": 100.0 * coverage,
                "evidence_density_score": 100.0 * min(1.0, evidence_density),
                "anti_template_score": 100.0 * anti_template,
                "ultra_deep_sharpness_score": score,
            },
            "claim_gate": {
                "ultra_deep_local_ready": score >= 95.0,
                "flawless_or_frontier_claim_allowed": False,
                "reason": "This refines local train/eval pressure; flawless or frontier claims need external measurement.",
            },
        }

    @staticmethod
    def _probe(item: dict[str, Any], idx: int) -> dict[str, Any]:
        """Turn one seed probe into a ladder entry.

        Args:
            item: Seed mapping with ``"axis"``, ``"prompt"`` and ``"rubric"``.
            idx: Position of the seed; zero-padded to four digits in the id.

        Returns:
            A new dict; its ``rubric`` and ``evidence_required`` lists are
            fresh copies, so mutating the result never alters the seed data.
        """
        return {
            "id": f"ultra-deep-probe-{idx:04d}",
            "axis": item["axis"],
            "prompt": item["prompt"],
            # Copy so callers editing the report cannot mutate the shared seed list.
            "rubric": list(item["rubric"]),
            "evidence_required": list(UltraDeepSharpRefiner.EVIDENCE_REQUIRED),
            "anti_template_required": True,
            "success_boundary": {
                "must_answer_user_actual_request": True,
                "must_not_claim_unobserved_success": True,
                "must_not_transfer_external_model_scores": True,
            },
        }
def _sft_row(probe: dict[str, Any]) -> dict[str, Any]:
    """Build the supervised-fine-tuning row for one ladder probe.

    The row id is the probe id with ``"probe"`` replaced by ``"sft"``; the
    conversation is system / user (the probe prompt) / assistant (a fixed
    five-line Thai scaffold). The probe's rubric is attached unchanged.
    """
    system_text = (
        "You are TinyMind UltraDeepSharp. Follow intent, constraints, evidence, and self-check. "
        "Never use a fixed answer."
    )
    # Five scaffold lines, newline-separated, with no trailing newline.
    answer_text = "\n".join(
        (
            "เจตนา: แยกคำสั่งจริงก่อน",
            "ข้อจำกัด: ตรวจข้อห้ามและความขัดแย้ง",
            "หลักฐาน: ระบุสิ่งที่ต้องมี เช่น log/source/tool observation",
            "คำตอบ: ทำเฉพาะส่วนที่พิสูจน์ได้ แล้วบอกขอบเขตที่ยังต้องวัด",
            "ตรวจตัวเอง: เช็คว่าไม่ตอบฟิก ไม่หลอน และไม่อ้างผลที่ยังไม่มี",
        )
    )
    conversation = [
        {"role": "system", "content": system_text},
        {"role": "user", "content": probe["prompt"]},
        {"role": "assistant", "content": answer_text},
    ]
    row_id = probe["id"].replace("probe", "sft")
    return {
        "id": row_id,
        "domain": "ultra_deep_sharp_instruction",
        "messages": conversation,
        "rubric": probe["rubric"],
    }
def _eval_row(probe: dict[str, Any]) -> dict[str, Any]:
    """Build the evaluation row for one ladder probe.

    The row id is the probe id with ``"probe"`` replaced by ``"eval"``, the
    domain is the probe's axis, and the user message is the probe prompt with
    a fixed Thai suffix appended. No assistant turn is included.
    """
    suffix = " ให้ตอบแบบมีหลักฐานและตรวจตัวเองท้ายคำตอบ"
    system_turn = {"role": "system", "content": "Evaluate TinyMind ultra-deep command following."}
    user_turn = {"role": "user", "content": probe["prompt"] + suffix}
    return {
        "id": probe["id"].replace("probe", "eval"),
        "domain": probe["axis"],
        "messages": [system_turn, user_turn],
        "rubric": probe["rubric"],
    }
def _audit_row(probe: dict[str, Any]) -> dict[str, Any]:
    """Build the audit-checklist row for one ladder probe.

    The row id is the probe id with ``"probe"`` replaced by ``"audit"``.
    Every row carries the same five checks and requires all of them to pass.
    """
    checks = [
        "answer_matches_prompt",
        "evidence_named",
        "no_fixed_template",
        "no_unobserved_success_claim",
        "self_check_present",
    ]
    return {
        "id": probe["id"].replace("probe", "audit"),
        "axis": probe["axis"],
        "checks": checks,
        # All checks must pass, i.e. the threshold equals the checklist size (5).
        "pass_threshold": len(checks),
    }
def build_ultra_deep_sharp_refiner(out_dir: str | Path) -> dict[str, Any]:
    """Run the refiner and write all of its artifacts into ``out_dir``.

    Creates ``out_dir`` (and parents) if needed, then writes the JSON report,
    the Markdown summary, and the SFT / eval / audit JSONL files. The
    artifact paths are recorded in the report before it is serialised.

    Returns:
        The report dict, including the five artifact path entries.
    """
    target = Path(out_dir)
    target.mkdir(parents=True, exist_ok=True)

    report = UltraDeepSharpRefiner().refine()

    # Report key -> destination file, in the order the keys are added.
    artifacts = {
        "json_path": target / "ultra_deep_sharp_refiner_report.json",
        "markdown_path": target / "ultra_deep_sharp_refiner_report.md",
        "sft_jsonl": target / "ultra_deep_sharp_sft.jsonl",
        "eval_jsonl": target / "ultra_deep_sharp_eval.jsonl",
        "audit_jsonl": target / "ultra_deep_sharp_audit.jsonl",
    }
    for key, location in artifacts.items():
        report[key] = str(location)

    serialized = json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True)
    artifacts["json_path"].write_text(serialized, encoding="utf-8")
    artifacts["markdown_path"].write_text(_markdown(report), encoding="utf-8")

    ladder = report["probe_ladder"]
    row_builders = (
        ("sft_jsonl", _sft_row),
        ("eval_jsonl", _eval_row),
        ("audit_jsonl", _audit_row),
    )
    for key, make_row in row_builders:
        _write_jsonl(artifacts[key], [make_row(probe) for probe in ladder])

    return report
def _write_jsonl(path: Path, rows: list[dict[str, Any]]) -> None:
    """Write ``rows`` to ``path`` as UTF-8 JSON Lines, one object per line.

    Keys are sorted and non-ASCII characters are written as-is; any existing
    file at ``path`` is overwritten.
    """
    with path.open("w", encoding="utf-8") as handle:
        handle.writelines(
            json.dumps(row, ensure_ascii=False, sort_keys=True) + "\n" for row in rows
        )
def _markdown(report: dict[str, Any]) -> str:
    """Render the report as a short Markdown summary.

    The output has a title, three bullet lines (sharpness score to two
    decimals, local-ready flag, flawless/frontier flag) and a ``## Probes``
    section with one ``- axis: prompt`` bullet per ladder entry. The text
    always ends with a newline.
    """
    scores = report["scores"]
    gate = report["claim_gate"]
    header = [
        "# TinyMind UltraDeepSharp Refiner",
        "",
        f"- Ultra deep sharpness score: {scores['ultra_deep_sharpness_score']:.2f}",
        f"- Local ready: {gate['ultra_deep_local_ready']}",
        f"- Flawless/frontier claim allowed: {gate['flawless_or_frontier_claim_allowed']}",
        "",
        "## Probes",
        "",
    ]
    bullets = [f"- {entry['axis']}: {entry['prompt']}" for entry in report["probe_ladder"]]
    return "\n".join(header + bullets) + "\n"

# Page footer captured with the file (not part of the module):
# Xet Storage Details
# Size: 9.96 kB · Xet hash: 071f7e88ee21c236d5a8ea3846339735f9daccf9bcbeb84b947def863be2e1d5
# Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.