Buckets:

bbkdevops
/

unicosys-hypergraph-bucket

Files

xet

bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff /bundle /evaluation /ultra_deep_sharp_refiner.py

bbkdevops

about 1 month ago

download

raw

9.96 kB

	"""Ultra-deep sharpness refiner for TinyMind.

	This creates second-order instruction probes: conflicting constraints,
	evidence-first tool use, long Thai natural answers, code/math joint reasoning,
	and anti-template auditing. The output is train/eval data plus an audit ladder,
	not a claim of flawless intelligence.
	"""

	from __future__ import annotations

	from datetime import datetime, timezone
	import json
	from pathlib import Path
	from typing import Any


	# Seed probes for the refiner. Each entry is a dict with three keys:
	#   "axis"   - unique label for the behaviour being probed,
	#   "prompt" - the instruction text (written in Thai, with some English terms),
	#   "rubric" - list of short English criteria attached to rows built from it.
	# UltraDeepSharpRefiner.refine() enumerates this list in order, so the
	# position of an entry determines the numeric suffix of its probe id.
	PROBES = [
	# Prompt gloss: the user demands a very short answer yet full detail on every
	# point; resolve the conflict by asking or by picking a frame that keeps the substance.
	{
	"axis": "conflict_resolution",
	"prompt": "ผู้ใช้สั่งให้ตอบสั้นมาก แต่ขอรายละเอียดครบทุกข้อ ให้แก้ความขัดแย้งโดยถามหรือเลือกกรอบตอบที่ไม่ทิ้งสาระ",
	"rubric": ["detect conflict", "preserve user priority", "state tradeoff", "no fake completion"],
	},
	# Prompt gloss: the user asks for the latest information and an immediate summary;
	# state that evidence must be retrieved first and answer only once a source or log exists.
	{
	"axis": "evidence_first_tool_use",
	"prompt": "ผู้ใช้ถามข้อมูลล่าสุดและสั่งให้สรุปทันที ให้ระบุว่าต้องค้นหลักฐานก่อน แล้วตอบหลังมี source หรือ log เท่านั้น",
	"rubric": ["retrieve first", "cite evidence", "separate known vs unknown", "no unsupported date claim"],
	},
	# Prompt gloss: answer in long, deep Thai that stays natural (not stiff, not
	# circular) and is organised around the actual question.
	{
	"axis": "thai_natural_long_answer",
	"prompt": "ตอบภาษาไทยยาวและลึก แต่ต้องเป็นธรรมชาติ ไม่แข็ง ไม่วน และแบ่งประเด็นตามคำถามจริง",
	"rubric": ["natural Thai", "long-form coherence", "no repetition", "question coverage"],
	},
	# Prompt gloss: solve a problem mixing equations and code: state the invariant,
	# write the patch, and explain the test that proves the fix.
	{
	"axis": "code_math_joint_reasoning",
	"prompt": "แก้โจทย์ที่มีทั้งสมการและโค้ด: ระบุ invariant, เขียน patch, และอธิบาย test ที่พิสูจน์ว่าแก้ถูก",
	"rubric": ["math invariant", "code patch plan", "test proof", "edge cases"],
	},
	# Prompt gloss: no canned answers; answer only by referring to this task's
	# intent, constraints, evidence, and results.
	{
	"axis": "anti_template_resistance",
	"prompt": "ห้ามใช้คำตอบสำเร็จรูป ให้ตอบโดยอ้างเจตนา ข้อจำกัด หลักฐาน และผลลัพธ์ของงานนี้เท่านั้น",
	"rubric": ["no canned phrase", "intent grounded", "constraint grounded", "evidence grounded"],
	},
	# Prompt gloss: if no sandbox or command output has been observed yet, do not
	# conclude success; report the status and what still has to be waited for.
	{
	"axis": "tool_observation_wait",
	"prompt": "ถ้ายังไม่ได้เห็นผลจาก sandbox หรือ command ห้ามสรุปว่าสำเร็จ ให้บอกสถานะและสิ่งที่ต้องรอ",
	"rubric": ["wait for observation", "status honesty", "next command", "no hallucinated success"],
	},
	# Prompt gloss: filter training data containing duplicates, rumours, and
	# benchmark contamination, explaining the reject/keep reasoning.
	{
	"axis": "purity_filter_judgment",
	"prompt": "คัดข้อมูลฝึกที่มีข้อมูลซ้ำ ข่าวลือ และ benchmark contamination โดยอธิบายเหตุผล reject/keep",
	"rubric": ["dedupe", "rumor quarantine", "contamination block", "kept rationale"],
	},
	# Prompt gloss: after answering, review the answer once: risk points, missing
	# evidence, then patch the answer to make it sharper.
	{
	"axis": "self_critique_repair",
	"prompt": "หลังตอบ ให้ตรวจคำตอบตัวเองหนึ่งรอบ: จุดเสี่ยง หลักฐานขาด และ patch คำตอบให้คมขึ้น",
	"rubric": ["self critique", "risk list", "missing evidence", "repaired answer"],
	},
	]


	class UltraDeepSharpRefiner:
	    """Expand the module-level ``PROBES`` into a probe ladder and score it.

	    The scores describe the generated ladder itself (axis coverage, how many
	    evidence slots each probe declares, how many probes demand anti-template
	    behaviour); they are not measurements of any model.
	    """

	    # Evidence categories every generated probe declares. The density score is
	    # normalised by the length of this tuple, so the two cannot drift apart.
	    _EVIDENCE_REQUIRED = ("intent", "constraints", "evidence", "self_check")

	    def refine(self) -> dict[str, Any]:
	        """Build the probe ladder and the report that wraps it.

	        Returns:
	            A dict with ``schema_version``, a UTC ISO-8601 ``created_at``
	            timestamp, the ``probe_ladder`` list, a ``scores`` dict (each
	            value on a 0-100 scale) and a ``claim_gate`` dict. When
	            ``PROBES`` is empty every score is ``0.0`` instead of the
	            computation failing with ``ZeroDivisionError``.
	        """
	        ladder = [self._probe(item, idx) for idx, item in enumerate(PROBES)]
	        total = len(ladder)
	        if total:
	            # Fraction of distinct axes; below 1.0 only if PROBES repeats an axis.
	            coverage = len({row["axis"] for row in ladder}) / total
	            evidence_slots = total * len(self._EVIDENCE_REQUIRED)
	            evidence_density = sum(len(row["evidence_required"]) for row in ladder) / evidence_slots
	            anti_template = sum(1 for row in ladder if row["anti_template_required"]) / total
	        else:
	            # Nothing to measure: report zeros rather than dividing by zero.
	            coverage = evidence_density = anti_template = 0.0
	        # Mean of the three ratios, capped at 1.0 and scaled to 0-100.
	        score = 100.0 * min(1.0, (coverage + evidence_density + anti_template) / 3.0)
	        return {
	            "schema_version": "tinymind-ultra-deep-sharp-refiner-v1",
	            "created_at": datetime.now(timezone.utc).isoformat(),
	            "probe_ladder": ladder,
	            "scores": {
	                "coverage_score": 100.0 * coverage,
	                "evidence_density_score": 100.0 * min(1.0, evidence_density),
	                "anti_template_score": 100.0 * anti_template,
	                "ultra_deep_sharpness_score": score,
	            },
	            "claim_gate": {
	                "ultra_deep_local_ready": score >= 95.0,
	                # Hard-coded: this report never authorises such a claim.
	                "flawless_or_frontier_claim_allowed": False,
	                "reason": "This refines local train/eval pressure; flawless or frontier claims need external measurement.",
	            },
	        }

	    @staticmethod
	    def _probe(item: dict[str, Any], idx: int) -> dict[str, Any]:
	        """Turn one seed entry into a ladder row.

	        Args:
	            item: Seed dict providing ``axis``, ``prompt`` and ``rubric``.
	            idx: Position of the seed; zero-padded to four digits in the id.

	        Returns:
	            A new dict carrying the seed fields plus the fixed evidence,
	            anti-template and success-boundary requirements. ``rubric`` and
	            ``evidence_required`` are fresh lists, so editing a returned row
	            never alters the seed data or another row.
	        """
	        return {
	            "id": f"ultra-deep-probe-{idx:04d}",
	            "axis": item["axis"],
	            "prompt": item["prompt"],
	            # Copy so the row does not alias the list stored in the seed.
	            "rubric": list(item["rubric"]),
	            "evidence_required": list(UltraDeepSharpRefiner._EVIDENCE_REQUIRED),
	            "anti_template_required": True,
	            "success_boundary": {
	                "must_answer_user_actual_request": True,
	                "must_not_claim_unobserved_success": True,
	                "must_not_transfer_external_model_scores": True,
	            },
	        }


	def _sft_row(probe: dict[str, Any]) -> dict[str, Any]:
	    """Build the supervised fine-tuning row for one probe-ladder entry.

	    Args:
	        probe: Ladder entry; its ``id``, ``prompt`` and ``rubric`` are read.

	    Returns:
	        A dict with the probe id rewritten from ``probe`` to ``sft``, a fixed
	        ``domain``, a system/user/assistant ``messages`` list and the probe's
	        ``rubric`` (the same object, not a copy).
	    """
	    row_id = probe["id"].replace("probe", "sft")
	    system_turn = {
	        "role": "system",
	        "content": "You are TinyMind UltraDeepSharp. Follow intent, constraints, evidence, and self-check. Never use a fixed answer.",
	    }
	    user_turn = {"role": "user", "content": probe["prompt"]}
	    # The assistant text is identical for every probe: five labelled lines
	    # (intent, constraints, evidence, answer, self-check) joined by newlines.
	    scaffold = [
	        "เจตนา: แยกคำสั่งจริงก่อน",
	        "ข้อจำกัด: ตรวจข้อห้ามและความขัดแย้ง",
	        "หลักฐาน: ระบุสิ่งที่ต้องมี เช่น log/source/tool observation",
	        "คำตอบ: ทำเฉพาะส่วนที่พิสูจน์ได้ แล้วบอกขอบเขตที่ยังต้องวัด",
	        "ตรวจตัวเอง: เช็คว่าไม่ตอบฟิก ไม่หลอน และไม่อ้างผลที่ยังไม่มี",
	    ]
	    assistant_turn = {"role": "assistant", "content": "\n".join(scaffold)}
	    return {
	        "id": row_id,
	        "domain": "ultra_deep_sharp_instruction",
	        "messages": [system_turn, user_turn, assistant_turn],
	        "rubric": probe["rubric"],
	    }


	def _eval_row(probe: dict[str, Any]) -> dict[str, Any]:
	    """Build the evaluation row for one probe-ladder entry.

	    Args:
	        probe: Ladder entry; its ``id``, ``axis``, ``prompt`` and ``rubric``
	            are read.

	    Returns:
	        A dict whose ``id`` swaps ``probe`` for ``eval``, whose ``domain`` is
	        the probe axis, and whose ``messages`` hold a system turn plus the
	        user prompt with a fixed Thai suffix appended. No assistant turn is
	        included.
	    """
	    eval_id = probe["id"].replace("probe", "eval")
	    axis = probe["axis"]
	    # Suffix gloss: "answer with evidence and self-check at the end of the answer".
	    question = probe["prompt"] + " ให้ตอบแบบมีหลักฐานและตรวจตัวเองท้ายคำตอบ"
	    conversation = [
	        {"role": "system", "content": "Evaluate TinyMind ultra-deep command following."},
	        {"role": "user", "content": question},
	    ]
	    return {
	        "id": eval_id,
	        "domain": axis,
	        "messages": conversation,
	        "rubric": probe["rubric"],
	    }


	def _audit_row(probe: dict[str, Any]) -> dict[str, Any]:
	    """Build the audit-checklist row for one probe-ladder entry.

	    Args:
	        probe: Ladder entry; only its ``id`` and ``axis`` are read.

	    Returns:
	        A dict whose ``id`` swaps ``probe`` for ``audit``, carrying the probe
	        axis, a fixed list of five check names and ``pass_threshold`` 5.
	    """
	    audit_id = probe["id"].replace("probe", "audit")
	    axis = probe["axis"]
	    # The same five checks apply to every probe.
	    required_checks = [
	        "answer_matches_prompt",
	        "evidence_named",
	        "no_fixed_template",
	        "no_unobserved_success_claim",
	        "self_check_present",
	    ]
	    return dict(id=audit_id, axis=axis, checks=required_checks, pass_threshold=5)


	def build_ultra_deep_sharp_refiner(out_dir: str | Path) -> dict[str, Any]:
	    """Run the refiner and write its report and datasets into ``out_dir``.

	    Five files are written, all UTF-8:
	    ``ultra_deep_sharp_refiner_report.json``,
	    ``ultra_deep_sharp_refiner_report.md``, ``ultra_deep_sharp_sft.jsonl``,
	    ``ultra_deep_sharp_eval.jsonl`` and ``ultra_deep_sharp_audit.jsonl``.

	    Args:
	        out_dir: Target directory; created (with parents) if it is missing.

	    Returns:
	        The report dict from ``UltraDeepSharpRefiner.refine()`` extended with
	        the string paths of the five files that were written.
	    """
	    out = Path(out_dir)
	    out.mkdir(parents=True, exist_ok=True)
	    report = UltraDeepSharpRefiner().refine()
	    json_path = out / "ultra_deep_sharp_refiner_report.json"
	    md_path = out / "ultra_deep_sharp_refiner_report.md"
	    sft_path = out / "ultra_deep_sharp_sft.jsonl"
	    eval_path = out / "ultra_deep_sharp_eval.jsonl"
	    audit_path = out / "ultra_deep_sharp_audit.jsonl"
	    # Record the output locations before serialising, so the JSON report
	    # on disk contains them as well.
	    report.update(
	        {
	            "json_path": str(json_path),
	            "markdown_path": str(md_path),
	            "sft_jsonl": str(sft_path),
	            "eval_jsonl": str(eval_path),
	            "audit_jsonl": str(audit_path),
	        }
	    )
	    json_path.write_text(json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True), encoding="utf-8")
	    md_path.write_text(_markdown(report), encoding="utf-8")
	    # All three datasets are derived from the same ladder, one row per probe.
	    ladder = report["probe_ladder"]
	    _write_jsonl(sft_path, [_sft_row(probe) for probe in ladder])
	    _write_jsonl(eval_path, [_eval_row(probe) for probe in ladder])
	    _write_jsonl(audit_path, [_audit_row(probe) for probe in ladder])
	    return report


	def _write_jsonl(path: Path, rows: list[dict[str, Any]]) -> None:
	    """Write ``rows`` to ``path`` as JSON Lines, replacing any existing file.

	    Each row becomes one line of JSON with sorted keys and non-ASCII
	    characters kept as-is; the file is opened as UTF-8 text.

	    Args:
	        path: Destination file.
	        rows: Dicts to serialise, one per output line.
	    """
	    with path.open("w", encoding="utf-8") as handle:
	        handle.writelines(
	            json.dumps(record, ensure_ascii=False, sort_keys=True) + "\n"
	            for record in rows
	        )


	def _markdown(report: dict[str, Any]) -> str:
	    """Render the report as a short Markdown summary.

	    Args:
	        report: Report dict; ``scores``, ``claim_gate`` and ``probe_ladder``
	            are read.

	    Returns:
	        Markdown text ending in a newline: a title, three summary bullets
	        (the score formatted to two decimals) and one bullet per probe in
	        the form ``- <axis>: <prompt>``.
	    """
	    gate = report["claim_gate"]
	    sharpness = report["scores"]["ultra_deep_sharpness_score"]
	    header = [
	        "# TinyMind UltraDeepSharp Refiner",
	        "",
	        f"- Ultra deep sharpness score: {sharpness:.2f}",
	        f"- Local ready: {gate['ultra_deep_local_ready']}",
	        f"- Flawless/frontier claim allowed: {gate['flawless_or_frontier_claim_allowed']}",
	        "",
	        "## Probes",
	        "",
	    ]
	    bullets = [f"- {entry['axis']}: {entry['prompt']}" for entry in report["probe_ladder"]]
	    return "\n".join(header + bullets) + "\n"

Xet Storage Details

Size:: 9.96 kB
Xet hash:: 071f7e88ee21c236d5a8ea3846339735f9daccf9bcbeb84b947def863be2e1d5

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.