Buckets:
bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff /bundle /evaluation /ultra_deep_sharp_refiner.py
| """Ultra-deep sharpness refiner for TinyMind. | |
| This creates second-order instruction probes: conflicting constraints, | |
| evidence-first tool use, long Thai natural answers, code/math joint reasoning, | |
| and anti-template auditing. The output is train/eval data plus an audit ladder, | |
| not a claim of flawless intelligence. | |
| """ | |
| from __future__ import annotations | |
| from datetime import datetime, timezone | |
| import json | |
| from pathlib import Path | |
| from typing import Any | |
# Second-order instruction probes, one per evaluation axis.
# Each source row is (axis, Thai prompt, rubric criteria); the comprehension
# expands it into the {"axis", "prompt", "rubric"} dict shape that the refiner
# and the row builders read.
PROBES = [
    {"axis": axis, "prompt": prompt, "rubric": rubric}
    for axis, prompt, rubric in (
        (
            "conflict_resolution",
            "ผู้ใช้สั่งให้ตอบสั้นมาก แต่ขอรายละเอียดครบทุกข้อ ให้แก้ความขัดแย้งโดยถามหรือเลือกกรอบตอบที่ไม่ทิ้งสาระ",
            ["detect conflict", "preserve user priority", "state tradeoff", "no fake completion"],
        ),
        (
            "evidence_first_tool_use",
            "ผู้ใช้ถามข้อมูลล่าสุดและสั่งให้สรุปทันที ให้ระบุว่าต้องค้นหลักฐานก่อน แล้วตอบหลังมี source หรือ log เท่านั้น",
            ["retrieve first", "cite evidence", "separate known vs unknown", "no unsupported date claim"],
        ),
        (
            "thai_natural_long_answer",
            "ตอบภาษาไทยยาวและลึก แต่ต้องเป็นธรรมชาติ ไม่แข็ง ไม่วน และแบ่งประเด็นตามคำถามจริง",
            ["natural Thai", "long-form coherence", "no repetition", "question coverage"],
        ),
        (
            "code_math_joint_reasoning",
            "แก้โจทย์ที่มีทั้งสมการและโค้ด: ระบุ invariant, เขียน patch, และอธิบาย test ที่พิสูจน์ว่าแก้ถูก",
            ["math invariant", "code patch plan", "test proof", "edge cases"],
        ),
        (
            "anti_template_resistance",
            "ห้ามใช้คำตอบสำเร็จรูป ให้ตอบโดยอ้างเจตนา ข้อจำกัด หลักฐาน และผลลัพธ์ของงานนี้เท่านั้น",
            ["no canned phrase", "intent grounded", "constraint grounded", "evidence grounded"],
        ),
        (
            "tool_observation_wait",
            "ถ้ายังไม่ได้เห็นผลจาก sandbox หรือ command ห้ามสรุปว่าสำเร็จ ให้บอกสถานะและสิ่งที่ต้องรอ",
            ["wait for observation", "status honesty", "next command", "no hallucinated success"],
        ),
        (
            "purity_filter_judgment",
            "คัดข้อมูลฝึกที่มีข้อมูลซ้ำ ข่าวลือ และ benchmark contamination โดยอธิบายเหตุผล reject/keep",
            ["dedupe", "rumor quarantine", "contamination block", "kept rationale"],
        ),
        (
            "self_critique_repair",
            "หลังตอบ ให้ตรวจคำตอบตัวเองหนึ่งรอบ: จุดเสี่ยง หลักฐานขาด และ patch คำตอบให้คมขึ้น",
            ["self critique", "risk list", "missing evidence", "repaired answer"],
        ),
    )
]
class UltraDeepSharpRefiner:
    """Expand ``PROBES`` into a probe ladder and score it.

    The report describes local train/eval pressure only; its claim gate always
    sets ``flawless_or_frontier_claim_allowed`` to ``False``.
    """

    # Evidence categories attached to every probe. The length of this tuple is
    # also the denominator that normalises the evidence-density score, so the
    # two can never drift apart.
    _EVIDENCE_REQUIRED = ("intent", "constraints", "evidence", "self_check")

    def refine(self) -> dict[str, Any]:
        """Build the probe ladder and compute the sharpness scores.

        Returns:
            A report dict with ``schema_version``, ``created_at`` (UTC,
            ISO 8601), ``probe_ladder``, ``scores`` (each on a 0-100 scale)
            and ``claim_gate``. ``ultra_deep_local_ready`` is true only when
            the combined score is at least 95.
        """
        ladder = [self._probe(item, idx) for idx, item in enumerate(PROBES)]
        total = len(ladder)
        if total:
            # Share of probes that contribute a distinct axis.
            coverage = len({item["axis"] for item in ladder}) / total
            evidence_density = sum(len(item["evidence_required"]) for item in ladder) / (
                total * len(self._EVIDENCE_REQUIRED)
            )
            anti_template = sum(1 for item in ladder if item["anti_template_required"]) / total
        else:
            # No probes: report zero scores instead of dividing by zero.
            coverage = evidence_density = anti_template = 0.0
        score = 100.0 * min(1.0, (coverage + evidence_density + anti_template) / 3.0)
        return {
            "schema_version": "tinymind-ultra-deep-sharp-refiner-v1",
            "created_at": datetime.now(timezone.utc).isoformat(),
            "probe_ladder": ladder,
            "scores": {
                "coverage_score": 100.0 * coverage,
                "evidence_density_score": 100.0 * min(1.0, evidence_density),
                "anti_template_score": 100.0 * anti_template,
                "ultra_deep_sharpness_score": score,
            },
            "claim_gate": {
                "ultra_deep_local_ready": score >= 95.0,
                "flawless_or_frontier_claim_allowed": False,
                "reason": "This refines local train/eval pressure; flawless or frontier claims need external measurement.",
            },
        }

    # Declared static because the helper takes only (item, idx) yet refine()
    # invokes it through the instance as self._probe(item, idx).
    @staticmethod
    def _probe(item: dict[str, Any], idx: int) -> dict[str, Any]:
        """Wrap one ``PROBES`` entry into a ladder probe.

        Args:
            item: Mapping with ``axis``, ``prompt`` and ``rubric`` keys.
            idx: Position of the probe, zero-padded to four digits in the id.

        Returns:
            The probe dict with its id, the copied axis/prompt/rubric, the
            required evidence categories and the fixed success boundary.
        """
        return {
            "id": f"ultra-deep-probe-{idx:04d}",
            "axis": item["axis"],
            "prompt": item["prompt"],
            "rubric": item["rubric"],
            # Fresh list per probe so rows never share a mutable object.
            "evidence_required": list(UltraDeepSharpRefiner._EVIDENCE_REQUIRED),
            "anti_template_required": True,
            "success_boundary": {
                "must_answer_user_actual_request": True,
                "must_not_claim_unobserved_success": True,
                "must_not_transfer_external_model_scores": True,
            },
        }
def _sft_row(probe: dict[str, Any]) -> dict[str, Any]:
    """Render a ladder probe as a supervised fine-tuning chat row.

    The row carries a fixed system turn, the probe prompt as the user turn and
    a fixed five-line Thai assistant scaffold, plus the probe's rubric.
    """
    row_id = probe["id"].replace("probe", "sft")
    system_turn = {
        "role": "system",
        "content": "You are TinyMind UltraDeepSharp. Follow intent, constraints, evidence, and self-check. Never use a fixed answer.",
    }
    user_turn = {"role": "user", "content": probe["prompt"]}
    # One scaffold line per evidence category; joined without a trailing newline.
    scaffold = [
        "เจตนา: แยกคำสั่งจริงก่อน",
        "ข้อจำกัด: ตรวจข้อห้ามและความขัดแย้ง",
        "หลักฐาน: ระบุสิ่งที่ต้องมี เช่น log/source/tool observation",
        "คำตอบ: ทำเฉพาะส่วนที่พิสูจน์ได้ แล้วบอกขอบเขตที่ยังต้องวัด",
        "ตรวจตัวเอง: เช็คว่าไม่ตอบฟิก ไม่หลอน และไม่อ้างผลที่ยังไม่มี",
    ]
    assistant_turn = {"role": "assistant", "content": "\n".join(scaffold)}
    return {
        "id": row_id,
        "domain": "ultra_deep_sharp_instruction",
        "messages": [system_turn, user_turn, assistant_turn],
        "rubric": probe["rubric"],
    }
def _eval_row(probe: dict[str, Any]) -> dict[str, Any]:
    """Render a ladder probe as an evaluation chat row.

    The row has no assistant turn: only a fixed system turn and the probe
    prompt with a fixed Thai suffix appended. The probe axis is the domain.
    """
    row_id = probe["id"].replace("probe", "eval")
    axis = probe["axis"]
    suffix = " ให้ตอบแบบมีหลักฐานและตรวจตัวเองท้ายคำตอบ"
    messages = [
        {"role": "system", "content": "Evaluate TinyMind ultra-deep command following."},
        {"role": "user", "content": probe["prompt"] + suffix},
    ]
    return {
        "id": row_id,
        "domain": axis,
        "messages": messages,
        "rubric": probe["rubric"],
    }
def _audit_row(probe: dict[str, Any]) -> dict[str, Any]:
    """Render a ladder probe as an audit row.

    Each row lists the same five named checks and a pass threshold of 5.
    """
    audit_id = probe["id"].replace("probe", "audit")
    axis = probe["axis"]
    checks = [
        "answer_matches_prompt",
        "evidence_named",
        "no_fixed_template",
        "no_unobserved_success_claim",
        "self_check_present",
    ]
    return {"id": audit_id, "axis": axis, "checks": checks, "pass_threshold": 5}
def build_ultra_deep_sharp_refiner(out_dir: str | Path) -> dict[str, Any]:
    """Run the refiner and write its report and datasets into ``out_dir``.

    Creates ``out_dir`` (including parents) if needed, then writes the JSON
    report, the Markdown summary and the SFT / eval / audit JSONL files.

    Args:
        out_dir: Directory that receives the five output files.

    Returns:
        The report dict, extended with the string path of every file written.
    """
    target = Path(out_dir)
    target.mkdir(parents=True, exist_ok=True)
    report = UltraDeepSharpRefiner().refine()

    json_path = target / "ultra_deep_sharp_refiner_report.json"
    md_path = target / "ultra_deep_sharp_refiner_report.md"
    sft_path = target / "ultra_deep_sharp_sft.jsonl"
    eval_path = target / "ultra_deep_sharp_eval.jsonl"
    audit_path = target / "ultra_deep_sharp_audit.jsonl"

    # Record the output locations before serialising so the JSON report
    # itself lists them.
    report["json_path"] = str(json_path)
    report["markdown_path"] = str(md_path)
    report["sft_jsonl"] = str(sft_path)
    report["eval_jsonl"] = str(eval_path)
    report["audit_jsonl"] = str(audit_path)

    serialized = json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True)
    json_path.write_text(serialized, encoding="utf-8")
    md_path.write_text(_markdown(report), encoding="utf-8")

    ladder = report["probe_ladder"]
    for jsonl_path, make_row in (
        (sft_path, _sft_row),
        (eval_path, _eval_row),
        (audit_path, _audit_row),
    ):
        _write_jsonl(jsonl_path, [make_row(probe) for probe in ladder])
    return report
def _write_jsonl(path: Path, rows: list[dict[str, Any]]) -> None:
    """Write ``rows`` to ``path`` as UTF-8 JSON Lines, one object per line.

    Keys are sorted and non-ASCII text is written unescaped. An existing file
    is overwritten; an empty ``rows`` produces an empty file.
    """
    with path.open("w", encoding="utf-8") as handle:
        handle.writelines(
            json.dumps(record, ensure_ascii=False, sort_keys=True) + "\n"
            for record in rows
        )
def _markdown(report: dict[str, Any]) -> str:
    """Format the report as a short Markdown summary.

    The summary has a title, three bullets (sharpness score to two decimals,
    local-ready flag, flawless/frontier flag) and one bullet per probe under a
    "Probes" heading. The text always ends with a newline.
    """
    sharpness = report["scores"]["ultra_deep_sharpness_score"]
    score_line = f"- Ultra deep sharpness score: {sharpness:.2f}"
    gate = report["claim_gate"]
    header = [
        "# TinyMind UltraDeepSharp Refiner",
        "",
        score_line,
        f"- Local ready: {gate['ultra_deep_local_ready']}",
        f"- Flawless/frontier claim allowed: {gate['flawless_or_frontier_claim_allowed']}",
        "",
        "## Probes",
        "",
    ]
    bullets = [f"- {entry['axis']}: {entry['prompt']}" for entry in report["probe_ladder"]]
    return "\n".join(header + bullets) + "\n"
Xet Storage Details
- Size:
- 9.96 kB
- Xet hash:
- 071f7e88ee21c236d5a8ea3846339735f9daccf9bcbeb84b947def863be2e1d5
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.