Buckets:
bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff /bundle /evaluation /elastic_answer.py
| """Elastic Answer Protocol for variable short/deep grounded responses. | |
| This is TinyMind's answer-lattice layer: a small model can answer briefly or | |
| expand into long, evidence-linked parts without depending on a giant parameter | |
| store. The protocol separates the core claim, evidence ribs, and expansion | |
| leaves so long answers stay controllable and auditable. | |
| """ | |
| from __future__ import annotations | |
| from datetime import datetime, timezone | |
| import json | |
| from pathlib import Path | |
| from evaluation.pure_oracle_kernel import PureOracleKernel | |
# Every mode name a caller may pass as ``requested``/``mode``. "auto" is the
# sentinel that asks infer_answer_mode to pick a mode from the question text.
ANSWER_MODES = {
    "auto",
    "brief",
    "standard",
    "deep",
    "exhaustive",
    "fluid",
}
def infer_answer_mode(question: str, requested: str = "auto") -> str:
    """Resolve the answer mode for a question.

    An explicit ``requested`` mode (anything other than ``"auto"``) is returned
    unchanged once it has been checked against ``ANSWER_MODES``. With
    ``"auto"`` the lower-cased question is scanned for Thai/English marker
    substrings; the first marker group that matches decides the mode.

    Args:
        question: The user question to scan for mode markers.
        requested: Mode asked for by the caller, or ``"auto"`` to infer it.

    Returns:
        ``requested`` when it is explicit and valid, otherwise one of
        ``"fluid"``, ``"brief"``, ``"exhaustive"`` or ``"standard"``.

    Raises:
        ValueError: If ``requested`` is not ``"auto"`` and not in
            ``ANSWER_MODES``.
    """
    if requested != "auto":
        if requested in ANSWER_MODES:
            return requested
        raise ValueError(f"unknown answer mode: {requested}")

    lowered = question.lower()

    # Order is significant: earlier groups win when several markers appear.
    marker_groups = (
        ("fluid", ("ไร้รูปแบบ", "ไม่ตายตัว", "ธรรมชาติ", "fluid", "freeform")),
        ("brief", ("สั้น", "short", "brief", "สรุป")),
        ("exhaustive", ("ละเอียด", "ยาว", "deep", "exhaustive", "no limit", "ไม่จำกัด", "ทั้งหมด")),
    )
    for mode_name, markers in marker_groups:
        for marker in markers:
            if marker in lowered:
                return mode_name

    # No marker matched: fall back to the default depth.
    return "standard"
| def _budget(mode: str) -> dict: | |
| table = { | |
| "brief": {"target_parts": 1, "target_sections": 1, "continuation_allowed": False}, | |
| "standard": {"target_parts": 1, "target_sections": 3, "continuation_allowed": False}, | |
| "deep": {"target_parts": 2, "target_sections": 6, "continuation_allowed": True}, | |
| "exhaustive": {"target_parts": 4, "target_sections": 12, "continuation_allowed": True}, | |
| "fluid": {"target_parts": 1, "target_sections": 0, "continuation_allowed": True}, | |
| } | |
| return table[mode] | |
class ElasticAnswerProtocol:
    """Compose grounded answers at variable depth using the PureOracle kernel.

    ``compose`` asks the kernel for an oracle result and turns it into a
    result dict through one of three builders: ``_insufficient``,
    ``_logic_answer`` or ``_evidence_answer``. Every builder returns the same
    core keys (``question``, ``status``, ``route``, ``answer``, ``parts``,
    ``oracle``, ``continuation_cursor``) so consumers can read them without
    branching on ``status``.
    """

    def __init__(self, ledger_dir: str | Path):
        """Create the protocol with a ``PureOracleKernel`` built from ``ledger_dir``."""
        self.kernel = PureOracleKernel(ledger_dir)

    def compose(self, question: str, mode: str = "auto", top_k: int = 8) -> dict:
        """Answer ``question`` and attach protocol metadata.

        Args:
            question: The user question.
            mode: Requested answer mode; ``"auto"`` infers it from the question.
            top_k: Forwarded to ``self.kernel.answer`` as ``top_k``.

        Returns:
            The builder's result dict extended with ``schema_version``,
            ``created_at`` (UTC ISO timestamp), ``mode``, ``answer_lattice``,
            ``length_policy``, ``form_policy`` and ``quality_claim``.

        Raises:
            ValueError: If ``mode`` is explicit but unknown (raised by
                ``infer_answer_mode``).
        """
        chosen_mode = infer_answer_mode(question, mode)
        oracle = self.kernel.answer(question, top_k=top_k)
        budget = _budget(chosen_mode)

        # Route on the oracle verdict: ungrounded results never get expanded.
        if oracle["status"] != "grounded":
            result = self._insufficient(question, chosen_mode, oracle)
        elif oracle["route"] == "logic_prover":
            result = self._logic_answer(question, chosen_mode, oracle, budget)
        else:
            result = self._evidence_answer(question, chosen_mode, oracle, budget)

        result["schema_version"] = "tinymind-elastic-answer-protocol-v1"
        result["created_at"] = datetime.now(timezone.utc).isoformat()
        result["mode"] = chosen_mode
        result["answer_lattice"] = {
            "nucleus": "one compact source-grounded answer",
            "ribs": "hash-backed evidence chunks and proof steps",
            "leaves": "optional expansion parts that can continue without changing the nucleus",
        }
        result["length_policy"] = {
            "artificial_hard_cap": False,
            "continuation_allowed": budget["continuation_allowed"],
            "reason": "Long answers are emitted as grounded parts; runtime clients may request the next part instead of forcing one giant context.",
        }
        result["form_policy"] = {
            "fixed_template_required": False,
            "mode": "freeform_grounded" if chosen_mode == "fluid" else "structured_grounded",
            "reason": "The answer shape is selected from intent and evidence; audit metadata stays structured outside the prose.",
        }
        result["quality_claim"] = {
            "world_first_or_top_world_claim_allowed": False,
            "reason": "The protocol is local original infrastructure evidence; superiority requires external comparable benchmarks.",
        }
        return result

    def _insufficient(self, question: str, mode: str, oracle: dict) -> dict:
        """Build the refusal result used when the oracle is not grounded.

        Args:
            question: The user question, echoed back in the result.
            mode: Chosen answer mode; accepted for signature parity with the
                other builders and not used here.
            oracle: Kernel result; its ``route`` is copied into the result.

        Returns:
            A result dict with ``status == "insufficient_evidence"``, a single
            ungrounded part, suggested ``next_actions`` and a ``None``
            ``continuation_cursor``.
        """
        return {
            "question": question,
            "status": "insufficient_evidence",
            "route": oracle["route"],
            "answer": "หลักฐานยังไม่พอ จึงไม่ควรขยายคำตอบยาว เพราะจะเพิ่มความเสี่ยงหลอน",
            "parts": [
                {
                    "part": 1,
                    "title": "Evidence Gate",
                    "content": "ต้องค้นหรือเพิ่ม source ledger ก่อนตอบแบบละเอียด",
                    "grounded": False,
                }
            ],
            "oracle": oracle,
            # Same key the grounded builders emit; there is nothing to continue.
            "continuation_cursor": None,
            "next_actions": [
                "ingest source files or official web evidence into the ledger",
                "rerun elastic-answer with external research or a richer ledger",
            ],
        }

    def _logic_answer(self, question: str, mode: str, oracle: dict, budget: dict) -> dict:
        """Build the result for an oracle answer produced by the logic prover.

        Args:
            question: The user question, echoed back in the result.
            mode: Chosen answer mode; ``"fluid"`` switches route and prose form.
            oracle: Kernel result; reads ``answer`` and ``logic`` (optional
                ``proof_steps`` and ``rule``).
            budget: Size budget; a ``Proof`` part is added when
                ``target_sections`` is greater than 1.

        Returns:
            A grounded result dict whose ``continuation_cursor`` is ``None``.
        """
        proof = oracle["logic"].get("proof_steps", [])
        parts = [
            {
                "part": 1,
                "title": "Direct Answer",
                "content": oracle["answer"],
                "grounded": True,
                "support": {"route": "logic_prover", "rule": oracle["logic"].get("rule")},
            }
        ]
        if budget["target_sections"] > 1:
            parts.append(
                {
                    "part": 2,
                    "title": "Proof",
                    "content": " -> ".join(proof),
                    "grounded": True,
                    "support": {"proof_steps": proof},
                }
            )
        return {
            "question": question,
            "status": "grounded",
            "route": "elastic_logic_fluid" if mode == "fluid" else "elastic_logic",
            "answer": self._fluid_logic(oracle) if mode == "fluid" else self._join_parts(parts),
            "parts": parts,
            "oracle": oracle,
            "continuation_cursor": None,
        }

    def _evidence_answer(self, question: str, mode: str, oracle: dict, budget: dict) -> dict:
        """Build the result for an oracle answer backed by retrieved evidence.

        Args:
            question: The user question, echoed back in the result.
            mode: Chosen answer mode; ``"fluid"`` returns one freeform part.
            oracle: Kernel result; reads ``evidence`` (rows with
                ``evidence_hash``, ``chunk_id``, ``preview``, ``path``,
                ``chunk_sha256``, ``score``) and ``grounding_gate``
                (``support.supported_terms`` and ``reason``).
            budget: Size budget; ``target_sections`` controls how many parts
                are emitted and ``continuation_allowed`` gates the cursor.

        Returns:
            A grounded result dict. ``continuation_cursor`` is a dict when more
            can be requested and ``None`` otherwise.
        """
        evidence = oracle["evidence"]
        supported = oracle["grounding_gate"]["support"].get("supported_terms", [])

        if mode == "fluid":
            answer = self._fluid_evidence_answer(question, oracle)
            return {
                "question": question,
                "status": "grounded",
                "route": "elastic_fluid_retrieval_grounding",
                "answer": answer,
                "parts": [
                    {
                        "part": 1,
                        "title": "freeform",
                        "content": answer,
                        "grounded": True,
                        "support": {
                            "supported_terms": supported,
                            "evidence_hashes": [row["evidence_hash"] for row in evidence],
                        },
                    }
                ],
                "oracle": oracle,
                "continuation_cursor": {
                    "instruction": "ask for more detail and the protocol will expand from the same grounded nucleus",
                    "next_shape": self._choose_shape(question, evidence),
                },
            }

        parts = [
            {
                "part": 1,
                "title": "Core",
                "content": (
                    f"คำตอบหลัก: คำถามนี้รองรับด้วย source chunks {len(evidence)} จุด "
                    f"และคำสำคัญที่ตรวจพบคือ {', '.join(supported[:10])}."
                ),
                "grounded": True,
                "support": {"supported_terms": supported},
            }
        ]

        if budget["target_sections"] >= 3:
            # The evidence map lists at most the first three chunks.
            mapped = evidence[:3]
            parts.append(
                {
                    "part": 2,
                    "title": "Evidence Map",
                    "content": self._evidence_map(mapped),
                    "grounded": True,
                    "support": {"evidence_hashes": [row["evidence_hash"] for row in mapped]},
                }
            )
            parts.append(
                {
                    "part": 3,
                    "title": "How To Expand",
                    "content": (
                        "ขยายคำตอบโดยอ่าน chunk ตามลำดับ score, สรุปเฉพาะ claim ที่มี hash, "
                        "แล้วตรวจว่าแต่ละย่อหน้ามี source chunk รองรับ"
                    ),
                    "grounded": True,
                    "support": {"grounding_gate": oracle["grounding_gate"]["reason"]},
                }
            )

        if budget["target_sections"] >= 6:
            # Fill the remaining section budget with one part per source chunk.
            # The slice bound is computed once, before any chunk part is added.
            remaining = budget["target_sections"] - len(parts)
            for row in evidence[:remaining]:
                parts.append(
                    {
                        "part": len(parts) + 1,
                        "title": f"Source Chunk {row['chunk_id']}",
                        "content": row["preview"],
                        "grounded": True,
                        "support": {
                            "path": row["path"],
                            "chunk_sha256": row["chunk_sha256"],
                            "evidence_hash": row["evidence_hash"],
                            "score": row["score"],
                        },
                    }
                )

        # Three sections are fixed (core, map, how-to); the rest of the budget
        # is chunk parts, so this is the index of the first chunk not emitted.
        next_index = max(0, budget["target_sections"] - 3)
        continuation_cursor = None
        if budget["continuation_allowed"] and len(evidence) > next_index:
            continuation_cursor = {
                "next_evidence_index": next_index,
                "instruction": "call again with mode=exhaustive and a higher top_k, or continue from this evidence index",
            }

        return {
            "question": question,
            "status": "grounded",
            "route": "elastic_retrieval_grounding",
            "answer": self._join_parts(parts),
            "parts": parts,
            "oracle": oracle,
            "continuation_cursor": continuation_cursor,
        }

    def _evidence_map(self, evidence: list[dict]) -> str:
        """Render one numbered line per row: path, chunk id, score, 16-char hash prefix."""
        lines = []
        for idx, row in enumerate(evidence, start=1):
            lines.append(
                f"{idx}. {row['path']}#{row['chunk_id']} score={row['score']} sha={row['chunk_sha256'][:16]}"
            )
        return "\n".join(lines)

    def _fluid_logic(self, oracle: dict) -> str:
        """Return the oracle answer as prose, followed by the proof steps when there are any."""
        proof = oracle["logic"].get("proof_steps", [])
        if not proof:
            return str(oracle["answer"])
        return f"{oracle['answer']} เพราะ {'; '.join(proof)}"

    def _fluid_evidence_answer(self, question: str, oracle: dict) -> str:
        """Write a freeform answer whose wording follows the shape from ``_choose_shape``.

        The text always ends with a trace note naming up to three evidence
        rows as ``path#chunk_id:sha-prefix``.
        """
        evidence = oracle["evidence"]
        supported = oracle["grounding_gate"]["support"].get("supported_terms", [])
        shape = self._choose_shape(question, evidence)
        lead = "แกนคำตอบคือ"
        if shape == "direct":
            body = f"{lead} หลักฐานที่พบรองรับคำสำคัญ {', '.join(supported[:8])} จึงตอบได้โดยยึด source ที่แนบไว้"
        elif shape == "teaching":
            body = (
                "มองแบบง่าย ๆ ก่อน: ระบบไม่ได้พยายามจำทุกอย่างในหัวโมเดล แต่ดึงหลักฐานจริงกลับมา "
                f"แล้วค่อยให้ logic/grounding ตรวจซ้ำ จุดที่หลักฐานรองรับคือ {', '.join(supported[:8])}."
            )
        else:
            body = (
                f"{lead} มีหลักฐาน {len(evidence)} ชิ้นที่เชื่อมกับคำถามนี้ "
                "คำตอบจึงควรถูกขยายจากหลักฐานเหล่านั้นทีละชั้น ไม่ใช่เดาจาก pattern เปล่า ๆ"
            )
        source_note = "; ".join(f"{row['path']}#{row['chunk_id']}:{row['chunk_sha256'][:10]}" for row in evidence[:3])
        return f"{body} ตรวจย้อนกลับได้จาก {source_note}"

    def _choose_shape(self, question: str, evidence: list[dict]) -> str:
        """Pick ``"teaching"``, ``"direct"`` or ``"exploratory"`` for a freeform answer.

        Explanation markers win first; then two or fewer evidence rows, or a
        definition/summary marker, select ``"direct"``.
        """
        q = question.lower()
        if any(marker in q for marker in ("อธิบาย", "เข้าใจง่าย", "why", "how")):
            return "teaching"
        if len(evidence) <= 2 or any(marker in q for marker in ("คืออะไร", "what", "สรุป")):
            return "direct"
        return "exploratory"

    def _join_parts(self, parts: list[dict]) -> str:
        """Join parts as ``"title: content"`` blocks separated by blank lines."""
        return "\n\n".join(f"{part['title']}: {part['content']}" for part in parts)
def write_elastic_answer(
    ledger_dir: str | Path,
    question: str,
    out_path: str | Path,
    mode: str = "auto",
    top_k: int = 8,
) -> dict:
    """Compose an elastic answer and save it as a UTF-8 JSON file.

    Args:
        ledger_dir: Passed to ``ElasticAnswerProtocol``.
        question: The user question to answer.
        out_path: Destination file; missing parent directories are created.
        mode: Requested answer mode, forwarded to ``compose``.
        top_k: Forwarded to ``compose``.

    Returns:
        The composed result dict with an extra ``out_path`` key holding the
        destination path as a string.
    """
    protocol = ElasticAnswerProtocol(ledger_dir)
    answer = protocol.compose(question, mode=mode, top_k=top_k)

    destination = Path(out_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(answer, ensure_ascii=False, indent=2, sort_keys=True)
    destination.write_text(serialized, encoding="utf-8")

    # Added after serialization, so the file on disk carries no "out_path" key.
    answer["out_path"] = str(destination)
    return answer
Xet Storage Details
- Size: 13.7 kB
- Xet hash: 295a6547da38cb43d7a31e828bf045894f9dc94c20082ccc73dea60d67a16a60
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.