# bbkdevops's picture
# download
# raw
# 13.7 kB
"""Elastic Answer Protocol for variable short/deep grounded responses.
This is TinyMind's answer-lattice layer: a small model can answer briefly or
expand into long, evidence-linked parts without depending on a giant parameter
store. The protocol separates the core claim, evidence ribs, and expansion
leaves so long answers stay controllable and auditable.
"""
from __future__ import annotations
from datetime import datetime, timezone
import json
from pathlib import Path
from evaluation.pure_oracle_kernel import PureOracleKernel
ANSWER_MODES = {"auto", "brief", "standard", "deep", "exhaustive", "fluid"}


def infer_answer_mode(question: str, requested: str = "auto") -> str:
    """Resolve the answer mode for *question*.

    An explicit *requested* mode (anything other than ``"auto"``) is returned
    as-is after being checked against ``ANSWER_MODES``. With ``"auto"`` the
    lower-cased question is scanned for Thai/English keyword markers; the
    groups are tried in the order fluid, brief, exhaustive and the first group
    with a substring hit wins. If nothing matches, ``"standard"`` is returned.

    Raises:
        ValueError: if *requested* is neither ``"auto"`` nor a known mode.
    """
    if requested == "auto":
        lowered = question.lower()
        # Order matters: earlier groups take precedence over later ones.
        keyword_table = (
            ("fluid", ("ไร้รูปแบบ", "ไม่ตายตัว", "ธรรมชาติ", "fluid", "freeform")),
            ("brief", ("สั้น", "short", "brief", "สรุป")),
            (
                "exhaustive",
                ("ละเอียด", "ยาว", "deep", "exhaustive", "no limit", "ไม่จำกัด", "ทั้งหมด"),
            ),
        )
        for mode_name, keywords in keyword_table:
            for keyword in keywords:
                if keyword in lowered:
                    return mode_name
        return "standard"

    if requested in ANSWER_MODES:
        return requested
    raise ValueError(f"unknown answer mode: {requested}")
def _budget(mode: str) -> dict:
    """Return the part/section budget for a concrete answer *mode*.

    The result is a freshly built dict with the keys ``target_parts``,
    ``target_sections`` and ``continuation_allowed``. Only the five concrete
    modes are known here; any other value (including ``"auto"``) raises
    ``KeyError``.
    """
    # (target_parts, target_sections, continuation_allowed) per mode.
    presets = {
        "brief": (1, 1, False),
        "standard": (1, 3, False),
        "deep": (2, 6, True),
        "exhaustive": (4, 12, True),
        "fluid": (1, 0, True),
    }
    part_count, section_count, may_continue = presets[mode]
    return {
        "target_parts": part_count,
        "target_sections": section_count,
        "continuation_allowed": may_continue,
    }
class ElasticAnswerProtocol:
"""Compose grounded answers at variable depth using the PureOracle kernel."""
def __init__(self, ledger_dir: str | Path):
self.kernel = PureOracleKernel(ledger_dir)
def compose(self, question: str, mode: str = "auto", top_k: int = 8) -> dict:
chosen_mode = infer_answer_mode(question, mode)
oracle = self.kernel.answer(question, top_k=top_k)
budget = _budget(chosen_mode)
if oracle["status"] != "grounded":
result = self._insufficient(question, chosen_mode, oracle)
elif oracle["route"] == "logic_prover":
result = self._logic_answer(question, chosen_mode, oracle, budget)
else:
result = self._evidence_answer(question, chosen_mode, oracle, budget)
result["schema_version"] = "tinymind-elastic-answer-protocol-v1"
result["created_at"] = datetime.now(timezone.utc).isoformat()
result["mode"] = chosen_mode
result["answer_lattice"] = {
"nucleus": "one compact source-grounded answer",
"ribs": "hash-backed evidence chunks and proof steps",
"leaves": "optional expansion parts that can continue without changing the nucleus",
}
result["length_policy"] = {
"artificial_hard_cap": False,
"continuation_allowed": budget["continuation_allowed"],
"reason": "Long answers are emitted as grounded parts; runtime clients may request the next part instead of forcing one giant context.",
}
result["form_policy"] = {
"fixed_template_required": False,
"mode": "freeform_grounded" if chosen_mode == "fluid" else "structured_grounded",
"reason": "The answer shape is selected from intent and evidence; audit metadata stays structured outside the prose.",
}
result["quality_claim"] = {
"world_first_or_top_world_claim_allowed": False,
"reason": "The protocol is local original infrastructure evidence; superiority requires external comparable benchmarks.",
}
return result
def _insufficient(self, question: str, mode: str, oracle: dict) -> dict:
return {
"question": question,
"status": "insufficient_evidence",
"route": oracle["route"],
"answer": "หลักฐานยังไม่พอ จึงไม่ควรขยายคำตอบยาว เพราะจะเพิ่มความเสี่ยงหลอน",
"parts": [
{
"part": 1,
"title": "Evidence Gate",
"content": "ต้องค้นหรือเพิ่ม source ledger ก่อนตอบแบบละเอียด",
"grounded": False,
}
],
"oracle": oracle,
"next_actions": [
"ingest source files or official web evidence into the ledger",
"rerun elastic-answer with external research or a richer ledger",
],
}
def _logic_answer(self, question: str, mode: str, oracle: dict, budget: dict) -> dict:
proof = oracle["logic"].get("proof_steps", [])
parts = [
{
"part": 1,
"title": "Direct Answer",
"content": oracle["answer"],
"grounded": True,
"support": {"route": "logic_prover", "rule": oracle["logic"].get("rule")},
}
]
if budget["target_sections"] > 1:
parts.append(
{
"part": 2,
"title": "Proof",
"content": " -> ".join(proof),
"grounded": True,
"support": {"proof_steps": proof},
}
)
return {
"question": question,
"status": "grounded",
"route": "elastic_logic_fluid" if mode == "fluid" else "elastic_logic",
"answer": self._fluid_logic(oracle) if mode == "fluid" else self._join_parts(parts),
"parts": parts,
"oracle": oracle,
"continuation_cursor": None,
}
def _evidence_answer(self, question: str, mode: str, oracle: dict, budget: dict) -> dict:
evidence = oracle["evidence"]
supported = oracle["grounding_gate"]["support"].get("supported_terms", [])
if mode == "fluid":
answer = self._fluid_evidence_answer(question, oracle)
return {
"question": question,
"status": "grounded",
"route": "elastic_fluid_retrieval_grounding",
"answer": answer,
"parts": [
{
"part": 1,
"title": "freeform",
"content": answer,
"grounded": True,
"support": {
"supported_terms": supported,
"evidence_hashes": [row["evidence_hash"] for row in evidence],
},
}
],
"oracle": oracle,
"continuation_cursor": {
"instruction": "ask for more detail and the protocol will expand from the same grounded nucleus",
"next_shape": self._choose_shape(question, evidence),
},
}
parts = [
{
"part": 1,
"title": "Core",
"content": (
f"คำตอบหลัก: คำถามนี้รองรับด้วย source chunks {len(evidence)} จุด "
f"และคำสำคัญที่ตรวจพบคือ {', '.join(supported[:10])}."
),
"grounded": True,
"support": {"supported_terms": supported},
}
]
if budget["target_sections"] >= 3:
parts.append(
{
"part": 2,
"title": "Evidence Map",
"content": self._evidence_map(evidence[: min(3, len(evidence))]),
"grounded": True,
"support": {"evidence_hashes": [row["evidence_hash"] for row in evidence[: min(3, len(evidence))]]},
}
)
parts.append(
{
"part": 3,
"title": "How To Expand",
"content": (
"ขยายคำตอบโดยอ่าน chunk ตามลำดับ score, สรุปเฉพาะ claim ที่มี hash, "
"แล้วตรวจว่าแต่ละย่อหน้ามี source chunk รองรับ"
),
"grounded": True,
"support": {"grounding_gate": oracle["grounding_gate"]["reason"]},
}
)
if budget["target_sections"] >= 6:
for row in evidence[: budget["target_sections"] - len(parts)]:
parts.append(
{
"part": len(parts) + 1,
"title": f"Source Chunk {row['chunk_id']}",
"content": row["preview"],
"grounded": True,
"support": {
"path": row["path"],
"chunk_sha256": row["chunk_sha256"],
"evidence_hash": row["evidence_hash"],
"score": row["score"],
},
}
)
continuation_cursor = None
if budget["continuation_allowed"] and len(evidence) > max(0, budget["target_sections"] - 3):
continuation_cursor = {
"next_evidence_index": max(0, budget["target_sections"] - 3),
"instruction": "call again with mode=exhaustive and a higher top_k, or continue from this evidence index",
}
return {
"question": question,
"status": "grounded",
"route": "elastic_retrieval_grounding",
"answer": self._join_parts(parts),
"parts": parts,
"oracle": oracle,
"continuation_cursor": continuation_cursor,
}
def _evidence_map(self, evidence: list[dict]) -> str:
lines = []
for idx, row in enumerate(evidence, start=1):
lines.append(
f"{idx}. {row['path']}#{row['chunk_id']} score={row['score']} sha={row['chunk_sha256'][:16]}"
)
return "\n".join(lines)
def _fluid_logic(self, oracle: dict) -> str:
proof = oracle["logic"].get("proof_steps", [])
if not proof:
return str(oracle["answer"])
return f"{oracle['answer']} เพราะ {'; '.join(proof)}"
def _fluid_evidence_answer(self, question: str, oracle: dict) -> str:
evidence = oracle["evidence"]
supported = oracle["grounding_gate"]["support"].get("supported_terms", [])
shape = self._choose_shape(question, evidence)
lead = "แกนคำตอบคือ"
if shape == "direct":
body = f"{lead} หลักฐานที่พบรองรับคำสำคัญ {', '.join(supported[:8])} จึงตอบได้โดยยึด source ที่แนบไว้"
elif shape == "teaching":
body = (
"มองแบบง่าย ๆ ก่อน: ระบบไม่ได้พยายามจำทุกอย่างในหัวโมเดล แต่ดึงหลักฐานจริงกลับมา "
f"แล้วค่อยให้ logic/grounding ตรวจซ้ำ จุดที่หลักฐานรองรับคือ {', '.join(supported[:8])}."
)
else:
body = (
f"{lead} มีหลักฐาน {len(evidence)} ชิ้นที่เชื่อมกับคำถามนี้ "
"คำตอบจึงควรถูกขยายจากหลักฐานเหล่านั้นทีละชั้น ไม่ใช่เดาจาก pattern เปล่า ๆ"
)
source_note = "; ".join(f"{row['path']}#{row['chunk_id']}:{row['chunk_sha256'][:10]}" for row in evidence[:3])
return f"{body} ตรวจย้อนกลับได้จาก {source_note}"
def _choose_shape(self, question: str, evidence: list[dict]) -> str:
q = question.lower()
if any(marker in q for marker in ("อธิบาย", "เข้าใจง่าย", "why", "how")):
return "teaching"
if len(evidence) <= 2 or any(marker in q for marker in ("คืออะไร", "what", "สรุป")):
return "direct"
return "exploratory"
def _join_parts(self, parts: list[dict]) -> str:
return "\n\n".join(f"{part['title']}: {part['content']}" for part in parts)
def write_elastic_answer(
    ledger_dir: str | Path,
    question: str,
    out_path: str | Path,
    mode: str = "auto",
    top_k: int = 8,
) -> dict:
    """Compose an elastic answer and persist it as UTF-8 JSON at *out_path*.

    Missing parent directories of *out_path* are created. The JSON is written
    with sorted keys, two-space indentation and non-ASCII characters left
    unescaped. The returned dict is the composed result with an extra
    ``out_path`` key; that key is added after the file is written, so it is
    not part of the stored JSON.
    """
    protocol = ElasticAnswerProtocol(ledger_dir)
    answer = protocol.compose(question, mode=mode, top_k=top_k)

    destination = Path(out_path)
    destination.parent.mkdir(parents=True, exist_ok=True)

    # Serialize first so a serialization error cannot leave a partial file.
    payload = json.dumps(answer, ensure_ascii=False, indent=2, sort_keys=True)
    destination.write_text(payload, encoding="utf-8")

    answer["out_path"] = str(destination)
    return answer

# Xet Storage Details

# Size:
# 13.7 kB
# ·
# Xet hash:
# 295a6547da38cb43d7a31e828bf045894f9dc94c20082ccc73dea60d67a16a60

# Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.