Buckets:

bbkdevops
/

unicosys-hypergraph-bucket

Files

xet

bbkdevops/unicosys-hypergraph-bucket/tinymind-native-colab-handoff/bundle/evaluation/elastic_answer.py

bbkdevops

about 1 month ago

download

raw

13.7 kB

	"""Elastic Answer Protocol for variable short/deep grounded responses.

	This is TinyMind's answer-lattice layer: a small model can answer briefly or
	expand into long, evidence-linked parts without depending on a giant parameter
	store. The protocol separates the core claim, evidence ribs, and expansion
	leaves so long answers stay controllable and auditable.
	"""

	from __future__ import annotations

	from datetime import datetime, timezone
	import json
	from pathlib import Path

	from evaluation.pure_oracle_kernel import PureOracleKernel


	# Every mode name accepted as an explicit request by infer_answer_mode();
	# "auto" means the mode is inferred from the question text instead.
	ANSWER_MODES = {"auto", "brief", "standard", "deep", "exhaustive", "fluid"}


	def infer_answer_mode(question: str, requested: str = "auto") -> str:
	    """Resolve the answer mode for a question.

	    An explicit ``requested`` mode (anything other than ``"auto"``) is
	    validated against ``ANSWER_MODES`` and returned unchanged. Otherwise the
	    mode is inferred from case-insensitive substring markers (Thai and
	    English) in ``question``, checked in this order: fluid markers, then
	    brief markers, then long-form markers. The first match wins.

	    Note that long-form markers (including the word "deep") all map to
	    ``"exhaustive"``; ``"deep"`` is only ever returned when requested
	    explicitly.

	    Args:
	        question: The user's question text.
	        requested: A mode name from ``ANSWER_MODES``; ``"auto"`` to infer.

	    Returns:
	        One of ``"fluid"``, ``"brief"``, ``"exhaustive"``, ``"standard"``,
	        or the explicitly requested mode.

	    Raises:
	        ValueError: If ``requested`` is not a known mode.
	    """
	    if requested != "auto":
	        if requested not in ANSWER_MODES:
	            raise ValueError(f"unknown answer mode: {requested}")
	        return requested
	    q = question.lower()
	    if any(marker in q for marker in ("ไร้รูปแบบ", "ไม่ตายตัว", "ธรรมชาติ", "fluid", "freeform")):
	        return "fluid"
	    if any(marker in q for marker in ("สั้น", "short", "brief", "สรุป")):
	        return "brief"
	    if any(marker in q for marker in ("ละเอียด", "ยาว", "deep", "exhaustive", "no limit", "ไม่จำกัด", "ทั้งหมด")):
	        return "exhaustive"
	    return "standard"


	def _budget(mode: str) -> dict:
	table = {
	"brief": {"target_parts": 1, "target_sections": 1, "continuation_allowed": False},
	"standard": {"target_parts": 1, "target_sections": 3, "continuation_allowed": False},
	"deep": {"target_parts": 2, "target_sections": 6, "continuation_allowed": True},
	"exhaustive": {"target_parts": 4, "target_sections": 12, "continuation_allowed": True},
	"fluid": {"target_parts": 1, "target_sections": 0, "continuation_allowed": True},
	}
	return table[mode]


	class ElasticAnswerProtocol:
	    """Compose grounded answers at variable depth using the PureOracle kernel."""

	    def __init__(self, ledger_dir: str | Path):
	        """Create the protocol around a ``PureOracleKernel`` for ``ledger_dir``."""
	        self.kernel = PureOracleKernel(ledger_dir)

	    def compose(self, question: str, mode: str = "auto", top_k: int = 8) -> dict:
	        """Answer ``question`` and wrap the result in protocol metadata.

	        The mode is resolved with ``infer_answer_mode``, the kernel is queried
	        with ``top_k``, and the kernel result is dispatched on its ``status``
	        and ``route`` to one of the three builders. The returned dict is then
	        stamped with schema version, UTC creation time, the chosen mode and
	        the lattice/length/form/quality policy blocks.

	        Args:
	            question: The user's question text.
	            mode: Requested answer mode, or ``"auto"`` to infer it.
	            top_k: Forwarded to ``kernel.answer`` as ``top_k``.

	        Returns:
	            The answer dict produced by the selected builder plus metadata.

	        Raises:
	            ValueError: Propagated from ``infer_answer_mode`` for unknown modes.
	        """
	        chosen_mode = infer_answer_mode(question, mode)
	        oracle = self.kernel.answer(question, top_k=top_k)
	        budget = _budget(chosen_mode)
	        if oracle["status"] != "grounded":
	            result = self._insufficient(question, chosen_mode, oracle)
	        elif oracle["route"] == "logic_prover":
	            result = self._logic_answer(question, chosen_mode, oracle, budget)
	        else:
	            result = self._evidence_answer(question, chosen_mode, oracle, budget)
	        result["schema_version"] = "tinymind-elastic-answer-protocol-v1"
	        result["created_at"] = datetime.now(timezone.utc).isoformat()
	        result["mode"] = chosen_mode
	        result["answer_lattice"] = {
	            "nucleus": "one compact source-grounded answer",
	            "ribs": "hash-backed evidence chunks and proof steps",
	            "leaves": "optional expansion parts that can continue without changing the nucleus",
	        }
	        result["length_policy"] = {
	            "artificial_hard_cap": False,
	            "continuation_allowed": budget["continuation_allowed"],
	            "reason": "Long answers are emitted as grounded parts; runtime clients may request the next part instead of forcing one giant context.",
	        }
	        result["form_policy"] = {
	            "fixed_template_required": False,
	            "mode": "freeform_grounded" if chosen_mode == "fluid" else "structured_grounded",
	            "reason": "The answer shape is selected from intent and evidence; audit metadata stays structured outside the prose.",
	        }
	        result["quality_claim"] = {
	            "world_first_or_top_world_claim_allowed": False,
	            "reason": "The protocol is local original infrastructure evidence; superiority requires external comparable benchmarks.",
	        }
	        return result

	    def _insufficient(self, question: str, mode: str, oracle: dict) -> dict:
	        """Build the refusal result used when the kernel is not grounded.

	        ``mode`` is accepted for signature parity with the other builders but
	        is not used: the response is the same single ungrounded part
	        regardless of the requested depth.
	        """
	        return {
	            "question": question,
	            "status": "insufficient_evidence",
	            "route": oracle["route"],
	            "answer": "หลักฐานยังไม่พอ จึงไม่ควรขยายคำตอบยาว เพราะจะเพิ่มความเสี่ยงหลอน",
	            "parts": [
	                {
	                    "part": 1,
	                    "title": "Evidence Gate",
	                    "content": "ต้องค้นหรือเพิ่ม source ledger ก่อนตอบแบบละเอียด",
	                    "grounded": False,
	                }
	            ],
	            "oracle": oracle,
	            "next_actions": [
	                "ingest source files or official web evidence into the ledger",
	                "rerun elastic-answer with external research or a richer ledger",
	            ],
	        }

	    def _logic_answer(self, question: str, mode: str, oracle: dict, budget: dict) -> dict:
	        """Build the result for answers routed through the logic prover.

	        Part 1 is always the direct answer. A "Proof" part joining the proof
	        steps with ``" -> "`` is added only when the budget asks for more than
	        one section. In fluid mode the top-level ``answer`` is the prose from
	        ``_fluid_logic``; otherwise it is the joined parts.
	        """
	        proof = oracle["logic"].get("proof_steps", [])
	        parts = [
	            {
	                "part": 1,
	                "title": "Direct Answer",
	                "content": oracle["answer"],
	                "grounded": True,
	                "support": {"route": "logic_prover", "rule": oracle["logic"].get("rule")},
	            }
	        ]
	        if budget["target_sections"] > 1:
	            parts.append(
	                {
	                    "part": 2,
	                    "title": "Proof",
	                    "content": " -> ".join(proof),
	                    "grounded": True,
	                    "support": {"proof_steps": proof},
	                }
	            )
	        return {
	            "question": question,
	            "status": "grounded",
	            "route": "elastic_logic_fluid" if mode == "fluid" else "elastic_logic",
	            "answer": self._fluid_logic(oracle) if mode == "fluid" else self._join_parts(parts),
	            "parts": parts,
	            "oracle": oracle,
	            "continuation_cursor": None,
	        }

	    def _evidence_answer(self, question: str, mode: str, oracle: dict, budget: dict) -> dict:
	        """Build the result for answers grounded in retrieved evidence rows.

	        Fluid mode returns a single freeform part and a cursor naming the
	        next shape. Other modes always emit a "Core" part; budgets of at
	        least 3 sections add an evidence map (top three rows) and an
	        expansion note; budgets of at least 6 sections append one part per
	        evidence row until the section budget is filled. A continuation
	        cursor is set when continuation is allowed and evidence rows remain
	        beyond those already consumed by per-row parts.
	        """
	        evidence = oracle["evidence"]
	        supported = oracle["grounding_gate"]["support"].get("supported_terms", [])
	        if mode == "fluid":
	            answer = self._fluid_evidence_answer(question, oracle)
	            return {
	                "question": question,
	                "status": "grounded",
	                "route": "elastic_fluid_retrieval_grounding",
	                "answer": answer,
	                "parts": [
	                    {
	                        "part": 1,
	                        "title": "freeform",
	                        "content": answer,
	                        "grounded": True,
	                        "support": {
	                            "supported_terms": supported,
	                            "evidence_hashes": [row["evidence_hash"] for row in evidence],
	                        },
	                    }
	                ],
	                "oracle": oracle,
	                "continuation_cursor": {
	                    "instruction": "ask for more detail and the protocol will expand from the same grounded nucleus",
	                    "next_shape": self._choose_shape(question, evidence),
	                },
	            }
	        parts = [
	            {
	                "part": 1,
	                "title": "Core",
	                "content": (
	                    f"คำตอบหลัก: คำถามนี้รองรับด้วย source chunks {len(evidence)} จุด "
	                    f"และคำสำคัญที่ตรวจพบคือ {', '.join(supported[:10])}."
	                ),
	                "grounded": True,
	                "support": {"supported_terms": supported},
	            }
	        ]
	        if budget["target_sections"] >= 3:
	            # Slicing already clamps to the list length, so [:3] is enough.
	            top_rows = evidence[:3]
	            parts.append(
	                {
	                    "part": 2,
	                    "title": "Evidence Map",
	                    "content": self._evidence_map(top_rows),
	                    "grounded": True,
	                    "support": {"evidence_hashes": [row["evidence_hash"] for row in top_rows]},
	                }
	            )
	            # Part 3 is numbered explicitly, so it only makes sense alongside
	            # part 2 and lives in the same branch.
	            parts.append(
	                {
	                    "part": 3,
	                    "title": "How To Expand",
	                    "content": (
	                        "ขยายคำตอบโดยอ่าน chunk ตามลำดับ score, สรุปเฉพาะ claim ที่มี hash, "
	                        "แล้วตรวจว่าแต่ละย่อหน้ามี source chunk รองรับ"
	                    ),
	                    "grounded": True,
	                    "support": {"grounding_gate": oracle["grounding_gate"]["reason"]},
	                }
	            )
	        if budget["target_sections"] >= 6:
	            # The slice bound is evaluated once, before any per-row part is
	            # appended, so it equals target_sections minus the three fixed parts.
	            for row in evidence[: budget["target_sections"] - len(parts)]:
	                parts.append(
	                    {
	                        "part": len(parts) + 1,
	                        "title": f"Source Chunk {row['chunk_id']}",
	                        "content": row["preview"],
	                        "grounded": True,
	                        "support": {
	                            "path": row["path"],
	                            "chunk_sha256": row["chunk_sha256"],
	                            "evidence_hash": row["evidence_hash"],
	                            "score": row["score"],
	                        },
	                    }
	                )
	        continuation_cursor = None
	        if budget["continuation_allowed"] and len(evidence) > max(0, budget["target_sections"] - 3):
	            continuation_cursor = {
	                "next_evidence_index": max(0, budget["target_sections"] - 3),
	                "instruction": "call again with mode=exhaustive and a higher top_k, or continue from this evidence index",
	            }
	        return {
	            "question": question,
	            "status": "grounded",
	            "route": "elastic_retrieval_grounding",
	            "answer": self._join_parts(parts),
	            "parts": parts,
	            "oracle": oracle,
	            "continuation_cursor": continuation_cursor,
	        }

	    def _evidence_map(self, evidence: list[dict]) -> str:
	        """Render evidence rows as numbered lines: path#chunk, score, 16-char sha prefix."""
	        lines = []
	        for idx, row in enumerate(evidence, start=1):
	            lines.append(
	                f"{idx}. {row['path']}#{row['chunk_id']} score={row['score']} sha={row['chunk_sha256'][:16]}"
	            )
	        return "\n".join(lines)

	    def _fluid_logic(self, oracle: dict) -> str:
	        """Return the logic answer as prose, appending the proof steps when present."""
	        proof = oracle["logic"].get("proof_steps", [])
	        if not proof:
	            return str(oracle["answer"])
	        return f"{oracle['answer']} เพราะ {'; '.join(proof)}"

	    def _fluid_evidence_answer(self, question: str, oracle: dict) -> str:
	        """Write a freeform evidence-backed answer in the shape chosen for the question.

	        The body text depends on ``_choose_shape``; a trailing source note
	        lists up to three rows as ``path#chunk_id:sha-prefix`` for traceability.
	        """
	        evidence = oracle["evidence"]
	        supported = oracle["grounding_gate"]["support"].get("supported_terms", [])
	        shape = self._choose_shape(question, evidence)
	        lead = "แกนคำตอบคือ"
	        if shape == "direct":
	            body = f"{lead} หลักฐานที่พบรองรับคำสำคัญ {', '.join(supported[:8])} จึงตอบได้โดยยึด source ที่แนบไว้"
	        elif shape == "teaching":
	            body = (
	                "มองแบบง่าย ๆ ก่อน: ระบบไม่ได้พยายามจำทุกอย่างในหัวโมเดล แต่ดึงหลักฐานจริงกลับมา "
	                f"แล้วค่อยให้ logic/grounding ตรวจซ้ำ จุดที่หลักฐานรองรับคือ {', '.join(supported[:8])}."
	            )
	        else:
	            body = (
	                f"{lead} มีหลักฐาน {len(evidence)} ชิ้นที่เชื่อมกับคำถามนี้ "
	                "คำตอบจึงควรถูกขยายจากหลักฐานเหล่านั้นทีละชั้น ไม่ใช่เดาจาก pattern เปล่า ๆ"
	            )
	        source_note = "; ".join(f"{row['path']}#{row['chunk_id']}:{row['chunk_sha256'][:10]}" for row in evidence[:3])
	        return f"{body} ตรวจย้อนกลับได้จาก {source_note}"

	    def _choose_shape(self, question: str, evidence: list[dict]) -> str:
	        """Pick the prose shape: ``"teaching"``, ``"direct"`` or ``"exploratory"``.

	        Explanation markers win first; otherwise two or fewer evidence rows,
	        or a definition/summary marker, selects ``"direct"``. Matching is by
	        case-insensitive substring.
	        """
	        q = question.lower()
	        if any(marker in q for marker in ("อธิบาย", "เข้าใจง่าย", "why", "how")):
	            return "teaching"
	        if len(evidence) <= 2 or any(marker in q for marker in ("คืออะไร", "what", "สรุป")):
	            return "direct"
	        return "exploratory"

	    def _join_parts(self, parts: list[dict]) -> str:
	        """Join parts as ``"title: content"`` paragraphs separated by blank lines."""
	        return "\n\n".join(f"{part['title']}: {part['content']}" for part in parts)


	def write_elastic_answer(
	    ledger_dir: str | Path,
	    question: str,
	    out_path: str | Path,
	    mode: str = "auto",
	    top_k: int = 8,
	) -> dict:
	    """Compose an elastic answer and write it to ``out_path`` as JSON.

	    Parent directories of ``out_path`` are created if missing. The file is
	    written as UTF-8 with non-ASCII text kept as-is, 2-space indentation and
	    sorted keys.

	    Args:
	        ledger_dir: Ledger directory passed to ``ElasticAnswerProtocol``.
	        question: The user's question text.
	        out_path: Destination JSON file.
	        mode: Requested answer mode, or ``"auto"`` to infer it.
	        top_k: Forwarded to ``ElasticAnswerProtocol.compose``.

	    Returns:
	        The composed result dict with an added ``out_path`` key. That key is
	        set after the file is written, so it is not present in the JSON file.
	    """
	    result = ElasticAnswerProtocol(ledger_dir).compose(question, mode=mode, top_k=top_k)
	    out = Path(out_path)
	    out.parent.mkdir(parents=True, exist_ok=True)
	    out.write_text(json.dumps(result, ensure_ascii=False, indent=2, sort_keys=True), encoding="utf-8")
	    result["out_path"] = str(out)
	    return result

Xet Storage Details

Size: 13.7 kB
Xet hash: 295a6547da38cb43d7a31e828bf045894f9dc94c20082ccc73dea60d67a16a60

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.