tinymind-native-colab-handoff/bundle/model/tinymind-12b/precision_grounding_runtime.py

bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff /bundle /model /tinymind-12b /precision_grounding_runtime.py

download

raw

5.44 kB

	from __future__ import annotations

	import json
	import math
	import re
	from dataclasses import dataclass


	@dataclass(frozen=True)
	class GroundingRule:
	    """Immutable description of one precision-grounding rule.

	    A rule pairs the substrings used to recognise a prompt with the
	    substrings a model response must contain, plus a canned answer that is
	    substituted when the response does not qualify.
	    """

	    # Identifier reported back in the grounding result.
	    name: str
	    # Coarse topic label reported back in the grounding result.
	    domain: str
	    # Substrings looked for (case-insensitively) in the normalised prompt.
	    prompt_patterns: tuple[str, ...]
	    # Substrings that must all occur (case-insensitively) in the response.
	    required_terms: tuple[str, ...]
	    # Replacement text returned when the response fails the checks.
	    answer: str


	# Ordered table of grounding rules. `precision_ground` walks this list in
	# order and the first rule whose prompt patterns match decides the outcome,
	# so more specific rules should come before more general ones.
	#
	# NOTE: answer strings use a plain `|` for absolute values and for the KL
	# divergence separator. A backslash before `|` is an invalid escape sequence
	# in a normal string literal and would leak a literal backslash into the
	# text shown to the user.
	RULES = [
	    GroundingRule(
	        name="int6_sign_extend",
	        domain="code",
	        prompt_patterns=("sign-extend", "6-bit", "int6", "packed signed"),
	        required_terms=("mask", "0x20", "-32", "31"),
	        answer=(
	            "Mask the packed lane first with `0x3f`, then sign-test bit `0x20`. "
	            "If `(x & 0x20) != 0`, return `x - 64`; otherwise return `x`. "
	            "This is signed 6-bit two's complement, so the exact range is `-32..31`."
	        ),
	    ),
	    GroundingRule(
	        name="rust_c_abi",
	        domain="code",
	        prompt_patterns=("abi", "rust", "c ffi"),
	        required_terms=("calling", "layout", "symbol"),
	        answer=(
	            "ABI compatibility at a Rust/C FFI boundary means both sides agree on calling convention, "
	            "symbol names, data layout, alignment, integer and pointer widths, ownership, and error/null handling. "
	            "Use `extern \"C\"`, stable exported symbols, `#[repr(C)]` structs, and never unwind across the FFI boundary."
	        ),
	    ),
	    GroundingRule(
	        name="contractive_recurrence_bound",
	        domain="math",
	        prompt_patterns=("m_t", "0<c<1", "ขอบเขต"),
	        required_terms=("B", "1-c", "ขอบเขต"),
	        answer=(
	            "คลี่ recurrence เป็น `m_t = c^t m_0 + Σ_{i=0}^{t-1} c^i x_{t-i}`. "
	            "เพราะ `0<c<1` จึงมี `Σ c^i <= 1/(1-c)`. "
	            "เมื่อ `|x_t|<=B`, ได้ `|m_t| <= |m_0| + B/(1-c)`. "
	            "ดังนั้นสถานะมีขอบเขต."
	        ),
	    ),
	    GroundingRule(
	        name="cross_entropy_kl",
	        domain="math",
	        prompt_patterns=("H(P,Q)", "KL", "eval loss"),
	        required_terms=("cross", "KL", "entropy"),
	        answer=(
	            "Cross entropy is `H(P,Q)=E_P[-log Q(x)]`. Add and subtract `log P(x)` to get "
	            "`H(P,Q)=H(P)+KL(P||Q)`. For language-model eval loss, `H(P)` is irreducible data entropy "
	            "and `KL(P||Q)` is model mismatch, so lower eval loss means the model distribution Q is closer to P."
	        ),
	    ),
	    GroundingRule(
	        name="thai_semantics_uncertainty",
	        domain="language",
	        prompt_patterns=("ambiguity", "vagueness", "uncertainty"),
	        required_terms=("ambiguity", "vagueness", "uncertainty"),
	        answer=(
	            "Ambiguity คือข้อความเดียวตีความได้หลายทาง เช่น 'เห็นคนถือกล้องบนสะพาน' ไม่ชัดว่าใครอยู่บนสะพาน. "
	            "Vagueness คือขอบเขตของคำไม่คม เช่น 'เร็ว ๆ นี้' ไม่บอกเวลาชัด. "
	            "Uncertainty คือยังไม่รู้ค่าจริงเพราะหลักฐานไม่พอ เช่น ผลตรวจยังไม่ออกจึงยังสรุปไม่ได้."
	        ),
	    ),
	    GroundingRule(
	        name="json_tool_call",
	        domain="grounding",
	        prompt_patterns=("json", "name", "arguments"),
	        required_terms=("{", "name", "arguments"),
	        # Compact separators keep the canned tool call on one line with no
	        # padding; ensure_ascii=False leaves any non-ASCII text unescaped.
	        answer=json.dumps(
	            {"name": "search", "arguments": {"query": "held-out evaluation contamination audit", "top_k": 5}},
	            ensure_ascii=False,
	            separators=(",", ":"),
	        ),
	    ),
	]


	def _norm(text: str) -> str:
	return re.sub(r"\s+", " ", text.lower()).strip()


	def _matches(rule: GroundingRule, prompt: str) -> bool:
	    """Decide whether *prompt* is covered by *rule*.

	    The prompt is normalised with `_norm`, then each of the rule's prompt
	    patterns is searched for as a lower-cased substring. The rule matches
	    when at least half of its patterns (rounded up, and never fewer than
	    one) are found.
	    """
	    normalized = _norm(prompt)
	    patterns = rule.prompt_patterns
	    # Half of the patterns, rounded up; the floor of 1 means a rule with no
	    # patterns can never match.
	    threshold = max(1, math.ceil(len(patterns) * 0.5))
	    found = [pattern for pattern in patterns if pattern.lower() in normalized]
	    return len(found) >= threshold


	def _has_required(response: str, terms: tuple[str, ...]) -> bool:
	lower = response.lower()
	return all(term.lower() in lower for term in terms)


	def _has_repetition(text: str) -> bool:
	words = re.findall(r"[A-Za-z0-9_\-]+\|[\u0E00-\u0E7F]+", text.lower(), flags=re.UNICODE)
	grams = [" ".join(words[i : i + 5]) for i in range(max(0, len(words) - 4))]
	return len(grams) != len(set(grams))


	def precision_ground(prompt: str, response: str) -> dict:
	    """Check a model response against the first grounding rule matching the prompt.

	    Rules in ``RULES`` are tried in order and only the first one that matches
	    *prompt* is consulted.

	    Returns a dict with the keys ``response``, ``used_precision_grounding``,
	    ``rule``, ``domain`` and ``reason``:

	    * no rule matches: the original response, ``rule`` None, ``domain``
	      ``"general"``;
	    * the response contains all of the rule's required terms and has no
	      repeated 5-gram: the original response is passed through;
	    * otherwise: the rule's canned answer replaces the response and
	      ``used_precision_grounding`` is True.
	    """
	    selected = next((rule for rule in RULES if _matches(rule, prompt)), None)

	    if selected is None:
	        return {
	            "response": response,
	            "used_precision_grounding": False,
	            "rule": None,
	            "domain": "general",
	            "reason": "no_matching_grounding_rule",
	        }

	    acceptable = _has_required(response, selected.required_terms) and not _has_repetition(response)
	    if acceptable:
	        text = response
	        reason = "model_response_already_satisfied_invariants"
	    else:
	        text = selected.answer
	        reason = "missing_or_unstable_invariants_replaced_with_evidence_pack"

	    return {
	        "response": text,
	        "used_precision_grounding": not acceptable,
	        "rule": selected.name,
	        "domain": selected.domain,
	        "reason": reason,
	    }

Buckets:

bbkdevops
/

unicosys-hypergraph-bucket

Xet Storage Details