Buckets:
bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff /bundle /model /tinymind-12b /precision_grounding_runtime.py
| from __future__ import annotations | |
| import json | |
| import math | |
| import re | |
| from dataclasses import dataclass | |
@dataclass
class GroundingRule:
    """A single precision-grounding rule: when it applies and what it answers.

    Instances are built with keyword arguments, so the class must be a
    dataclass; without the decorator the annotated fields produce no
    ``__init__`` and constructing a rule raises ``TypeError``.
    """

    # Identifier reported in the "rule" field of a grounding result.
    name: str
    # Category label reported in the "domain" field of a grounding result.
    domain: str
    # Substrings looked for (case-insensitively) in the normalized prompt.
    prompt_patterns: tuple[str, ...]
    # Substrings that must all occur (case-insensitively) in a model response
    # for that response to be kept unchanged.
    required_terms: tuple[str, ...]
    # Canned reply returned in place of a response that fails the checks.
    answer: str
# Ordered rule table. Order matters: the first rule whose prompt patterns
# match is the only one consulted for a given prompt.
RULES = [
    # Code: sign-extending a packed signed 6-bit value.
    GroundingRule(
        name="int6_sign_extend",
        domain="code",
        prompt_patterns=("sign-extend", "6-bit", "int6", "packed signed"),
        required_terms=("mask", "0x20", "-32", "31"),
        answer=(
            "Mask the packed lane first with `0x3f`, then sign-test bit `0x20`. "
            "If `(x & 0x20) != 0`, return `x - 64`; otherwise return `x`. "
            "This is signed 6-bit two's complement, so the exact range is `-32..31`."
        ),
    ),
    # Code: ABI compatibility across a Rust/C FFI boundary.
    GroundingRule(
        name="rust_c_abi",
        domain="code",
        prompt_patterns=("abi", "rust", "c ffi"),
        required_terms=("calling", "layout", "symbol"),
        answer=(
            "ABI compatibility at a Rust/C FFI boundary means both sides agree on calling convention, "
            "symbol names, data layout, alignment, integer and pointer widths, ownership, and error/null handling. "
            "Use `extern \"C\"`, stable exported symbols, `#[repr(C)]` structs, and never unwind across the FFI boundary."
        ),
    ),
    # Math: boundedness of a contractive recurrence (answer text is in Thai).
    GroundingRule(
        name="contractive_recurrence_bound",
        domain="math",
        prompt_patterns=("m_t", "0<c<1", "ขอบเขต"),
        required_terms=("B", "1-c", "ขอบเขต"),
        answer=(
            "คลี่ recurrence เป็น `m_t = c^t m_0 + Σ_{i=0}^{t-1} c^i x_{t-i}`. "
            "เพราะ `0<c<1` จึงมี `Σ c^i <= 1/(1-c)`. "
            "เมื่อ `|x_t|<=B`, ได้ `|m_t| <= |m_0| + B/(1-c)`. "
            "ดังนั้นสถานะมีขอบเขต."
        ),
    ),
    # Math: cross entropy decomposed into entropy plus KL divergence.
    GroundingRule(
        name="cross_entropy_kl",
        domain="math",
        prompt_patterns=("H(P,Q)", "KL", "eval loss"),
        required_terms=("cross", "KL", "entropy"),
        answer=(
            "Cross entropy is `H(P,Q)=E_P[-log Q(x)]`. Add and subtract `log P(x)` to get "
            "`H(P,Q)=H(P)+KL(P||Q)`. For language-model eval loss, `H(P)` is irreducible data entropy "
            "and `KL(P||Q)` is model mismatch, so lower eval loss means the model distribution Q is closer to P."
        ),
    ),
    # Language: ambiguity vs. vagueness vs. uncertainty (answer text is in Thai).
    GroundingRule(
        name="thai_semantics_uncertainty",
        domain="language",
        prompt_patterns=("ambiguity", "vagueness", "uncertainty"),
        required_terms=("ambiguity", "vagueness", "uncertainty"),
        answer=(
            "Ambiguity คือข้อความเดียวตีความได้หลายทาง เช่น 'เห็นคนถือกล้องบนสะพาน' ไม่ชัดว่าใครอยู่บนสะพาน. "
            "Vagueness คือขอบเขตของคำไม่คม เช่น 'เร็ว ๆ นี้' ไม่บอกเวลาชัด. "
            "Uncertainty คือยังไม่รู้ค่าจริงเพราะหลักฐานไม่พอ เช่น ผลตรวจยังไม่ออกจึงยังสรุปไม่ได้."
        ),
    ),
    # Grounding: a compact JSON tool call, serialized once at import time.
    GroundingRule(
        name="json_tool_call",
        domain="grounding",
        prompt_patterns=("json", "name", "arguments"),
        required_terms=("{", "name", "arguments"),
        answer=json.dumps(
            {"name": "search", "arguments": {"query": "held-out evaluation contamination audit", "top_k": 5}},
            ensure_ascii=False,
            separators=(",", ":"),
        ),
    ),
]
def _norm(text: str) -> str:
    """Return *text* lower-cased, with whitespace runs collapsed and ends trimmed."""
    lowered = text.lower()
    # Every run of whitespace (spaces, tabs, newlines, ...) becomes one space.
    collapsed = re.sub(r"\s+", " ", lowered)
    return collapsed.strip()
def _matches(rule: GroundingRule, prompt: str) -> bool:
    """Return True when at least half of the rule's patterns occur in *prompt*.

    The prompt is normalized with ``_norm`` and each pattern is lower-cased
    before a plain substring test. The threshold is half the pattern count
    rounded up, and never less than one hit.
    """
    haystack = _norm(prompt)
    patterns = rule.prompt_patterns
    needed = max(1, math.ceil(len(patterns) * 0.5))
    found = [needle for needle in patterns if needle.lower() in haystack]
    return len(found) >= needed
def _has_required(response: str, terms: tuple[str, ...]) -> bool:
    """Return True when every term occurs in *response*, ignoring case.

    An empty *terms* tuple is trivially satisfied.
    """
    haystack = response.lower()
    for term in terms:
        if term.lower() not in haystack:
            return False
    return True
def _has_repetition(text: str) -> bool:
    """Return True when any run of five consecutive tokens occurs twice in *text*.

    Tokens are runs of ASCII letters, digits, ``_`` and ``-``, or runs of
    characters in the Thai block (U+0E00-U+0E7F), taken from the lower-cased
    text. Texts with fewer than five tokens never count as repetitive.
    """
    tokens = re.findall(r"[A-Za-z0-9_\-]+|[\u0E00-\u0E7F]+", text.lower(), flags=re.UNICODE)
    seen = set()
    for start in range(max(0, len(tokens) - 4)):
        # Tokens never contain spaces, so a tuple identifies the window as
        # uniquely as a space-joined string would.
        window = tuple(tokens[start : start + 5])
        if window in seen:
            return True
        seen.add(window)
    return False
def precision_ground(prompt: str, response: str) -> dict:
    """Keep or replace a model response according to the first matching rule.

    Args:
        prompt: The prompt that produced *response*; used to select a rule.
        response: The model's reply to check.

    Returns:
        A dict with the keys ``response``, ``used_precision_grounding``,
        ``rule``, ``domain`` and ``reason``. When no rule matches, or when the
        response contains all of the rule's required terms and shows no
        repeated five-token run, the original response is returned. Otherwise
        the rule's canned answer is returned instead.
    """
    # Only the first rule that matches the prompt is ever consulted.
    matched = next((rule for rule in RULES if _matches(rule, prompt)), None)

    if matched is None:
        return {
            "response": response,
            "used_precision_grounding": False,
            "rule": None,
            "domain": "general",
            "reason": "no_matching_grounding_rule",
        }

    if not _has_required(response, matched.required_terms) or _has_repetition(response):
        return {
            "response": matched.answer,
            "used_precision_grounding": True,
            "rule": matched.name,
            "domain": matched.domain,
            "reason": "missing_or_unstable_invariants_replaced_with_evidence_pack",
        }

    return {
        "response": response,
        "used_precision_grounding": False,
        "rule": matched.name,
        "domain": matched.domain,
        "reason": "model_response_already_satisfied_invariants",
    }
Xet Storage Details
- Size:
- 5.44 kB
- Xet hash:
- 3c679ab7f42daed93218d7b2e4b140170c2d333021475e988e2dc9435f86520d
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.