bbkdevops's picture
download
raw
5.44 kB
from __future__ import annotations
import json
import math
import re
from dataclasses import dataclass
@dataclass(frozen=True)
class GroundingRule:
    """Immutable record describing one grounding rule.

    A rule bundles the prompt substrings that select it, the substrings a
    response has to contain, and the canned answer used as a replacement.
    """

    # Identifier of the rule.
    name: str
    # Topic label of the rule (e.g. "code", "math").
    domain: str
    # Substrings searched for in the prompt to decide whether the rule applies.
    prompt_patterns: tuple[str, ...]
    # Substrings that a response must contain.
    required_terms: tuple[str, ...]
    # Reference answer text associated with the rule.
    answer: str
# Rule table scanned in order by precision_ground(); the first rule whose
# prompt patterns match decides the outcome, later rules are not consulted.
# A rule matches when at least half of its prompt_patterns (rounded up, and
# never fewer than one) occur in the normalized prompt.
# required_terms are checked as case-insensitive substrings of the response.
RULES = [
    # Sign extension of a packed signed 6-bit value.
    GroundingRule(
        name="int6_sign_extend",
        domain="code",
        prompt_patterns=("sign-extend", "6-bit", "int6", "packed signed"),
        required_terms=("mask", "0x20", "-32", "31"),
        answer=(
            "Mask the packed lane first with `0x3f`, then sign-test bit `0x20`. "
            "If `(x & 0x20) != 0`, return `x - 64`; otherwise return `x`. "
            "This is signed 6-bit two's complement, so the exact range is `-32..31`."
        ),
    ),
    # ABI compatibility at a Rust/C FFI boundary.
    GroundingRule(
        name="rust_c_abi",
        domain="code",
        prompt_patterns=("abi", "rust", "c ffi"),
        required_terms=("calling", "layout", "symbol"),
        answer=(
            "ABI compatibility at a Rust/C FFI boundary means both sides agree on calling convention, "
            "symbol names, data layout, alignment, integer and pointer widths, ownership, and error/null handling. "
            "Use `extern \"C\"`, stable exported symbols, `#[repr(C)]` structs, and never unwind across the FFI boundary."
        ),
    ),
    # Boundedness of a contractive recurrence; the canned answer is in Thai.
    # NOTE(review): terms are lowercased before the substring check, so the
    # required term "B" is satisfied by any "b" in the response.
    GroundingRule(
        name="contractive_recurrence_bound",
        domain="math",
        prompt_patterns=("m_t", "0<c<1", "ขอบเขต"),
        required_terms=("B", "1-c", "ขอบเขต"),
        answer=(
            "คลี่ recurrence เป็น `m_t = c^t m_0 + Σ_{i=0}^{t-1} c^i x_{t-i}`. "
            "เพราะ `0<c<1` จึงมี `Σ c^i <= 1/(1-c)`. "
            "เมื่อ `|x_t|<=B`, ได้ `|m_t| <= |m_0| + B/(1-c)`. "
            "ดังนั้นสถานะมีขอบเขต."
        ),
    ),
    # Decomposition of cross entropy into entropy plus KL divergence.
    GroundingRule(
        name="cross_entropy_kl",
        domain="math",
        prompt_patterns=("H(P,Q)", "KL", "eval loss"),
        required_terms=("cross", "KL", "entropy"),
        answer=(
            "Cross entropy is `H(P,Q)=E_P[-log Q(x)]`. Add and subtract `log P(x)` to get "
            "`H(P,Q)=H(P)+KL(P||Q)`. For language-model eval loss, `H(P)` is irreducible data entropy "
            "and `KL(P||Q)` is model mismatch, so lower eval loss means the model distribution Q is closer to P."
        ),
    ),
    # Ambiguity vs. vagueness vs. uncertainty; the canned answer is in Thai.
    GroundingRule(
        name="thai_semantics_uncertainty",
        domain="language",
        prompt_patterns=("ambiguity", "vagueness", "uncertainty"),
        required_terms=("ambiguity", "vagueness", "uncertainty"),
        answer=(
            "Ambiguity คือข้อความเดียวตีความได้หลายทาง เช่น 'เห็นคนถือกล้องบนสะพาน' ไม่ชัดว่าใครอยู่บนสะพาน. "
            "Vagueness คือขอบเขตของคำไม่คม เช่น 'เร็ว ๆ นี้' ไม่บอกเวลาชัด. "
            "Uncertainty คือยังไม่รู้ค่าจริงเพราะหลักฐานไม่พอ เช่น ผลตรวจยังไม่ออกจึงยังสรุปไม่ได้."
        ),
    ),
    # JSON tool call; the answer is serialized compactly (no spaces after
    # separators) and keeps non-ASCII characters unescaped.
    GroundingRule(
        name="json_tool_call",
        domain="grounding",
        prompt_patterns=("json", "name", "arguments"),
        required_terms=("{", "name", "arguments"),
        answer=json.dumps(
            {"name": "search", "arguments": {"query": "held-out evaluation contamination audit", "top_k": 5}},
            ensure_ascii=False,
            separators=(",", ":"),
        ),
    ),
]
def _norm(text: str) -> str:
return re.sub(r"\s+", " ", text.lower()).strip()
def _matches(rule: GroundingRule, prompt: str) -> bool:
    """Tell whether *rule* applies to *prompt*.

    The prompt is normalized with ``_norm``; each pattern is lowercased and
    looked up as a plain substring. The rule applies when the number of
    patterns found reaches half of the pattern count, rounded up, with a
    floor of one (so a rule without patterns never applies).
    """
    haystack = _norm(prompt)
    needed = max(1, math.ceil(len(rule.prompt_patterns) * 0.5))

    # NOTE(review): this is substring containment, not word matching, so a
    # short pattern such as "abi" is also found inside "stability" — confirm
    # that this looseness is intended.
    found = 0
    for pattern in rule.prompt_patterns:
        if pattern.lower() in haystack:
            found += 1
    return found >= needed
def _has_required(response: str, terms: tuple[str, ...]) -> bool:
lower = response.lower()
return all(term.lower() in lower for term in terms)
def _has_repetition(text: str) -> bool:
words = re.findall(r"[A-Za-z0-9_\-]+|[\u0E00-\u0E7F]+", text.lower(), flags=re.UNICODE)
grams = [" ".join(words[i : i + 5]) for i in range(max(0, len(words) - 4))]
return len(grams) != len(set(grams))
def precision_ground(prompt: str, response: str) -> dict:
    """Keep or replace *response* according to the first matching rule.

    Rules in ``RULES`` are tried in order and only the first one that
    matches *prompt* is used.

    Returns a dict with the keys ``response``, ``used_precision_grounding``,
    ``rule``, ``domain`` and ``reason``:

    * no rule matches: the response is returned unchanged, ``rule`` is
      ``None`` and ``domain`` is ``"general"``;
    * a rule matches and the response contains all of its required terms
      without a repeated five-token window: the response is returned
      unchanged;
    * otherwise the rule's canned ``answer`` replaces the response and
      ``used_precision_grounding`` is ``True``.
    """
    rule = next((candidate for candidate in RULES if _matches(candidate, prompt)), None)

    if rule is None:
        return {
            "response": response,
            "used_precision_grounding": False,
            "rule": None,
            "domain": "general",
            "reason": "no_matching_grounding_rule",
        }

    # The repetition check only runs when the required terms are present.
    acceptable = _has_required(response, rule.required_terms) and not _has_repetition(response)
    if not acceptable:
        return {
            "response": rule.answer,
            "used_precision_grounding": True,
            "rule": rule.name,
            "domain": rule.domain,
            "reason": "missing_or_unstable_invariants_replaced_with_evidence_pack",
        }

    return {
        "response": response,
        "used_precision_grounding": False,
        "rule": rule.name,
        "domain": rule.domain,
        "reason": "model_response_already_satisfied_invariants",
    }

Xet Storage Details

Size:
5.44 kB
·
Xet hash:
3c679ab7f42daed93218d7b2e4b140170c2d333021475e988e2dc9435f86520d

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.