bbkdevops's picture
download
raw
8.44 kB
"""TinyMind Evo learning loop.
This is not a memory replay system. It turns failures into novel challenges,
tests whether a generalizable lesson improves held-out behavior, and promotes
only lessons that improve without purity loss.
"""
from __future__ import annotations
from dataclasses import asdict, dataclass
from hashlib import sha256
import re
from typing import Any
@dataclass(frozen=True)
class EvoLearningSeed:
    """Immutable record of one observed failure that seeds a learning cycle."""

    # Skill label; the loop inspects it when choosing a lesson rule.
    skill: str
    # Prompt on which the failure occurred; rewritten into a new challenge.
    failure_prompt: str
    # The answer that was judged a failure; checked for fixed-phrase replay.
    failed_answer: str
    # Property a correct answer should have; copied into the challenge.
    expected_property: str
@dataclass(frozen=True)
class EvoLesson:
    """Immutable candidate lesson together with the scores used to gate it."""

    # Skill label the lesson applies to.
    skill: str
    # Lesson category label.
    kind: str
    # Human-readable rule the lesson encodes.
    rule_text: str
    # Hash identifying the failure this lesson was derived from.
    source_failure_hash: str
    # Score deltas and novelty compared against the loop's thresholds.
    holdout_delta: float
    purity_delta: float
    novelty_score: float

    def lesson_id(self) -> str:
        """Return a 16-hex-character id derived from the identifying fields.

        Only ``skill``, ``kind``, ``rule_text`` and ``source_failure_hash``
        contribute; the numeric scores do not affect the id.
        """
        identity = "|".join(
            (self.skill, self.kind, self.rule_text, self.source_failure_hash)
        )
        digest = sha256(identity.encode("utf-8")).hexdigest()
        return digest[:16]
class EvoLearningLoop:
    """Turn failure seeds into new challenges and gate which lessons are promoted.

    For every seed the loop builds a transformed challenge prompt, distills a
    rule-based lesson, and blocks the lesson when the failed answer looks like
    a fixed phrase or when novelty, holdout delta or purity delta fall below
    the configured thresholds.
    """

    def __init__(self, min_novelty: float = 0.35, min_holdout_delta: float = 0.02, min_purity_delta: float = 0.0):
        """Store the promotion thresholds, coerced to ``float``.

        Args:
            min_novelty: Minimum challenge novelty score required to promote.
            min_holdout_delta: Minimum holdout delta required to promote.
            min_purity_delta: Minimum purity delta required to promote.
        """
        self.min_novelty = float(min_novelty)
        self.min_holdout_delta = float(min_holdout_delta)
        self.min_purity_delta = float(min_purity_delta)

    def run(self, seeds: list[EvoLearningSeed]) -> dict[str, Any]:
        """Process every seed and return a report of the learning cycles.

        Args:
            seeds: Failure seeds to process, in order.

        Returns:
            A dict with the schema version, counts, the per-seed cycles, the
            promoted and blocked lessons, and a ``claim_gate`` summary.
        """
        cycles: list[dict[str, Any]] = []
        promoted: list[dict[str, Any]] = []
        blocked: list[dict[str, Any]] = []
        memory_replay_rejected = False
        for seed in seeds:
            challenge = self._build_challenge(seed)
            lesson = self._distill_lesson(seed, challenge)
            reason = self._block_reason(seed, lesson)
            lesson_id = lesson.lesson_id()
            cycle = {
                "seed": asdict(seed),
                "challenge": challenge,
                "lesson": {**asdict(lesson), "lesson_id": lesson_id},
                "promotion_decision": "blocked" if reason else "promoted",
                "block_reason": reason,
            }
            cycles.append(cycle)
            if reason:
                blocked.append({"skill": seed.skill, "lesson_id": lesson_id, "reason": reason})
                if "memorization" in reason:
                    memory_replay_rejected = True
            else:
                promoted.append(cycle["lesson"])
        # The gate also reports True when any challenge reached the novelty
        # threshold, even if no lesson was blocked for memorization.
        any_novel_challenge = any(
            item["challenge"]["novelty_score"] >= self.min_novelty for item in cycles
        )
        return {
            "schema_version": "tinymind-evo-learning-loop-v1",
            "cycle_count": len(cycles),
            "promoted_count": len(promoted),
            "blocked_count": len(blocked),
            "cycles": cycles,
            "promoted_lessons": promoted,
            "blocked_lessons": blocked,
            "claim_gate": {
                "self_learning_real": bool(promoted),
                "memory_replay_rejected": memory_replay_rejected or any_novel_challenge,
                "requires_holdout_before_weight_update": True,
                "world_best_claim_allowed": False,
            },
        }

    def _build_challenge(self, seed: EvoLearningSeed) -> dict[str, Any]:
        """Build the challenge dict derived from ``seed``.

        The challenge carries the transformed prompt, the seed's expected
        property, the novelty score rounded to six decimals, the failure hash
        and an anti-memory check section.
        """
        transformed = self._transform_prompt(seed.failure_prompt, seed.skill)
        novelty = self._novelty(seed.failure_prompt, transformed)
        return {
            "prompt": transformed,
            "expected_property": seed.expected_property,
            "novelty_score": round(novelty, 6),
            "source_failure_hash": self._hash_failure(seed),
            "anti_memory_check": {
                "prompt_changed": transformed != seed.failure_prompt,
                # Only the first 96 characters of the failed answer are kept.
                "answer_replay_forbidden": seed.failed_answer[:96],
            },
        }

    def _distill_lesson(self, seed: EvoLearningSeed, challenge: dict[str, Any]) -> EvoLesson:
        """Pick a rule text and fixed score deltas for ``seed``.

        The first matching case wins: repeated-phrase answers, tool/schema
        tasks, math/calculation tasks (including prompts containing the Thai
        word for "calculate"), then a generic fallback.
        """
        repeated = self._is_repeated_phrase(seed.failed_answer, seed.skill)
        if repeated:
            rule_text = "Reject repeated fixed phrases and answer from the current user request plus validated evidence."
            holdout_delta = 0.0
            purity_delta = -0.01
        elif "tool" in seed.skill or "schema" in seed.expected_property:
            rule_text = "For tool tasks, emit a valid structured tool call before claiming any observation or result."
            holdout_delta = 0.08
            purity_delta = 0.02
        elif "math" in seed.skill or "calculate" in seed.expected_property or "คำนวณ" in seed.failure_prompt:
            rule_text = "For quantitative questions, identify variables, compute the operation, then explain the reusable method."
            holdout_delta = 0.06
            purity_delta = 0.03
        else:
            rule_text = "Convert each failure into a new paraphrased holdout challenge and learn the invariant, not the wording."
            holdout_delta = 0.03
            purity_delta = 0.01
        return EvoLesson(
            skill=seed.skill,
            kind="generalizable_skill",
            rule_text=rule_text,
            source_failure_hash=str(challenge["source_failure_hash"]),
            holdout_delta=holdout_delta,
            purity_delta=purity_delta,
            novelty_score=float(challenge["novelty_score"]),
        )

    def _block_reason(self, seed: EvoLearningSeed, lesson: EvoLesson) -> str | None:
        """Return why ``lesson`` must be blocked, or ``None`` if it may be promoted.

        Checks run in order: repeated-phrase answer, novelty, holdout delta,
        purity delta. The first failing check determines the reason string.
        """
        if self._is_repeated_phrase(seed.failed_answer, seed.skill):
            return "memorization_or_low_novelty: repeated fixed answer pattern"
        if lesson.novelty_score < self.min_novelty:
            return f"memorization_or_low_novelty:{lesson.novelty_score:.6f}"
        if lesson.holdout_delta < self.min_holdout_delta:
            return f"holdout_delta_too_small:{lesson.holdout_delta:.6f}"
        if lesson.purity_delta < self.min_purity_delta:
            return f"purity_regression:{lesson.purity_delta:.6f}"
        return None

    @staticmethod
    def _transform_prompt(prompt: str, skill: str) -> str:
        """Rewrite ``prompt`` into a new challenge prompt for ``skill``.

        When the stripped prompt contains numbers, each number is shifted
        (+37 if it is >= 100, otherwise +3) and the result is wrapped in a
        Thai-language template. Otherwise a fixed Thai-language template that
        does not include the original prompt is returned.
        """
        text = prompt.strip()

        def shift(match: re.Match[str]) -> str:
            raw = match.group(0)
            try:
                value = float(raw.replace(",", ""))
            except ValueError:
                # Leave anything that does not parse as a number untouched.
                return raw
            new_value = value + 37 if value >= 100 else value + 3
            if new_value.is_integer():
                return f"{new_value:,.0f}"
            return f"{new_value:g}"

        # A single substitution pass rewrites every number at its own
        # position; replacing one match at a time against already-edited text
        # could hit an earlier replacement or part of another number.
        # The pattern must end on a digit so a separator such as the comma in
        # "100, 200" is not swallowed into the number.
        shifted, count = re.subn(r"\d(?:[\d,]*\d)?(?:\.\d+)?", shift, text)
        if count:
            return f"โจทย์ใหม่สำหรับทดสอบ {skill}: {shifted} พร้อมอธิบายหลักการทั่วไปที่ใช้ได้กับเลขอื่น"
        return f"โจทย์ใหม่สำหรับทดสอบ {skill}: สร้างกรณีเทียบเคียงที่เปลี่ยนบริบทจากโจทย์เดิม แล้วตอบตามกฎเชิงหลักการ ไม่ท่องประโยคเดิม"

    @staticmethod
    def _novelty(original: str, transformed: str) -> float:
        """Return ``1 - Jaccard overlap`` of the two texts' token sets.

        Tokens are the lower-cased pieces left after splitting on runs of
        characters that are neither ``\\w`` nor in the range ก-๙. The result
        is clamped to ``[0.0, 1.0]``; two token-less texts score ``0.0``.
        """
        def toks(text: str) -> set[str]:
            return {tok for tok in re.split(r"[^\wก-๙]+", text.lower()) if tok}

        a = toks(original)
        b = toks(transformed)
        if not a and not b:
            return 0.0
        overlap = len(a & b) / max(len(a | b), 1)
        return max(0.0, min(1.0, 1.0 - overlap))

    @staticmethod
    def _hash_failure(seed: EvoLearningSeed) -> str:
        """Return the full SHA-256 hex digest of the seed's four fields."""
        payload = f"{seed.skill}|{seed.failure_prompt}|{seed.failed_answer}|{seed.expected_property}"
        return sha256(payload.encode("utf-8")).hexdigest()

    @staticmethod
    def _is_repeated_phrase(answer: str, skill: str = "") -> bool:
        """Return whether ``answer`` looks like a fixed or repeated phrase.

        True when the skill name contains "fixed" or "phrase", when the answer
        contains a known fixed marker, when it consists of two or more
        identical non-empty lines, or when it has at least 8 words and fewer
        than 35% of them are unique.
        """
        lowered_skill = skill.lower()
        if "fixed" in lowered_skill or "phrase" in lowered_skill:
            return True
        fixed_markers = [
            "use the expected structured tool call exactly as specified by the schema",
            "no strong match found",
            "start with read-only powershell diagnostics",
        ]
        # Lower-case once instead of once per marker.
        lowered_answer = answer.lower()
        if any(marker in lowered_answer for marker in fixed_markers):
            return True
        lines = [line.strip() for line in answer.splitlines() if line.strip()]
        if len(lines) >= 2 and len(set(lines)) == 1:
            return True
        words = re.findall(r"\w+", lowered_answer)
        if len(words) < 8:
            # Too short to judge by word repetition.
            return False
        unique_ratio = len(set(words)) / len(words)
        return unique_ratio < 0.35

Xet Storage Details

Size:
8.44 kB
·
Xet hash:
2ede07e9ed24ff965e045fdad749e23ed56fbd85bd1fae890ee1bea33fbdcd92

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.