Spaces:
Running
Running
| """MemoryAgent — extracts a lesson from a finished episode and edits lessons.md. | |
| Implementation is deterministic and dependency-free for the hackathon | |
| submission. It mines: | |
| - the action history (what dispatches were issued, in what order) | |
| - the per-step rewards (which steps moved the needle) | |
| - the final score and component breakdown | |
| into a structured lesson keyed by the dominant change_type encountered | |
| during the episode. | |
| If a lesson with the same pattern already exists in lessons.md, the | |
| MemoryAgent updates its confidence using a moving average and appends | |
| the new episode_id; otherwise it creates a fresh entry. | |
| """ | |
| import os | |
| from typing import Dict, List, Optional | |
| from server.memory.lesson_schema import ( | |
| Lesson, | |
| parse_lessons_md, | |
| render_lessons_md, | |
| ) | |
| class MemoryAgent: | |
| def __init__(self, lessons_path: str = "lessons.md", min_score_to_remember: float = 0.55): | |
| self.lessons_path = lessons_path | |
| self.min_score = min_score_to_remember | |
| def update( | |
| self, | |
| episode_id: str, | |
| score: float, | |
| n_steps: int, | |
| action_log: List[Dict], | |
| dominant_change_type: Optional[str], | |
| component_rewards: Dict[str, float], | |
| ) -> Optional[Lesson]: | |
| """Read lessons.md, extract a new lesson if score warrants it, | |
| write the updated file back. Returns the Lesson written (or None).""" | |
| if score < self.min_score: | |
| return None | |
| if not dominant_change_type: | |
| return None | |
| existing = self._load() | |
| pattern_key = self._pattern_label(dominant_change_type) | |
| match: Optional[Lesson] = None | |
| for L in existing: | |
| if L.pattern == pattern_key: | |
| match = L | |
| break | |
| what_works = self._summarize_what_works(action_log, n_steps) | |
| what_fails = self._summarize_what_fails(component_rewards) | |
| if match is None: | |
| new_lesson = Lesson( | |
| lesson_id=f"L{len(existing)+1:03d}", | |
| pattern=pattern_key, | |
| first_seen=episode_id, | |
| what_works=what_works, | |
| what_fails=what_fails, | |
| confidence=min(0.99, max(0.05, score)), | |
| episodes=[episode_id], | |
| ) | |
| existing.append(new_lesson) | |
| written = new_lesson | |
| else: | |
| # moving average over confidence; cap episode list at 20 most recent | |
| n = len(match.episodes) + 1 | |
| match.confidence = min(0.99, ((match.confidence * (n - 1)) + score) / n) | |
| if episode_id not in match.episodes: | |
| match.episodes.append(episode_id) | |
| match.episodes = match.episodes[-20:] | |
| if not match.what_works: | |
| match.what_works = what_works | |
| if not match.what_fails: | |
| match.what_fails = what_fails | |
| written = match | |
| self._save(existing) | |
| return written | |
| def _load(self) -> List[Lesson]: | |
| if not os.path.exists(self.lessons_path): | |
| return [] | |
| with open(self.lessons_path, "r", encoding="utf-8") as f: | |
| text = f.read() | |
| return parse_lessons_md(text) | |
| def _save(self, lessons: List[Lesson]): | |
| text = render_lessons_md(lessons) | |
| with open(self.lessons_path, "w", encoding="utf-8") as f: | |
| f.write(text) | |
| def _pattern_label(change_type: str) -> str: | |
| return { | |
| "field_renamed": "Field renames in response shape", | |
| "type_narrowed": "Type narrowing in response fields", | |
| "required_field_added": "Required field additions", | |
| "endpoint_removed": "Endpoint removals or moves", | |
| "endpoint_renamed": "Endpoint renames or moves", | |
| "enum_narrowed": "Enum value narrowing", | |
| "response_shape_changed": "Response shape nesting changes", | |
| "auth_scheme_changed": "Authentication scheme changes", | |
| "field_removed": "Field removals in response shape", | |
| "param_required_added": "Required query parameter additions", | |
| "default_changed": "Default value changes", | |
| "method_changed": "HTTP method changes on endpoints", | |
| "status_code_removed": "Status code removals", | |
| }.get(change_type, f"Other: {change_type}") | |
| def _summarize_what_works(action_log: List[Dict], n_steps: int) -> str: | |
| commands = [a.get("command") for a in action_log if a.get("command")] | |
| if not commands: | |
| return "" | |
| summary = " -> ".join(commands) | |
| return f"In {n_steps} steps: {summary}" | |
| def _summarize_what_fails(component_rewards: Dict[str, float]) -> str: | |
| weak = [name for name, v in component_rewards.items() if v < 0.4] | |
| if not weak: | |
| return "No weak components observed." | |
| return "Weak components last episode: " + ", ".join(weak) | |