File size: 5,040 Bytes
3040bf7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
"""MemoryAgent — extracts a lesson from a finished episode and edits lessons.md.

Implementation is deterministic and dependency-free for the hackathon
submission. It mines:
  - the action history (which commands were issued, in what order)
  - the final score (gates whether the episode is remembered and feeds confidence)
  - the component reward breakdown (components scoring below 0.4 are flagged as weak)
into a structured lesson keyed by the dominant change_type encountered
during the episode.

If a lesson with the same pattern already exists in lessons.md, the
MemoryAgent updates its confidence using a moving average and appends
the new episode_id; otherwise it creates a fresh entry.
"""

import os
from typing import Dict, List, Optional

from server.memory.lesson_schema import (
    Lesson,
    parse_lessons_md,
    render_lessons_md,
)


class MemoryAgent:
    """Turn a finished episode into a lesson and persist it in a lessons file.

    Each call to :meth:`update` reads the lessons file, either creates a new
    lesson or folds the episode into the existing lesson with the same
    pattern label, and writes the whole file back.
    """

    # change_type -> human-readable pattern label. Built once at class
    # creation instead of on every ``_pattern_label`` call.
    _PATTERN_LABELS = {
        "field_renamed":          "Field renames in response shape",
        "type_narrowed":          "Type narrowing in response fields",
        "required_field_added":   "Required field additions",
        "endpoint_removed":       "Endpoint removals or moves",
        "endpoint_renamed":       "Endpoint renames or moves",
        "enum_narrowed":          "Enum value narrowing",
        "response_shape_changed": "Response shape nesting changes",
        "auth_scheme_changed":    "Authentication scheme changes",
        "field_removed":          "Field removals in response shape",
        "param_required_added":   "Required query parameter additions",
        "default_changed":        "Default value changes",
        "method_changed":         "HTTP method changes on endpoints",
        "status_code_removed":    "Status code removals",
    }

    def __init__(self, lessons_path: str = "lessons.md", min_score_to_remember: float = 0.55):
        """Store the lessons file path and the minimum score worth remembering.

        Args:
            lessons_path: File that lessons are loaded from and saved to.
            min_score_to_remember: Episodes scoring below this are ignored
                by :meth:`update`.
        """
        self.lessons_path = lessons_path
        self.min_score = min_score_to_remember

    def update(
        self,
        episode_id: str,
        score: float,
        n_steps: int,
        action_log: List[Dict],
        dominant_change_type: Optional[str],
        component_rewards: Dict[str, float],
    ) -> Optional[Lesson]:
        """Record one episode in the lessons file.

        Args:
            episode_id: Identifier stored in the lesson's episode list.
            score: Final episode score; also feeds the lesson confidence.
            n_steps: Step count quoted in the "what works" summary.
            action_log: Dicts whose truthy ``"command"`` entries are
                summarised in order.
            dominant_change_type: Key selecting the pattern label; a falsy
                value means nothing is recorded.
            component_rewards: Component name -> reward; components below
                0.4 are listed as weak.

        Returns:
            The lesson that was created or updated, or ``None`` when the
            score is below ``min_score`` or no change type was given (in
            which case the lessons file is neither read nor written).
        """
        if score < self.min_score or not dominant_change_type:
            return None

        existing = self._load()
        pattern_key = self._pattern_label(dominant_change_type)

        # First lesson carrying the same pattern label, if any.
        match: Optional[Lesson] = next(
            (lesson for lesson in existing if lesson.pattern == pattern_key),
            None,
        )

        what_works = self._summarize_what_works(action_log, n_steps)
        what_fails = self._summarize_what_fails(component_rewards)

        if match is None:
            written = Lesson(
                lesson_id=self._next_lesson_id(existing),
                pattern=pattern_key,
                first_seen=episode_id,
                what_works=what_works,
                what_fails=what_fails,
                # Clamp so a fresh lesson is never fully certain nor zero.
                confidence=min(0.99, max(0.05, score)),
                episodes=[episode_id],
            )
            existing.append(written)
        else:
            # Moving average over confidence, weighted by the number of
            # recorded episodes. The episode list is capped at the 20 most
            # recent, so the weight of history saturates at 20.
            # NOTE: the average is applied even when episode_id is already
            # recorded; only the append below is de-duplicated.
            n = len(match.episodes) + 1
            match.confidence = min(0.99, ((match.confidence * (n - 1)) + score) / n)
            if episode_id not in match.episodes:
                match.episodes.append(episode_id)
            match.episodes = match.episodes[-20:]
            # Summaries are only filled in when the stored lesson has none;
            # existing text is kept.
            if not match.what_works:
                match.what_works = what_works
            if not match.what_fails:
                match.what_fails = what_fails
            written = match

        self._save(existing)
        return written

    def _load(self) -> List[Lesson]:
        """Parse the lessons file, or return an empty list if it does not exist."""
        if not os.path.exists(self.lessons_path):
            return []
        with open(self.lessons_path, "r", encoding="utf-8") as f:
            text = f.read()
        return parse_lessons_md(text)

    def _save(self, lessons: List[Lesson]):
        """Render ``lessons`` and overwrite the lessons file with the result."""
        text = render_lessons_md(lessons)
        with open(self.lessons_path, "w", encoding="utf-8") as f:
            f.write(text)

    @staticmethod
    def _next_lesson_id(existing: List[Lesson]) -> str:
        """Return an ``L###`` id that does not collide with ``existing``.

        Uses one more than the larger of the lesson count and the highest
        numeric ``L<digits>`` id present, so gaps left by removed lessons
        cannot lead to a duplicate id. For a contiguous ``L001..L00n`` file
        this equals ``len(existing) + 1``.
        """
        highest = len(existing)
        for lesson in existing:
            raw = str(getattr(lesson, "lesson_id", "") or "")
            digits = raw[1:]
            # isascii() guards int() against non-ASCII "digit" characters.
            if raw.startswith("L") and digits.isascii() and digits.isdigit():
                highest = max(highest, int(digits))
        return f"L{highest + 1:03d}"

    @staticmethod
    def _pattern_label(change_type: str) -> str:
        """Map a change_type key to its label; unknown keys become ``Other: <key>``."""
        return MemoryAgent._PATTERN_LABELS.get(change_type, f"Other: {change_type}")

    @staticmethod
    def _summarize_what_works(action_log: List[Dict], n_steps: int) -> str:
        """Join the logged commands, in order, into a one-line summary.

        Entries without a truthy ``"command"`` are skipped. Commands are
        converted with ``str`` so non-string values do not break the join.
        Returns ``""`` when there is nothing to summarise (including a
        ``None`` or empty log).
        """
        commands = [
            str(entry.get("command"))
            for entry in (action_log or [])
            if entry.get("command")
        ]
        if not commands:
            return ""
        summary = " -> ".join(commands)
        return f"In {n_steps} steps: {summary}"

    @staticmethod
    def _summarize_what_fails(component_rewards: Dict[str, float]) -> str:
        """List the components whose reward is below 0.4.

        A ``None`` or empty mapping is treated as having no weak components.
        """
        weak = [name for name, v in (component_rewards or {}).items() if v < 0.4]
        if not weak:
            return "No weak components observed."
        return "Weak components last episode: " + ", ".join(weak)