File size: 22,149 Bytes
9a3b69b
43f41de
9a3b69b
eb1ebe6
 
43f41de
 
 
eb1ebe6
 
 
 
 
 
9a3b69b
 
 
 
 
 
 
 
 
5869d56
9a3b69b
43f41de
eb1ebe6
43f41de
 
9a3b69b
 
5869d56
9a3b69b
43f41de
eb1ebe6
43f41de
 
9a3b69b
 
 
8fa7af1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a3b69b
 
eb1ebe6
 
 
 
9a3b69b
eb1ebe6
 
9a3b69b
 
 
 
 
 
 
 
eb1ebe6
 
8fa7af1
43f41de
eb1ebe6
43f41de
 
 
 
 
 
 
 
eb1ebe6
 
 
 
9a3b69b
 
eb1ebe6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43f41de
 
 
 
 
 
 
 
eb1ebe6
 
 
 
 
 
 
43f41de
 
eb1ebe6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43f41de
 
 
 
 
 
 
 
eb1ebe6
 
 
 
 
 
43f41de
 
eb1ebe6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a3b69b
 
 
eb1ebe6
8fa7af1
43f41de
eb1ebe6
43f41de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a3b69b
43f41de
 
 
 
 
 
 
 
 
9a3b69b
43f41de
 
9a3b69b
 
 
 
 
 
 
 
b12f1bd
eb1ebe6
43f41de
 
 
 
eb1ebe6
 
 
43f41de
 
9a3b69b
 
 
 
eb1ebe6
43f41de
 
 
 
 
 
 
 
 
eb1ebe6
43f41de
eb1ebe6
 
 
 
 
 
9a3b69b
eb1ebe6
 
43f41de
 
eb1ebe6
 
 
 
 
 
 
9a3b69b
 
43f41de
8fa7af1
43f41de
 
 
 
8fa7af1
43f41de
 
 
eb1ebe6
 
 
 
43f41de
 
 
eb1ebe6
 
 
43f41de
 
eb1ebe6
43f41de
8fa7af1
eb1ebe6
9a3b69b
eb1ebe6
43f41de
eb1ebe6
43f41de
 
 
 
eb1ebe6
 
43f41de
 
b12f1bd
eb1ebe6
 
 
 
43f41de
eb1ebe6
b12f1bd
eb1ebe6
43f41de
 
 
 
 
b12f1bd
43f41de
 
 
eb1ebe6
9a3b69b
eb1ebe6
43f41de
 
 
 
 
 
 
 
 
eb1ebe6
 
 
9a3b69b
eb1ebe6
 
 
 
 
 
 
 
43f41de
eb1ebe6
 
 
 
 
 
 
43f41de
eb1ebe6
8fa7af1
 
eb1ebe6
5869d56
 
eb1ebe6
43f41de
 
 
8fa7af1
 
43f41de
 
eb1ebe6
 
 
 
43f41de
eb1ebe6
43f41de
8fa7af1
eb1ebe6
43f41de
eb1ebe6
 
 
9a3b69b
43f41de
 
 
 
 
8fa7af1
 
 
 
43f41de
eb1ebe6
 
43f41de
eb1ebe6
 
43f41de
8fa7af1
eb1ebe6
 
 
 
43f41de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8fa7af1
 
43f41de
 
 
 
 
 
 
 
 
 
 
 
 
 
8fa7af1
 
43f41de
8fa7af1
 
 
 
 
 
43f41de
 
8fa7af1
 
 
 
 
 
 
 
 
 
43f41de
 
8fa7af1
43f41de
 
8fa7af1
 
43f41de
 
 
 
 
 
 
eb1ebe6
 
 
9a3b69b
eb1ebe6
 
 
 
 
 
 
 
 
8fa7af1
43f41de
eb1ebe6
 
 
 
 
 
 
 
 
b12f1bd
eb1ebe6
 
 
8fa7af1
eb1ebe6
 
43f41de
 
 
eb1ebe6
 
 
 
9a3b69b
 
 
 
8fa7af1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
"""
Research -> Interactive Explainer Environment (multi-step, async).

Episode flow:
  1. reset() → agent gets a topic + tier
  2. step(explore) × 0..MAX_EXPLORE → agent calls research tools
  3. step(generate) × 1 → agent produces marimo/manim code
  4. step(repair) × 0..MAX_REPAIR → agent fixes lint/build errors if needed

Each step returns a per-step reward. The final generate step also includes
a generation reward that accounts for how well the code uses the research.

The environment supports async via reset_async() / step_async() overrides.
OpenEnv's HTTP server detects these and calls them directly (no thread pool).
"""

import random
from uuid import uuid4

from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State

try:
    from ..constants import MAX_EXPLORE_STEPS, MAX_REPAIR_STEPS, clamp_action_reward
    from ..models import ExplainerAction, ExplainerObservation
    from ..research import AVAILABLE_TOOLS, run_research_tool
    from ..rewards.exploration import compute_explore_reward
    from ..rewards.generation import adjust_repair_reward, compute_generate_reward
    from ..rewards.sandbox import validate_code
    from ..task_bank import ALL_TASKS, EASY_TASKS, HARD_TASKS, MEDIUM_TASKS, Task
except ImportError:
    from constants import MAX_EXPLORE_STEPS, MAX_REPAIR_STEPS, clamp_action_reward
    from models import ExplainerAction, ExplainerObservation
    from research import AVAILABLE_TOOLS, run_research_tool
    from rewards.exploration import compute_explore_reward
    from rewards.generation import adjust_repair_reward, compute_generate_reward
    from rewards.sandbox import validate_code
    from task_bank import ALL_TASKS, EASY_TASKS, HARD_TASKS, MEDIUM_TASKS, Task


# Extra repair guidance for the "MB002" error code. `_render_errors_with_hints`
# appends this text (after a blank line) to the rendered sandbox errors whenever
# "MB002" is among the reported error codes, so the agent sees it in feedback.
# The wording is emitted verbatim at runtime -- treat it as behaviour, not prose.
MB002_REPAIR_HINT = (
    "MB002 repair checklist: Marimo treats every non-underscore assignment as a "
    "global notebook variable, including `for` loop variables. Audit the whole "
    "file and rename cell-local names to private names everywhere: `arr` -> "
    "`_arr`, `target` -> `_target`, `i` -> `_i`, `t` -> `_t`, `freqs` -> "
    "`_freqs`, `fig` -> `_fig`, `ax` -> `_ax`. Public names should only be used "
    "for values intentionally passed to later cells, and each public name may be "
    "defined once globally."
)


def _render_errors_with_hints(errors: str, error_codes: list[str]) -> str:
    if "MB002" not in error_codes:
        return errors
    return f"{errors}\n\n{MB002_REPAIR_HINT}"


class ExplainerEnvironment(Environment):
    """
    Multi-step Research → Interactive Explainer environment.

    Phase 1 (explore): agent issues search queries, receives papers/wiki sections.
    Phase 2 (generate): agent produces marimo/manim code using the research.
    Phase 3 (repair): after a failed generate step the agent may resubmit
    corrected code, up to MAX_REPAIR_STEPS times.

    Supports async via reset_async() / step_async() — OpenEnv's server detects
    the overrides and awaits them directly instead of using a thread pool.

    Whenever an observation is returned with ``done=True`` the environment also
    records the episode as finished, so later steps are rejected until reset().
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    def __init__(self):
        super().__init__()
        self._current_task: Task | None = None
        # Pool used by reset() when no explicit difficulty/topic is requested.
        self._difficulty_pool: list[Task] = EASY_TASKS
        self._reset_episode_state(episode_id=None)

    # ------------------------------------------------------------------
    # Sync interface (fallback — OpenEnv prefers async when overridden)
    # ------------------------------------------------------------------

    def reset(self, seed=None, episode_id=None, **kwargs) -> ExplainerObservation:
        """Sample a task and return the initial observation (sync)."""
        return self._do_reset(seed=seed, episode_id=episode_id, **kwargs)

    def step(self, action: ExplainerAction, timeout_s=None, **kwargs) -> ExplainerObservation:
        """Route to the explore / generate / repair handler (sync).

        Explore is a coroutine; it is driven to completion with a blocking
        helper that also works when the caller already runs an event loop.
        ``timeout_s`` and ``kwargs`` are accepted for interface compatibility
        and are not used here.
        """
        early = self._begin_step()
        if early is not None:
            return early

        task = self._current_task
        try:
            if action.action_type == "explore":
                return self._run_blocking(self._handle_explore(action, task))
            return self._dispatch_sync_action(action, task)
        except Exception as e:
            return self._abort_episode(task, e)

    # ------------------------------------------------------------------
    # Async interface (preferred — OpenEnv detects these overrides)
    # ------------------------------------------------------------------

    async def reset_async(self, seed=None, episode_id=None, **kwargs) -> ExplainerObservation:
        """Sample a task and return the initial observation (async)."""
        return self._do_reset(seed=seed, episode_id=episode_id, **kwargs)

    async def step_async(self, action: ExplainerAction, timeout_s=None, **kwargs) -> ExplainerObservation:
        """Route to the explore / generate / repair handler (async).

        ``timeout_s`` and ``kwargs`` are accepted for interface compatibility
        and are not used here.
        """
        early = self._begin_step()
        if early is not None:
            return early

        task = self._current_task
        try:
            if action.action_type == "explore":
                return await self._handle_explore(action, task)
            return self._dispatch_sync_action(action, task)
        except Exception as e:
            return self._abort_episode(task, e)

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------

    def _reset_episode_state(self, episode_id=None) -> None:
        """(Re)initialise every per-episode field; the current task is left alone."""
        self._state = State(
            episode_id=episode_id or str(uuid4()), step_count=0
        )
        self._accumulated_context: list[str] = []
        self._explore_actions: list[str] = []
        self._used_tools: set[str] = set()
        self._explore_steps: int = 0
        self._repair_steps: int = 0
        self._phase: str = "explore"
        self._done: bool = False
        self._last_code: str = ""
        self._last_format: str = "marimo"
        self._last_narration: str = ""
        self._last_errors: str = ""
        self._last_error_codes: list[str] = []

    def _begin_step(self) -> ExplainerObservation | None:
        """Count the step and return an early observation if it cannot proceed.

        Returns ``None`` when a task is set and the episode is still running.
        """
        self._state.step_count += 1
        task = self._current_task

        if task is None:
            return ExplainerObservation(
                feedback="Error: no task set. Call reset() first.",
                done=True,
                reward=-1.0,
            )
        if self._done:
            return self._make_obs(
                task,
                phase="done",
                feedback="Episode is already done. Call reset() to start a new one.",
                reward=0.0,
                done=True,
            )
        return None

    @staticmethod
    def _run_blocking(coro):
        """Run ``coro`` to completion from synchronous code and return its result.

        ``asyncio.run`` refuses to start while another event loop is running in
        the same thread, so in that case the coroutine is run on a fresh loop in
        a short-lived worker thread. The calling thread blocks either way.
        """
        import asyncio
        from concurrent.futures import ThreadPoolExecutor

        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop in this thread: the plain path is safe.
            return asyncio.run(coro)

        with ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(asyncio.run, coro).result()

    def _dispatch_sync_action(self, action: ExplainerAction, task: Task) -> ExplainerObservation:
        """Handle every non-explore action type, including unknown ones."""
        kind = action.action_type
        if kind == "generate":
            return self._handle_generate(action, task)
        if kind == "repair":
            return self._handle_repair(action, task)

        # The observation below tells the client the episode is over, so the
        # environment must remember that too. The emitted phase is kept as
        # "explore" for backward compatibility with existing clients.
        self._phase = "done"
        self._done = True
        return self._make_obs(
            task,
            phase="explore",
            feedback=f"Unknown action_type: {action.action_type}",
            reward=0.0,
            done=True,
        )

    def _abort_episode(self, task: Task, exc: Exception) -> ExplainerObservation:
        """End the episode after an unexpected handler error and report it."""
        self._phase = "done"
        self._done = True
        return self._make_obs(
            task,
            phase="done",
            feedback=f"Environment error: {exc}",
            reward=0.0,
            done=True,
        )

    def _select_task(self, seed, topic, difficulty) -> Task:
        """Pick the task for a new episode.

        A truthy ``topic`` selects the first task in ALL_TASKS with that exact
        topic and falls back to a random task from ALL_TASKS when none matches.
        Otherwise a task is drawn from the pool for ``difficulty`` ("easy",
        "medium", "hard"; anything else uses the instance default pool), or
        from ALL_TASKS if that pool is empty. ``seed`` makes the draw
        reproducible when it is not None.
        """
        if topic:
            match = next((t for t in ALL_TASKS if t.topic == topic), None)
            if match is not None:
                return match
            pool = ALL_TASKS
        else:
            if difficulty == "medium":
                pool = MEDIUM_TASKS
            elif difficulty == "hard":
                pool = HARD_TASKS
            elif difficulty == "easy":
                pool = EASY_TASKS
            else:
                pool = self._difficulty_pool
            if not pool:
                pool = ALL_TASKS

        rng = random.Random(seed) if seed is not None else random.Random()
        return rng.choice(pool)

    def _do_reset(self, seed=None, episode_id=None, **kwargs) -> ExplainerObservation:
        """Shared reset logic (no I/O, so sync is fine).

        Recognised ``kwargs``: ``topic`` (select a specific task by topic name)
        and ``difficulty`` (choose the sampling pool).
        """
        self._reset_episode_state(episode_id)
        self._current_task = self._select_task(
            seed,
            topic=kwargs.get("topic", None),
            difficulty=kwargs.get("difficulty", None),
        )

        t = self._current_task
        return ExplainerObservation(
            topic=t.topic,
            content=t.content,
            tier=t.tier,
            keywords=t.keywords,
            data_available=t.data_available,
            difficulty=t.difficulty,
            phase="explore",
            feedback=(
                "Research phase: choose a tool and query relevant to the topic. "
                f"Available tools: {', '.join(AVAILABLE_TOOLS)}."
            ),
            search_results="",
            explored_context="",
            explore_steps_left=MAX_EXPLORE_STEPS,
            repair_attempts_left=MAX_REPAIR_STEPS,
            available_tools=list(AVAILABLE_TOOLS),
            done=False,
            reward=0.0,
        )

    async def _handle_explore(self, action: ExplainerAction, task: Task) -> ExplainerObservation:
        """Process an explore action: call a research tool and score the result.

        Only results flagged ``ok`` are added to the accumulated context and to
        the set of used tools; every attempted action is recorded regardless.
        """
        if self._phase not in {"explore", "generate"}:
            return self._make_obs(
                task,
                phase=self._phase,
                feedback=f"Cannot explore during phase '{self._phase}'.",
                reward=0.0,
            )

        if self._explore_steps >= MAX_EXPLORE_STEPS:
            self._phase = "generate"
            return self._make_obs(
                task,
                phase="generate",
                feedback="Max explore steps reached. You must now generate.",
                reward=0.0,
            )

        # NOTE: the budget is charged before validation, so an empty query
        # still consumes one explore step.
        self._explore_steps += 1
        # Tolerate missing (None) text fields instead of raising on .strip().
        query = (action.query or "").strip()
        intent = (action.intent or "").strip()
        tool = action.tool or "search_wikipedia"

        if not query:
            return self._make_obs(
                task,
                phase="explore",
                feedback="Empty query. Provide a search query.",
                reward=0.0,
            )

        # Snapshot the pre-call history: the reward compares this step against
        # what was known before it.
        previous_context = list(self._accumulated_context)
        previous_actions = list(self._explore_actions)
        used_tools = set(self._used_tools)

        result = await run_research_tool(tool, query, intent)
        results_text = result.render()
        self._explore_actions.append(_explore_action_text(tool, query, intent))
        if result.ok:
            self._accumulated_context.append(result.text)
            self._used_tools.add(tool)

        # Compute per-step exploration reward
        reward, components = compute_explore_reward(
            query=query,
            tool=tool,
            intent=intent,
            result=result,
            topic=task.topic,
            keywords_csv=task.keywords,
            task_content=task.content,
            difficulty=task.difficulty,
            previous_context=previous_context,
            accumulated_context=self._accumulated_context,
            used_tools=used_tools,
            previous_actions=previous_actions,
        )

        steps_left = MAX_EXPLORE_STEPS - self._explore_steps
        if steps_left > 1:
            phase = "explore"
            hint = f"Research going well — {steps_left} more steps available. Keep searching or move to generation."
        elif steps_left == 1:
            phase = "explore"
            hint = "Last research step available. Search for any missing context, or proceed to generate."
        else:
            phase = "generate"
            hint = "Research phase complete. Time to generate your explanation."
        self._phase = phase
        top_chunks = _top_chunks_payload(result.chunks)

        return self._make_obs(
            task,
            phase=phase,
            feedback=f"{hint}\nTool: {tool}\nReward: {components}",
            search_results=results_text,
            top_chunks=top_chunks,
            reward=reward,
            metadata={
                "step": self._state.step_count,
                "phase": "explore",
                "tool": tool,
                "source_count": len(result.chunks),
                "top_chunks": top_chunks,
                "error": result.error,
                **components,
            },
        )

    def _record_artifact(self, fmt, code, narration, sandbox) -> str:
        """Remember the latest submission and its errors; return the rendered errors."""
        rendered_errors = _render_errors_with_hints(sandbox.render_errors(), sandbox.error_codes)
        self._last_code = code
        self._last_format = fmt
        self._last_narration = narration
        self._last_errors = rendered_errors
        self._last_error_codes = sandbox.error_codes
        return rendered_errors

    def _handle_generate(self, action: ExplainerAction, task: Task) -> ExplainerObservation:
        """Process a generate action: run sandbox, maybe open repair phase.

        The episode ends when execution succeeds or no repair attempts remain;
        otherwise the phase switches to "repair".
        """
        if self._phase not in {"explore", "generate"}:
            return self._make_obs(
                task,
                phase=self._phase,
                feedback=f"Cannot generate during phase '{self._phase}'.",
                reward=0.0,
            )

        fmt = action.format or "marimo"
        code = action.code
        narration = action.narration

        # Penalise generating without any exploration
        if self._explore_steps == 0:
            skip_penalty = -0.1
            penalty_msg = "Warning: generating without any research. -0.1 penalty."
        else:
            skip_penalty = 0.0
            penalty_msg = ""

        sandbox = validate_code(fmt, code)

        # Generation reward
        reward, components = compute_generate_reward(
            code=code,
            fmt=fmt,
            narration=narration,
            task=task,
            exec_success=sandbox.exec_success,
            accumulated_context=self._accumulated_context,
            static_check_passed=sandbox.check_passed,
            error_codes=sandbox.error_codes,
        )
        reward = clamp_action_reward(reward + skip_penalty)
        components["generate_total"] = round(reward, 4)

        rendered_errors = self._record_artifact(fmt, code, narration, sandbox)

        # Feedback
        parts = []
        if penalty_msg:
            parts.append(penalty_msg)
        if not sandbox.parses:
            parts.append("SYNTAX ERROR: code does not parse.")
        elif not sandbox.exec_success:
            parts.append(f"EXECUTION FAILED: {rendered_errors}")
        else:
            parts.append(f"EXECUTION OK: {sandbox.message}")
        parts.append(
            f"Reward: {', '.join(f'{k}={v}' for k, v in components.items())}"
        )

        done = sandbox.exec_success or self._repair_steps >= MAX_REPAIR_STEPS
        phase = "done" if done else "repair"
        self._phase = phase
        self._done = done
        if not done:
            parts.append(
                f"Repair phase: {MAX_REPAIR_STEPS} attempts available. "
                "Submit a revised artifact using the error feedback."
            )

        return self._make_obs(
            task,
            phase=phase,
            feedback="\n".join(parts),
            reward=reward,
            done=done,
            last_errors="" if sandbox.exec_success else rendered_errors,
            metadata={
                "step": self._state.step_count,
                "phase": "generate",
                "explore_steps_used": self._explore_steps,
                "sandbox_message": sandbox.message,
                "error_codes": sandbox.error_codes,
                **components,
            },
        )

    def _handle_repair(self, action: ExplainerAction, task: Task) -> ExplainerObservation:
        """Process one repair attempt after a failed generate action.

        Missing ``format`` / ``narration`` fall back to the values of the
        previous submission. The episode ends on success or when the last
        repair attempt has been used.
        """
        if self._phase != "repair":
            return self._make_obs(
                task,
                phase=self._phase,
                feedback="Repair is only available after a failed generate step.",
                reward=0.0,
                done=self._done,
            )
        if self._repair_steps >= MAX_REPAIR_STEPS:
            self._phase = "done"
            self._done = True
            return self._make_obs(
                task,
                phase="done",
                feedback="No repair attempts left.",
                reward=0.0,
                done=True,
            )

        self._repair_steps += 1
        fmt = action.format or self._last_format or "marimo"
        code = action.code
        narration = action.narration or self._last_narration
        # Capture the previous attempt before it is overwritten below; the
        # repair adjustment compares old and new code / error codes.
        previous_code = self._last_code
        previous_errors = list(self._last_error_codes)

        sandbox = validate_code(fmt, code)
        base_reward, components = compute_generate_reward(
            code=code,
            fmt=fmt,
            narration=narration,
            task=task,
            exec_success=sandbox.exec_success,
            accumulated_context=self._accumulated_context,
            static_check_passed=sandbox.check_passed,
            error_codes=sandbox.error_codes,
        )
        repair_reward, repair_components = adjust_repair_reward(
            base_reward,
            repair_success=sandbox.exec_success,
            previous_error_codes=previous_errors,
            new_error_codes=sandbox.error_codes,
            previous_code=previous_code,
            repaired_code=code,
        )
        components.update(repair_components)

        rendered_errors = self._record_artifact(fmt, code, narration, sandbox)

        attempts_left = MAX_REPAIR_STEPS - self._repair_steps
        done = sandbox.exec_success or attempts_left <= 0
        phase = "done" if done else "repair"
        self._phase = phase
        self._done = done

        status = "REPAIR OK" if sandbox.exec_success else "REPAIR FAILED"
        feedback_parts = [
            f"{status}: {sandbox.message if sandbox.exec_success else rendered_errors}",
            f"Reward: {', '.join(f'{k}={v}' for k, v in components.items())}",
        ]
        if not done:
            feedback_parts.append(
                f"Repair phase continues: {attempts_left} repair attempts left. "
                "Submit another corrected artifact using the latest error feedback."
            )
        feedback = "\n".join(feedback_parts)
        return self._make_obs(
            task,
            phase=phase,
            feedback=feedback,
            reward=repair_reward,
            done=done,
            last_errors="" if sandbox.exec_success else rendered_errors,
            metadata={
                "step": self._state.step_count,
                "phase": "repair",
                "explore_steps_used": self._explore_steps,
                "repair_steps_used": self._repair_steps,
                "sandbox_message": sandbox.message,
                "error_codes": sandbox.error_codes,
                **components,
            },
        )

    def _make_obs(
        self,
        task: Task,
        *,
        phase: str,
        feedback: str,
        reward: float = 0.0,
        done: bool = False,
        search_results: str = "",
        top_chunks: list[dict] | None = None,
        last_errors: str | None = None,
        metadata: dict | None = None,
    ) -> ExplainerObservation:
        """Helper to build a consistent observation.

        Task fields are copied from ``task``; remaining budgets and the joined
        research context are derived from the current episode state. When
        ``last_errors`` is None the errors stored from the latest submission
        are reported instead.
        """
        return ExplainerObservation(
            topic=task.topic,
            content=task.content,
            tier=task.tier,
            keywords=task.keywords,
            data_available=task.data_available,
            difficulty=task.difficulty,
            phase=phase,
            feedback=feedback,
            search_results=search_results,
            top_chunks=top_chunks or [],
            explored_context="\n---\n".join(self._accumulated_context),
            explore_steps_left=MAX_EXPLORE_STEPS - self._explore_steps,
            repair_attempts_left=MAX_REPAIR_STEPS - self._repair_steps,
            last_errors=self._last_errors if last_errors is None else last_errors,
            available_tools=list(AVAILABLE_TOOLS),
            done=done,
            reward=reward,
            metadata=metadata or {},
        )

    @property
    def state(self) -> State:
        """Current episode bookkeeping (episode id and step count)."""
        return self._state


def _explore_action_text(tool: str, query: str, intent: str) -> str:
    return f"{tool} {query.strip()} {intent.strip()}".strip()


def _top_chunks_payload(chunks) -> list[dict]:
    return [
        {
            "rank": chunk.rank,
            "source": chunk.source,
            "title": chunk.title,
            "url": chunk.url,
            "score": round(chunk.score, 4),
            "snippet": chunk.text,
        }
        for chunk in chunks[:5]
    ]