Spaces:

kgdrathan
/

explainer-env

Sleeping

App Files Files Community

kgdrathan committed on Apr 30

Commit

ac7572a

verified ·

1 Parent(s): 60b22ff

Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

README.md +1 -3
__marimo__/session/a.py.json +0 -0
a.py +141 -112
dashboard.py +900 -0
dashboard_prompts.py +215 -0
server/app.py +4 -7

README.md CHANGED Viewed

@@ -14,9 +14,7 @@ tags:
 ---
 <p align="center">
-  <a href="https://kgdrathan-explainer-env-dashboard.hf.space/">
-    https://kgdrathan-explainer-env-dashboard.hf.space/
-  </a>
 </p>
 <p align="center">

 ---
 <p align="center">
+  The dashboard is served by this Space at <code>/web/</code> in the custom tab.
 </p>
 <p align="center">

__marimo__/session/a.py.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

a.py CHANGED Viewed

@@ -1,164 +1,193 @@
-import marimo as mo
-import numpy as np
-import pandas as pd
-import matplotlib.pyplot as plt
-from matplotlib.patches import Rectangle
-# Shared variables
-app = mo.App()
 @app.cell
 def _(mo):
     mo.md("""
-    # Reinforcement Learning Basics
-    Reinforcement Learning (RL) is a type of machine learning where an **agent** learns to make decisions by interacting with an **environment**. The goal is to learn a **policy** that maximizes the cumulative reward over time.
-    ## Core Concepts
-    1. **Agent**: The learner/decision-maker
-    2. **Environment**: Everything outside the agent
-    3. **State (s)**: The current situation
-    4. **Action (a)**: What the agent can do
-    5. **Reward (r)**: Feedback from environment
-    6. **Policy (π)**: Strategy that agents use to decide actions
-    7. **Value Function (V)**: How good it is to be in a state
-    8. **Q-Function (Q)**: How good it is to take an action in a state
-    9. **Bellman Equation**: Relationship between value functions at different time steps
     """)
     return
-@app.cell
-def _(mo):
-    # Simple grid world example
-    grid_size = 4
-    start = (0, 0)
-    goal = (3, 3)
-    obstacles = [(1, 1), (1, 2)]
-    # Create a simple visualization
-    _fig, _ax = plt.subplots(figsize=(6, 6))
-    _ax.set_xlim(0, grid_size)
-    _ax.set_ylim(0, grid_size)
-    _ax.set_xticks(range(grid_size))
-    _ax.set_yticks(range(grid_size))
-    _ax.grid(True)
-    # Draw obstacles
-    for obs in obstacles:
-        rect = Rectangle((obs[0], obs[1]), 1, 1, facecolor="black", alpha=0.7)
-        _ax.add_patch(rect)
-    # Draw start and goal
-    start_rect = Rectangle(start, 1, 1, facecolor="green", alpha=0.7)
-    goal_rect = Rectangle(goal, 1, 1, facecolor="red", alpha=0.7)
-    _ax.add_patch(start_rect)
-    _ax.add_patch(goal_rect)
-    _ax.text(start[0] + 0.5, start[1] + 0.5, "Start", ha="center", va="center")
-    _ax.text(goal[0] + 0.5, goal[1] + 0.5, "Goal", ha="center", va="center")
-    _ax.set_title("Simple Grid World Example")
-    _ax.invert_yaxis()  # To match standard grid coordinates
-    mo.ui.matplotlib(_fig)
-    plt.close(_fig)
-    return
 @app.cell
 def _(mo):
     mo.md("""
-    ## How It Works
-    The agent interacts with the environment in episodes:
-    1. **Observe State (s)**: Agent senses its current situation
-    2. **Choose Action (a)**: Based on policy π(a|s)
-    3. **Environment Transitions**: Move to new state s'
-    4. **Receive Reward (r)**: Immediate feedback
-    5. **Update Knowledge**: Learn from experience
-    The goal is to maximize expected cumulative discounted reward:
-    $G_t = \sum_{k=0}^{\infty} \gamma^k r_{t+k+1}$
-    Where γ ∈ [0,1] is the discount factor.
     """)
     return
 @app.cell
 def _(mo):
-    # Value function explanation
-    mo.md("""
-    ### Value Functions
-    The **Value Function V(s)** represents how good it is to be in a state:
-    $V(s) = \mathbb{E}[G_t | S_t = s]$
-    The **Q-Function Q(s,a)** represents how good it is to take an action in a state:
-    $Q(s,a) = \mathbb{E}[G_t | S_t = s, A_t = a]$
-    These functions help the agent evaluate the long-term reward of states and actions.
-    """)
-    return
-@app.cell
-def _(mo):
-    # Bellman Equation explanation
-    mo.md("""
-    ### Bellman Equation
-    The Bellman equation expresses the relationship between the value of a state and the values of subsequent states:
-    $V(s) = \max_a \sum_{s'} P(s'|s,a)[r(s,a,s') + \gamma V(s')]$
-    And for Q-values:
-    $Q(s,a) = \sum_{s'} P(s'|s,a)[r(s,a,s') + \gamma \max_{a'} Q(s',a')]$
-    These equations are fundamental to solving RL problems iteratively.
-    """)
     return
 @app.cell
 def _(mo):
-    # Policy definition
     mo.md("""
-    ### Policy π(a|s)
-    A policy defines the behavior of an agent. It's a mapping from states to probabilities of selecting each possible action.
-    For example, a stochastic policy could be:
-    $\pi(a|s) = \text{Probability of taking action } a \text{ in state } s$
-    The goal is to find an optimal policy π* that maximizes expected cumulative reward.
     """)
     return
 @app.cell
-def _(mo):
-    # Interactive elements
-    mo.md("""
-    ## Try It Yourself!
-    Below is an interactive grid world. You can visualize how an agent might navigate from start to goal while avoiding obstacles.
-    ### Next Steps
-    - Understand how rewards influence agent behavior
-    - Explore how policies change based on learning
-    - Study how value functions converge over time
-    """)
     return

+import marimo
+__generated_with = "0.23.3"
+app = marimo.App()
+@app.cell
+def _():
+    # Import cell: loads the libraries the notebook uses and returns the names
+    # other cells receive as parameters (Rectangle, mo, plt).
+    import marimo as mo
+    # numpy is imported here but is not part of the returned tuple.
+    import numpy as np
+    import matplotlib.pyplot as plt
+    from matplotlib.patches import Rectangle
+    return Rectangle, mo, plt
 @app.cell
 def _(mo):
     mo.md("""
+    # Bayes' Theorem: Updating Beliefs with Evidence
+    Bayes' Theorem is a fundamental concept in probability theory that helps us update our beliefs based on new evidence.
+    The formula is:
+    $$P(A|B) =     rac{P(B|A)P(A)}{P(B)}$$
+    Where:
+    - $P(A|B)$ is the **posterior probability**: the probability of event A given that B occurred
+    - $P(B|A)$ is the **likelihood**: the probability of observing B given that A is true
+    - $P(A)$ is the **prior probability**: our initial belief about A before seeing B
+    - $P(B)$ is the **evidence**: the overall probability of observing B
     """)
     return
 @app.cell
 def _(mo):
+    # Markdown cell: states the rare-disease testing scenario (0.1% prevalence,
+    # 99% true positive rate, 5% false positive rate) and poses the question
+    # that the following cells answer.
     mo.md("""
+    ## Medical Testing Example
+    Let's say we're testing for a rare disease:
+    - Prevalence (prior probability): 0.1% of the population has the disease
+    - Test accuracy:
+      - If you have the disease, the test correctly identifies it 99% of the time (true positive rate)
+      - If you don't have the disease, the test incorrectly says you do 5% of the time (false positive rate)
+    What's the probability that someone who tests positive actually has the disease?
     """)
     return
 @app.cell
 def _(mo):
+    # Computation cell: applies Bayes' theorem to the medical-testing numbers
+    # and renders the worked calculation as markdown.
+    # Define the parameters
+    prior_disease = 0.001  # P(Disease)
+    sensitivity = 0.99  # P(Test+ | Disease)
+    false_positive_rate = 0.05  # P(Test+ | No Disease)
+    # Calculate the components
+    # P(Test+) via the law of total probability over Disease / No Disease
+    p_test_positive = sensitivity * prior_disease + false_positive_rate * (1 - prior_disease)
+    # Apply Bayes' Theorem
+    posterior_disease = (sensitivity * prior_disease) / p_test_positive
+    # Display results
+    mo.md(f"""
+    Given:
+    - Prior probability of disease: {prior_disease * 100:.1f}%
+    - Sensitivity (true positive rate): {sensitivity * 100}%
+    - False positive rate: {false_positive_rate * 100}%
+    Using Bayes' theorem:
+    P(Disease|Test+) = [P(Test+|Disease) × P(Disease)] / P(Test+)
+    P(Test+) = P(Test+|Disease) × P(Disease) + P(Test+|No Disease) × P(No Disease)
+    P(Test+) = {sensitivity:.2f} × {prior_disease:.3f} + {false_positive_rate:.2f} × {(1 - prior_disease):.3f}
+    P(Test+) = {p_test_positive:.4f}
+    P(Disease|Test+) = ({sensitivity:.2f} × {prior_disease:.3f}) / {p_test_positive:.4f} = {posterior_disease:.3f}
+    **Only {posterior_disease * 100:.1f}% of people who test positive actually have the disease!**
+    """)
+    return
+@app.cell
+def _(Rectangle, mo, plt):
+    # Plot cell: draws a schematic of a 1000-person population split into
+    # diseased / non-diseased bands, with boxes for true positives, false
+    # positives and the total number of positive tests.
+    # Create a visualization showing the four categories
+    # NOTE(review): `fig` is never used again in this cell.
+    fig, ax = plt.subplots(figsize=(10, 6))
+    # Set up the grid
+    ax.set_xlim(0, 10)
+    ax.set_ylim(0, 8)
+    # Draw rectangles for different categories
+    # Population (1000 people)
+    ax.add_patch(Rectangle((1, 1), 8, 6, fill=False, edgecolor="black", linewidth=2))
+    ax.text(5, 7.2, "Population (1000 people)", ha="center", va="bottom")
+    # Disease (0.1%)
+    disease_count = 1000 * 0.001
+    no_disease_count = 1000 - disease_count
+    # Draw disease area
+    # NOTE(review): with disease_count == 1 the band height is 1/1000 * 6 = 0.006
+    # axis units, so the red band is barely visible at this scale.
+    ax.add_patch(Rectangle((1, 1), 8, disease_count / 1000 * 6, facecolor="red", alpha=0.5))
+    ax.text(
+        5,
+        1.5 + disease_count / 1000 * 3,
+        f"Disease ({disease_count:.0f} people)",
+        ha="center",
+        va="center",
+        color="white",
+        weight="bold",
+    )
+    # Draw non-disease area (stacked directly above the disease band)
+    ax.add_patch(
+        Rectangle((1, 1 + disease_count / 1000 * 6), 8, no_disease_count / 1000 * 6, facecolor="blue", alpha=0.5)
+    )
+    ax.text(
+        5,
+        1.5 + disease_count / 1000 * 6 + no_disease_count / 1000 * 3,
+        f"No Disease ({no_disease_count:.0f} people)",
+        ha="center",
+        va="center",
+        color="white",
+        weight="bold",
+    )
+    # Add labels
+    ax.text(0.5, 4, "True Positive\n(99% of Diseased)", ha="right", va="center")
+    ax.text(0.5, 2, "False Positive\n(5% of Non-Diseased)", ha="right", va="center")
+    # Add test results
+    tp = disease_count * 0.99  # True positives
+    fp = no_disease_count * 0.05  # False positives
+    ax.add_patch(Rectangle((2, 3.5), 2, 1, facecolor="green", alpha=0.7))
+    ax.text(3, 4, f"True Positive\n({tp:.0f})", ha="center", va="center")
+    ax.add_patch(Rectangle((2, 1.5), 2, 1, facecolor="orange", alpha=0.7))
+    ax.text(3, 2, f"False Positive\n({fp:.0f})", ha="center", va="center")
+    # Add total positive tests
+    total_positives = tp + fp
+    ax.add_patch(Rectangle((5, 1.5), 3, 2, facecolor="yellow", alpha=0.5))
+    ax.text(6.5, 2.5, f"Total Positive\nTests ({total_positives:.0f})", ha="center", va="center")
+    # Hide tick marks: the axes are a drawing canvas, not a data scale.
+    ax.set_xticks([])
+    ax.set_yticks([])
+    ax.set_title("Bayes' Theorem Visualization: Medical Testing")
+    mo.ui.matplotlib(plt.gca())
     return
 @app.cell
 def _(mo):
+    # Markdown cell: closing summary of why the low-prevalence result is
+    # counterintuitive, plus a list of other application areas.
     mo.md("""
+    ## Why This Matters
+    This example shows why Bayes' Theorem is important:
+    1. **High false positive rate** combined with **low prevalence** leads to counterintuitive results
+    2. **95% accurate tests** can still give misleading results when the condition is rare
+    3. **Bayes' Theorem forces us to think about**:
+       - Our initial beliefs (prior probability)
+       - How likely we are to observe evidence given our beliefs
+       - How to update our beliefs in light of new evidence
+    This same logic applies to:
+    - Spam detection
+    - Financial risk assessment
+    - Scientific hypothesis testing
+    - Machine learning classification
     """)
     return
 @app.cell
+def _():
+    # Empty cell: takes no inputs and returns nothing.
+    return
+@app.cell
+def _():
+    # Empty cell: takes no inputs and returns nothing.
     return

dashboard.py ADDED Viewed

	@@ -0,0 +1,900 @@

+"""
+Gradio UI for the Research → Interactive Explainer Environment.
+Two modes:
+  1. LLM Mode: LLM drives exploration + generation, human watches step-by-step
+  2. Human Mode: human types queries and code, sees rewards in real-time
+Environment service is the same OpenEnv server that hosts this UI.
+LLM configuration is resolved from API_URL, HF_TOKEN/API_KEY, and MODEL_NAME.
+"""
+import ast
+import json
+import os
+import re
+import uuid
+from pathlib import Path
+from typing import Any
+import gradio as gr
+from dotenv import load_dotenv
+# Load .env from project root
+PROJECT_ROOT = Path(__file__).parent
+load_dotenv(PROJECT_ROOT / ".env")
+try:
+    from .client import ExplainerEnv
+    from .constants import SUCCESS_SCORE_THRESHOLD, normalized_episode_score
+    from .dashboard_prompts import (
+        SYSTEM_PROMPT,
+        build_explore_prompt,
+        build_generate_prompt,
+        build_repair_prompt,
+        parse_explore_response,
+        parse_generate_response,
+    )
+    from .models import ExplainerAction
+    from .task_bank import ALL_TASKS
+except ImportError:  # pragma: no cover - supports direct execution from env root
+    from client import ExplainerEnv
+    from constants import SUCCESS_SCORE_THRESHOLD, normalized_episode_score
+    from dashboard_prompts import (
+        SYSTEM_PROMPT,
+        build_explore_prompt,
+        build_generate_prompt,
+        build_repair_prompt,
+        parse_explore_response,
+        parse_generate_response,
+    )
+    from models import ExplainerAction
+    from task_bank import ALL_TASKS
+# Loopback URL of the environment server; the port comes from the PORT
+# environment variable and falls back to 8000.
+SELF_ENV_BASE_URL = f"http://127.0.0.1:{os.getenv('PORT', '8000')}"
+# Model name used by _resolve_llm() when MODEL_NAME is unset or empty.
+DEFAULT_MODEL_NAME = "bedrock-qwen3-coder-30b-a3b"
+# ---------------------------------------------------------------------------
+# Task catalog (reference only)
+# ---------------------------------------------------------------------------
+# Dropdown labels: "(random)" first, then one "<topic>  [<difficulty>, <tier>]"
+# label per task in ALL_TASKS.
+TASK_CHOICES = ["(random)"] + [f"{t.topic}  [{t.difficulty}, {t.tier}]" for t in ALL_TASKS]
+# Map dropdown label -> topic name for reset(topic=...)
+# "(random)" has no entry here, so do_reset() sends no topic for it.
+_TASK_LABEL_TO_TOPIC: dict[str, str] = {f"{t.topic}  [{t.difficulty}, {t.tier}]": t.topic for t in ALL_TASKS}
+# ---------------------------------------------------------------------------
+# Session manager
+# ---------------------------------------------------------------------------
+class SessionManager:
+    """Module-level registry mapping session_id -> connected ExplainerEnv client."""
+    def __init__(self):
+        self._clients: dict[str, ExplainerEnv] = {}
+        self._urls: dict[str, str] = {}
+    async def get_or_create(self, session_id: str, base_url: str) -> ExplainerEnv:
+        if session_id in self._clients and self._urls.get(session_id) != base_url:
+            await self.close(session_id)
+        if session_id not in self._clients:
+            client = ExplainerEnv(base_url=base_url.rstrip("/"))
+            await client.connect()
+            self._clients[session_id] = client
+            self._urls[session_id] = base_url
+        return self._clients[session_id]
+    async def close(self, session_id: str) -> None:
+        client = self._clients.pop(session_id, None)
+        self._urls.pop(session_id, None)
+        if client:
+            try:
+                await client.disconnect()
+            except Exception:
+                pass
+# Module-level registry shared by do_reset(), do_explore() and do_generate().
+SESSION_MGR = SessionManager()
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+def _resolve_env_url() -> str:
+    """Return the base URL of the environment server (always ``SELF_ENV_BASE_URL``)."""
+    return SELF_ENV_BASE_URL
+def _resolve_llm() -> tuple[str, str, str]:
+    api_url = (os.getenv("API_URL") or os.getenv("API_BASE_URL") or "").rstrip("/")
+    api_key = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
+    model = os.getenv("MODEL_NAME") or DEFAULT_MODEL_NAME
+    return api_url, api_key, model
+def call_llm_or_raise(client: Any, user_prompt: str, *, model: str, max_tokens: int) -> str:
+    """Call the LLM and preserve provider errors for the dashboard."""
+    completion = client.chat.completions.create(
+        model=model,
+        messages=[
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": user_prompt},
+        ],
+        temperature=0.7,
+        max_tokens=max_tokens,
+        stream=False,
+    )
+    return (completion.choices[0].message.content or "").strip()
+def _format_llm_exception(exc: Exception, api_url: str, model: str) -> str:
+    cause = getattr(exc, "__cause__", None)
+    detail = str(cause or exc).strip() or exc.__class__.__name__
+    return f"{exc.__class__.__name__} from {api_url} using model {model}: {detail}"
+def empty_state() -> dict[str, Any]:
+    return {
+        "session_id": str(uuid.uuid4()),
+        "obs": None,
+        "step": 0,
+        "rewards": [],
+        "reward_details": [],
+        "log": [],
+        "done": False,
+        "phase": "not_started",
+        "explored_context": "",
+        "topic": "",
+        "tier": "",
+        "keywords": "",
+        "content": "",
+        "data_available": False,
+        "last_code": "",
+        "last_format": "marimo",
+        "generated_response": "",
+        "parsed_response": "",
+        "top_chunks": [],
+    }
+def build_reward_matrix(reward_details: list[dict[str, Any]]) -> gr.update:
+    """Build a reward matrix with reward names as rows and steps as columns."""
+    steps = sorted({entry["step"] for entry in reward_details})
+    reward_names: list[str] = []
+    cells: dict[tuple[str, int], Any] = {}
+    for entry in reward_details:
+        step = entry["step"]
+        components = entry.get("components", {})
+        if not components:
+            components = {"total": ""}
+        for name, value in components.items():
+            if name not in reward_names:
+                reward_names.append(name)
+            cells[(name, step)] = value
+    headers = ["Reward"] + [f"Step {step}" for step in steps]
+    rows = []
+    for name in reward_names:
+        row = [name]
+        for step in steps:
+            value = cells.get((name, step), "")
+            row.append(_fmt_component(value) if value != "" else "")
+        rows.append(row)
+    return gr.update(
+        headers=headers,
+        value=rows,
+        column_count=(len(headers), "fixed"),
+    )
+def build_reward_summary(reward_details: list[dict[str, Any]]) -> str:
+    if not reward_details:
+        return "*No rewards yet.*"
+    sections = []
+    for entry in reward_details:
+        components = entry.get("components", {})
+        total = _first_present(
+            components,
+            ("explore_total", "generate_total", "repair_total"),
+            default="n/a",
+        )
+        sections.append(f"**Step {entry['step']} · {entry['phase']} · total {_fmt_component(total)}**")
+    return "\n\n".join(sections)
+def build_top_chunks_df(chunks: list[dict[str, Any]]) -> list[list[Any]]:
+    rows = []
+    for chunk in chunks[:5]:
+        rows.append([
+            chunk.get("rank", ""),
+            chunk.get("source", ""),
+            chunk.get("title", ""),
+            chunk.get("score", ""),
+            chunk.get("url", ""),
+            _trim_display_text(str(chunk.get("snippet", "")), 700),
+        ])
+    return rows
+def extract_top_chunks(obs_dict: dict[str, Any], search_results: str) -> list[dict[str, Any]]:
+    metadata = obs_dict.get("metadata") or {}
+    chunks = obs_dict.get("top_chunks") or metadata.get("top_chunks") or []
+    return chunks or parse_rendered_chunks(search_results)
+def parse_rendered_chunks(search_results: str) -> list[dict[str, Any]]:
+    """Fallback parser for rendered research results if structured fields are absent."""
+    chunks = []
+    for part in re.split(r"\n\n---\n\n", search_results or ""):
+        lines = [line for line in part.splitlines() if line.strip()]
+        if not lines:
+            continue
+        match = re.match(r"\[(\d+)\]\s+([^:]+):\s+(.+)", lines[0])
+        if not match:
+            continue
+        url = ""
+        body_start = 1
+        if len(lines) > 1 and lines[1].startswith("URL:"):
+            url = lines[1].removeprefix("URL:").strip()
+            body_start = 2
+        chunks.append({
+            "rank": int(match.group(1)),
+            "source": match.group(2).strip(),
+            "title": match.group(3).strip(),
+            "url": url,
+            "score": "",
+            "snippet": "\n".join(lines[body_start:]).strip(),
+        })
+    return chunks[:5]
+def _trim_display_text(text: str, max_chars: int) -> str:
+    text = re.sub(r"\s+", " ", text).strip()
+    return text if len(text) <= max_chars else text[:max_chars].rstrip() + "..."
+def _first_present(mapping: dict[str, Any], keys: tuple[str, ...], default: Any = None) -> Any:
+    for key in keys:
+        if key in mapping:
+            return mapping[key]
+    return default
+def _fmt_component(value: Any) -> str:
+    return f"{value:.3f}" if isinstance(value, float) else str(value)
+# Metadata keys that reward_components() never treats as reward values,
+# even when their value is numeric.
+_NON_REWARD_METADATA_KEYS = frozenset({
+    "step",
+    "phase",
+    "tool",
+    "source_count",
+    "error",
+    "explore_steps_used",
+    "repair_steps_used",
+    "sandbox_message",
+    "error_codes",
+})
+# Per-phase reward component names that reward_components() returns, in this
+# order, when at least one of them is present in the metadata.
+_VISIBLE_REWARD_COMPONENTS = {
+    "explore": (
+        "query_quality",
+        "evidence_quality",
+        "information_gain",
+        "efficiency",
+        "explore_total",
+    ),
+    "generate": (
+        "validity",
+        "task_alignment",
+        "structure",
+        "research_usage",
+        "generate_total",
+    ),
+    "repair": (
+        "repair_success",
+        "fixed_prior_errors",
+        "changed_code",
+        "repair_total",
+    ),
+}
+def parse_reward_components(feedback: str) -> dict[str, Any]:
+    """Fallback parser for older observations that lack reward metadata."""
+    dict_match = re.search(r"Reward:\s*(\{.+\})", feedback)
+    if dict_match:
+        try:
+            parsed = ast.literal_eval(dict_match.group(1))
+        except (SyntaxError, ValueError):
+            pass
+        else:
+            if isinstance(parsed, dict):
+                return {k: v for k, v in parsed.items() if k not in ("step", "phase")}
+    kv_match = re.search(r"Reward:\s*(.+)", feedback)
+    if kv_match:
+        return _parse_key_value_components(kv_match.group(1))
+    return {}
+def _parse_key_value_components(text: str) -> dict[str, Any]:
+    components: dict[str, Any] = {}
+    for part in text.split(","):
+        if "=" not in part:
+            continue
+        key, value = part.strip().split("=", 1)
+        try:
+            components[key.strip()] = float(value.strip())
+        except ValueError:
+            components[key.strip()] = value.strip()
+    return components
+def reward_components(obs_dict: dict[str, Any], feedback: str) -> dict[str, Any]:
+    metadata = obs_dict.get("metadata") or {}
+    components = {
+        key: value
+        for key, value in metadata.items()
+        if key not in _NON_REWARD_METADATA_KEYS and isinstance(value, (int, float)) and not isinstance(value, bool)
+    }
+    phase = metadata.get("phase") or obs_dict.get("phase")
+    allowed = _VISIBLE_REWARD_COMPONENTS.get(str(phase))
+    if allowed:
+        visible = {key: components[key] for key in allowed if key in components}
+        if visible:
+            return visible
+    return components or parse_reward_components(feedback)
+def to_obs_dict(obs: Any) -> dict[str, Any]:
+    return obs.model_dump() if hasattr(obs, "model_dump") else vars(obs)
+def fmt_log(log_entries: list[str]) -> str:
+    if not log_entries:
+        return "*No events yet.*"
+    return "```text\n" + "\n".join(log_entries) + "\n```"
+def obs_summary(obs: dict[str, Any]) -> str:
+    return (
+        f"**Topic:** {obs.get('topic', '')}\n"
+        f"**Tier:** {obs.get('tier', '')}\n"
+        f"**Phase:** {obs.get('phase', '')}\n"
+        f"**Explore steps left:** {obs.get('explore_steps_left', 0)}\n"
+        f"**Keywords:** {obs.get('keywords', '')}\n"
+        f"**Data available:** {obs.get('data_available', False)}"
+    )
+def fenced_json(data: dict[str, Any]) -> str:
+    return "```json\n" + json.dumps(data, indent=2, ensure_ascii=False) + "\n```"
+def format_explore_action_md(tool: str, query: str, intent: str) -> str:
+    return fenced_json({"tool": tool, "query": query, "intent": intent})
+def format_code_text(code: str) -> str:
+    return code or ""
+def common_outputs(
+    state: dict[str, Any],
+    status: str = "",
+    obs_md: str = "",
+    feedback: str = "",
+    search: str = "",
+) -> tuple[dict[str, Any], str, str, str, str, str, str, list[list[Any]], str, Any]:
+    return (
+        state,
+        fmt_log(state["log"]),
+        obs_md,
+        feedback,
+        state.get("generated_response", ""),
+        state.get("parsed_response", ""),
+        search,
+        build_top_chunks_df(state.get("top_chunks", [])),
+        build_reward_summary(state["reward_details"]),
+        build_reward_matrix(state["reward_details"]),
+    )
+def llm_outputs(
+    state: dict[str, Any],
+    status: str = "",
+    obs_md: str = "",
+    feedback: str = "",
+    search: str = "",
+) -> tuple[dict[str, Any], str, str, str, str, str, str, list[list[Any]], str, Any]:
+    return common_outputs(state, status=status, obs_md=obs_md, feedback=feedback, search=search)
+async def do_reset(task_label, state):
+    """Reset the environment and start a new episode."""
+    old_sid = state.get("session_id", "")
+    if old_sid:
+        await SESSION_MGR.close(old_sid)
+    state = empty_state()
+    sid = state["session_id"]
+    env_url = _resolve_env_url()
+    # Build reset kwargs — pass topic if a specific task was selected
+    reset_kwargs: dict[str, Any] = {}
+    topic = _TASK_LABEL_TO_TOPIC.get(task_label)
+    if topic:
+        reset_kwargs["topic"] = topic
+    try:
+        env = await SESSION_MGR.get_or_create(sid, env_url)
+        result = await env.reset(**reset_kwargs)
+    except Exception as e:
+        state["log"].append(f"[ERROR] Connection/reset failed: {e}")
+        return common_outputs(state, status=f"Error: {e}")
+    obs = result.observation
+    obs_dict = to_obs_dict(obs)
+    state["obs"] = obs_dict
+    state["phase"] = obs.phase
+    state["topic"] = obs.topic
+    state["tier"] = obs.tier
+    state["keywords"] = obs.keywords
+    state["content"] = obs.content
+    state["data_available"] = obs.data_available
+    state["generated_response"] = ""
+    state["parsed_response"] = ""
+    state["last_code"] = ""
+    state["top_chunks"] = []
+    state["log"].append(f"[START] topic={obs.topic} tier={obs.tier} phase={obs.phase}")
+    status = f"Reset OK — assigned: {obs.topic} [{obs.tier}]"
+    return common_outputs(
+        state,
+        status=status,
+        obs_md=obs_summary(obs_dict),
+        feedback=obs.feedback,
+    )
+async def do_explore(tool, query, intent, state):
+    """Execute an explore step."""
+    if state.get("done"):
+        state["log"].append("[WARN] Episode already done.")
+        return common_outputs(state, status="Episode already done.", feedback="Episode already done.")
+    if not query.strip():
+        return common_outputs(state, status="Empty query — nothing sent.")
+    sid = state.get("session_id", "")
+    env_url = _resolve_env_url()
+    try:
+        env = await SESSION_MGR.get_or_create(sid, env_url)
+    except Exception as e:
+        state["log"].append(f"[ERROR] Connection failed: {e}")
+        return common_outputs(state, status=f"Error: {e}")
+    action = ExplainerAction(
+        action_type="explore",
+        tool=tool,
+        query=query.strip(),
+        intent=intent.strip(),
+    )
+    result = await env.step(action)
+    obs = result.observation
+    reward = result.reward or 0.0
+    obs_dict = to_obs_dict(obs)
+    state["step"] += 1
+    state["rewards"].append(reward)
+    state["obs"] = obs_dict
+    state["phase"] = obs.phase
+    state["done"] = result.done
+    state["explored_context"] = obs.explored_context
+    state["parsed_response"] = format_explore_action_md(tool, query.strip(), intent.strip())
+    state["top_chunks"] = extract_top_chunks(obs_dict, obs.search_results)
+    components = reward_components(obs_dict, obs.feedback)
+    state["reward_details"].append({
+        "step": state["step"],
+        "phase": "explore",
+        "components": components,
+    })
+    state["log"].append(
+        f'[STEP] step={state["step"]} action=explore:{tool}:"{query[:60]}" reward={reward:.3f} done={result.done}'
+    )
+    status = f"Step {state['step']} explore — reward: {reward:.3f}"
+    return common_outputs(
+        state,
+        status=status,
+        obs_md=obs_summary(obs_dict),
+        feedback=obs.feedback,
+        search=obs.search_results,
+    )
+async def do_generate(fmt, code, narration, state):
+    """Execute a generate step."""
+    if state.get("done"):
+        state["log"].append("[WARN] Episode already done.")
+        return common_outputs(state, status="Episode already done.", feedback="Episode already done.")
+    sid = state.get("session_id", "")
+    env_url = _resolve_env_url()
+    try:
+        env = await SESSION_MGR.get_or_create(sid, env_url)
+    except Exception as e:
+        state["log"].append(f"[ERROR] Connection failed: {e}")
+        return common_outputs(state, status=f"Error: {e}")
+    action_type = "repair" if state.get("phase") == "repair" else "generate"
+    action = ExplainerAction(
+        action_type=action_type,
+        format=fmt,
+        code=code,
+        narration=narration,
+    )
+    result = await env.step(action)
+    obs = result.observation
+    reward = result.reward or 0.0
+    obs_dict = to_obs_dict(obs)
+    state["step"] += 1
+    state["rewards"].append(reward)
+    state["obs"] = obs_dict
+    state["phase"] = obs.phase
+    state["done"] = result.done
+    state["last_code"] = code
+    state["last_format"] = fmt
+    state["generated_response"] = format_code_text(code)
+    state["parsed_response"] = fenced_json({
+        "action_type": action_type,
+        "format": fmt,
+        "code_len": len(code),
+        "narration_len": len(narration or ""),
+    })
+    components = reward_components(obs_dict, obs.feedback)
+    state["reward_details"].append({
+        "step": state["step"],
+        "phase": action_type,
+        "components": components,
+    })
+    total_score = normalized_episode_score(sum(state["rewards"]))
+    state["log"].append(
+        f"[STEP] step={state['step']} action={action_type}:{fmt} reward={reward:.3f} done={result.done}"
+    )
+    state["log"].append(
+        f"[END] success={total_score >= SUCCESS_SCORE_THRESHOLD} steps={state['step']} "
+        f"score={total_score:.3f} rewards={','.join(f'{r:.2f}' for r in state['rewards'])}"
+    )
+    status = f"Episode done — score: {total_score:.3f} (generate reward: {reward:.3f})"
+    return common_outputs(
+        state,
+        status=status,
+        obs_md=obs_summary(obs_dict),
+        feedback=obs.feedback,
+    )
+def _llm_error_outputs(state: dict[str, Any], message: str):
+    state["log"].append(f"[ERROR] {message}")
+    state["parsed_response"] = f"**LLM error:** {message}"
+    return llm_outputs(
+        state,
+        obs_md=obs_summary(state.get("obs") or {}) if state.get("obs") else "",
+        feedback=(state.get("obs") or {}).get("feedback", ""),
+    )
+async def do_llm_step(state):
+    """Let the LLM take the next step (explore or generate)."""
+    if state.get("done"):
+        state["log"].append("[WARN] Episode already done.")
+        return llm_outputs(
+            state,
+            feedback="Episode already done.",
+        )
+    from openai import OpenAI
+    api_url, api_key, model = _resolve_llm()
+    if not api_url:
+        return _llm_error_outputs(state, "API_URL is not configured.")
+    if not api_key:
+        return _llm_error_outputs(state, "HF_TOKEN or API_KEY is not configured.")
+    if not model:
+        return _llm_error_outputs(state, "MODEL_NAME is not configured.")
+    client = OpenAI(base_url=api_url, api_key=api_key, timeout=60.0)
+    obs_data = state.get("obs", {})
+    phase = state.get("phase", "explore")
+    llm_response = ""
+    if phase == "explore":
+        prompt = build_explore_prompt(
+            topic=state["topic"],
+            content=state["content"],
+            tier=state["tier"],
+            keywords=state["keywords"],
+            step=state["step"] + 1,
+            steps_left=obs_data.get("explore_steps_left", 0),
+            explored_context=state.get("explored_context", ""),
+            feedback=obs_data.get("feedback", ""),
+        )
+        try:
+            llm_response = call_llm_or_raise(client, prompt, model=model, max_tokens=256)
+        except Exception as exc:
+            return _llm_error_outputs(state, _format_llm_exception(exc, api_url, model))
+        if not llm_response:
+            return _llm_error_outputs(
+                state,
+                f"LLM call failed or returned an empty response from {api_url} using model {model}.",
+            )
+        if llm_response.strip().upper() == "SKIP":
+            state["log"].append("[LLM] Decided to skip exploration. Moving to generate.")
+            state["phase"] = "generate"
+            state["generated_response"] = llm_response
+            state["parsed_response"] = "`SKIP`"
+            return llm_outputs(
+                state,
+                obs_md=obs_summary(obs_data),
+                feedback=obs_data.get("feedback", ""),
+            )
+        tool, query, intent = parse_explore_response(llm_response, state["topic"])
+        state["generated_response"] = llm_response
+        state["parsed_response"] = format_explore_action_md(tool, query, intent)
+        state["log"].append(f'[LLM] Explore tool={tool} query="{query[:80]}"')
+        (
+            s,
+            log,
+            obs_md,
+            feedback,
+            generated_response,
+            parsed_response,
+            search,
+            top_chunks,
+            reward_summary,
+            rewards_table,
+        ) = await do_explore(
+            tool,
+            query,
+            intent,
+            state,
+        )
+        return (
+            s,
+            log,
+            obs_md,
+            feedback,
+            generated_response,
+            parsed_response,
+            search,
+            top_chunks,
+            reward_summary,
+            rewards_table,
+        )
+    elif phase in ("generate", "repair", "done"):
+        if phase == "repair":
+            prompt = build_repair_prompt(
+                topic=state["topic"],
+                tier=state["tier"],
+                fmt=state.get("last_format", "marimo"),
+                previous_code=state.get("last_code", ""),
+                last_errors=obs_data.get("last_errors", ""),
+            )
+        else:
+            prompt = build_generate_prompt(
+                topic=state["topic"],
+                content=state["content"],
+                tier=state["tier"],
+                keywords=state["keywords"],
+                data_available=state.get("data_available", False),
+                explored_context=state.get("explored_context", ""),
+            )
+        try:
+            llm_response = call_llm_or_raise(client, prompt, model=model, max_tokens=4096)
+        except Exception as exc:
+            return _llm_error_outputs(state, _format_llm_exception(exc, api_url, model))
+        if not llm_response:
+            return _llm_error_outputs(
+                state,
+                f"LLM call failed or returned an empty response from {api_url} using model {model}.",
+            )
+        fmt, code, narration = parse_generate_response(llm_response)
+        state["generated_response"] = format_code_text(code)
+        state["parsed_response"] = fenced_json({
+            "format": fmt,
+            "code_len": len(code),
+            "narration_len": len(narration),
+        })
+        state["log"].append(f"[LLM] Generate: format={fmt}, code_len={len(code)}")
+        (
+            s,
+            log,
+            obs_md,
+            feedback,
+            generated_response,
+            parsed_response,
+            search,
+            top_chunks,
+            reward_summary,
+            rewards_table,
+        ) = await do_generate(
+            fmt,
+            code,
+            narration,
+            state,
+        )
+        return (
+            s,
+            log,
+            obs_md,
+            feedback,
+            generated_response,
+            parsed_response,
+            search,
+            top_chunks,
+            reward_summary,
+            rewards_table,
+        )
+    return llm_outputs(state)
+async def do_llm_auto(state):
+    """Run full episode automatically with LLM (explore + generate)."""
+    outputs = None
+    while not state.get("done"):
+        outputs = await do_llm_step(state)
+        state = outputs[0]
+        if state.get("log") and str(state["log"][-1]).startswith("[ERROR]"):
+            break
+    return outputs if outputs else llm_outputs(state, status="No steps taken.")
+# ---------------------------------------------------------------------------
+# Gradio UI
+# ---------------------------------------------------------------------------
+def build_ui():
+    """Build and return the Gradio ``Blocks`` app for the episode inspector.
+    Layout, top to bottom: a task dropdown, three action buttons, an
+    Observation / LLM row, a Research / Rewards row, and a Timeline panel.
+    The three buttons call ``do_reset``, ``do_llm_step`` and ``do_llm_auto``;
+    all of them write to the same list of ten output components.
+    """
+    with gr.Blocks(title="Explainer Env — Interactive Runner") as demo:
+        # Session state passed to and returned from every handler.
+        session_state = gr.State(empty_state())
+        # Header
+        gr.Markdown("# Explainer Episode Inspector")
+        # =====================================================================
+        # Controls
+        # =====================================================================
+        with gr.Row(equal_height=True):
+            task_dd = gr.Dropdown(
+                choices=TASK_CHOICES,
+                value="(random)",
+                label="Task",
+                scale=1,
+            )
+        with gr.Row(equal_height=True):
+            reset_btn = gr.Button("Reset Episode", variant="primary")
+            llm_step_btn = gr.Button("Next Step", variant="secondary")
+            llm_auto_btn = gr.Button("Auto Run", variant="primary")
+        # =====================================================================
+        # Inspector panels
+        # =====================================================================
+        with gr.Row(equal_height=False):
+            with gr.Column(scale=1):
+                with gr.Group():
+                    gr.Markdown("### Observation")
+                    obs_md = gr.Markdown("*Click Reset Episode to begin.*")
+                    feedback_box = gr.Textbox(
+                        label="Latest feedback",
+                        lines=8,
+                        max_lines=8,
+                        interactive=False,
+                    )
+            with gr.Column(scale=1):
+                with gr.Group():
+                    gr.Markdown("### LLM")
+                    with gr.Tabs():
+                        with gr.Tab("Parsed"):
+                            parsed_response_box = gr.Markdown("*No parsed response yet.*")
+                        with gr.Tab("Response / code"):
+                            generated_response_box = gr.Textbox(
+                                label="Raw response or generated code",
+                                value="No response yet.",
+                                lines=16,
+                                max_lines=16,
+                                interactive=False,
+                                buttons=["copy"],
+                            )
+        with gr.Row(equal_height=False):
+            with gr.Column(scale=1):
+                with gr.Group():
+                    gr.Markdown("### Research")
+                    search_box = gr.Textbox(
+                        label="Latest search results",
+                        lines=8,
+                        max_lines=8,
+                        interactive=False,
+                    )
+                    top_chunks_table = gr.Dataframe(
+                        headers=["Rank", "Source", "Title", "Score", "URL", "Snippet"],
+                        interactive=False,
+                        column_count=(6, "fixed"),
+                        label="Top chunks",
+                    )
+            with gr.Column(scale=1):
+                with gr.Group():
+                    gr.Markdown("### Rewards")
+                    reward_summary = gr.Markdown("*No rewards yet.*")
+                    rewards_table = gr.Dataframe(
+                        headers=["Reward"],
+                        interactive=False,
+                        column_count=(1, "fixed"),
+                        label="Reward matrix",
+                    )
+        # =====================================================================
+        # Timeline
+        # =====================================================================
+        with gr.Group():
+            gr.Markdown("### Timeline")
+            log_box = gr.Markdown("*No events yet.*")
+        # =====================================================================
+        # Wiring
+        # =====================================================================
+        # Common outputs, in order: state, log, observation, feedback,
+        # generated response, parsed response, search results, top chunks,
+        # reward summary, rewards table. Handler return values are mapped to
+        # these components positionally, so the order here must match.
+        common_output_components = [
+            session_state,
+            log_box,
+            obs_md,
+            feedback_box,
+            generated_response_box,
+            parsed_response_box,
+            search_box,
+            top_chunks_table,
+            reward_summary,
+            rewards_table,
+        ]
+        # Reset is the only handler that also takes the selected task.
+        reset_btn.click(
+            fn=do_reset,
+            inputs=[task_dd, session_state],
+            outputs=common_output_components,
+        )
+        llm_step_btn.click(
+            fn=do_llm_step,
+            inputs=[session_state],
+            outputs=common_output_components,
+        )
+        llm_auto_btn.click(
+            fn=do_llm_auto,
+            inputs=[session_state],
+            outputs=common_output_components,
+        )
+    return demo
+# Script entry point: serve the dashboard on all interfaces (0.0.0.0), port 7860.
+if __name__ == "__main__":
+    demo = build_ui()
+    demo.launch(server_name="0.0.0.0", server_port=7860)

dashboard_prompts.py ADDED Viewed

	@@ -0,0 +1,215 @@

+"""Prompt builders for the Gradio dashboard."""
+import json
+import textwrap
+try:
+    from .constants import MAX_EXPLORE_STEPS, MAX_REPAIR_STEPS
+except ImportError:  # pragma: no cover
+    from constants import MAX_EXPLORE_STEPS, MAX_REPAIR_STEPS
+SYSTEM_PROMPT = textwrap.dedent("""\
+You are an expert educator that creates interactive explanations of technical topics.
+You interact with an environment in two phases:
+## Phase 1: EXPLORE
+Search for relevant information. You'll be given a topic + tier (beginner/intermediate/advanced).
+- Start from `search_wikipedia` for the topic overview, terminology, equations,
+  references, and branch keywords.
+- Then use what you learned from Wikipedia/top chunks to choose the next search
+  avenue: arXiv/Scholar/HF papers for deeper sources, `fetch_docs` for
+  Marimo/Manim/API/code patterns, and HF Hub for model/dataset/examples.
+- Decide search queries to gather relevant material
+- Choose one explicit research tool:
+  - search_wikipedia: fundamentals and beginner explanations
+  - search_hf_papers: ML/AI papers from Hugging Face Papers
+  - search_arxiv: scientific/math/ML papers from arXiv
+  - search_scholar: paper metadata, abstracts, citations
+  - fetch_docs: library/API documentation for code, plots, Marimo, Manim
+  - search_hf_hub: model cards, datasets, Spaces, examples
+- Explore for what the generated code needs: formulas, pseudocode, visual intuition,
+  implementation examples, and Marimo/Manim/API patterns.
+- Use `fetch_docs` when you need code examples or interactive artifact patterns.
+  Do not repeat broad Wikipedia/paper overview searches when code-oriented context is missing.
+- You have up to {MAX_EXPLORE_STEPS} explore steps. Stop early if you have enough info.
+## Phase 2: GENERATE
+Produce a complete, runnable Python file in one of two formats:
+### marimo notebook format (STRICT)
+First line: `import marimo`
+Second line: `app = marimo.App()`
+Use `@app.cell` functions, import shared libraries in the first cell, return shared
+variables explicitly, and use underscore-prefixed scratch variables by default to
+avoid MB002. Last line: `if __name__ == "__main__": app.run()`.
+### manim animation format
+Use a Scene class with `construct()`, `self.play()`, and `self.wait()`.
+## Phase 3: REPAIR
+If validation fails, submit a revised complete file using the exact error feedback.
+For EXPLORE actions, respond with a JSON object:
+```json
+{
+  "tool": "search_wikipedia | search_hf_papers | search_arxiv | search_scholar | fetch_docs | search_hf_hub",
+  "query": "search query",
+  "intent": "what you need from this source"
+}
+```
+For GENERATE actions, respond with a JSON object:
+```json
+{
+  "format": "marimo" or "manim",
+  "code": "complete Python source code",
+  "narration": "scene narration (manim only, empty string for marimo)"
+}
+```
+""").replace("{MAX_EXPLORE_STEPS}", str(MAX_EXPLORE_STEPS)).replace(
+    "{MAX_REPAIR_STEPS}",
+    str(MAX_REPAIR_STEPS),
+)
+def build_explore_prompt(
+    topic: str,
+    content: str,
+    tier: str,
+    keywords: str,
+    step: int,
+    steps_left: int,
+    explored_context: str,
+    feedback: str,
+) -> str:
+    return textwrap.dedent(f"""\
+TOPIC: {topic}
+TIER: {tier}
+KEYWORDS: {keywords}
+DESCRIPTION: {content}
+PHASE: EXPLORE (step {step}, {steps_left} steps left)
+PREVIOUS RESEARCH:
+{explored_context or "(none yet)"}
+FEEDBACK: {feedback}
+Provide a search query to find relevant information about this topic.
+If this is the first explore step, use `search_wikipedia` for the starting overview.
+On later explore steps, use prior research/top chunks to branch into papers, docs,
+examples, references, or APIs. Prefer queries/intents that will help write the final
+interactive code: equations, pseudocode, visual examples, implementation details,
+or Marimo/Manim docs.
+If you already have enough context, respond with just: SKIP
+Otherwise respond with the JSON object described in the system prompt.
+""")
+def build_generate_prompt(
+    topic: str,
+    content: str,
+    tier: str,
+    keywords: str,
+    data_available: bool,
+    explored_context: str,
+) -> str:
+    format_hint = ""
+    if data_available:
+        format_hint = "This topic has associated data - consider marimo with data visualizations."
+    return textwrap.dedent(f"""\
+TOPIC: {topic}
+TIER: {tier}
+KEYWORDS: {keywords}
+DESCRIPTION: {content}
+DATA AVAILABLE: {data_available}
+{format_hint}
+ACCUMULATED RESEARCH:
+{explored_context or "(no research done)"}
+PHASE: GENERATE
+Create a complete, runnable interactive explanation. Choose the best format (marimo or manim).
+Respond with a JSON object:
+```json
+{{
+  "format": "marimo" or "manim",
+  "code": "complete Python source code here",
+  "narration": "scene-by-scene narration (manim only, empty for marimo)"
+}}
+```
+Requirements:
+- For marimo: first line `import marimo`, second line `app = marimo.App()`,
+  every cell has an explicit return, scratch variables use underscore prefixes,
+  and the file ends with `if __name__ == "__main__": app.run()`.
+- For manim: Scene class with construct(), self.play() animations, MathTex for math.
+- Cover the key concepts from the keywords.
+- Match the depth to the tier level ({tier}).
+- Incorporate findings from the research above.
+""")
+def build_repair_prompt(
+    topic: str,
+    tier: str,
+    fmt: str,
+    previous_code: str,
+    last_errors: str,
+) -> str:
+    return textwrap.dedent(f"""\
+TOPIC: {topic}
+TIER: {tier}
+FORMAT: {fmt}
+The previous generated artifact failed validation.
+ERROR FEEDBACK:
+{last_errors}
+PREVIOUS CODE:
+```python
+{previous_code}
+```
+Submit a corrected complete Python file. Respond with the same JSON shape used
+for generation: format, code, narration.
+If the error is MB002, do a full-file variable audit before answering. Fix the
+assignment names and loop variable names, not just the return values.
+""")
+def parse_generate_response(response: str) -> tuple[str, str, str]:
+    text = response.strip()
+    if "```json" in text:
+        text = text.split("```json", 1)[1].split("```", 1)[0].strip()
+    elif "```" in text:
+        text = text.split("```", 1)[1].split("```", 1)[0].strip()
+    try:
+        data = json.loads(text)
+        return data.get("format", "marimo"), data.get("code", ""), data.get("narration", "")
+    except json.JSONDecodeError:
+        if "from manim" in response or ("class " in response and "Scene" in response):
+            return "manim", response, ""
+        return "marimo", response, ""
+def parse_explore_response(response: str, fallback_query: str) -> tuple[str, str, str]:
+    text = response.strip()
+    if "```json" in text:
+        text = text.split("```json", 1)[1].split("```", 1)[0].strip()
+    elif text.startswith("```"):
+        text = text.split("```", 1)[1].split("```", 1)[0].strip()
+    try:
+        data = json.loads(text)
+        return (
+            data.get("tool", "search_wikipedia"),
+            data.get("query", fallback_query),
+            data.get("intent", "gather background and examples"),
+        )
+    except json.JSONDecodeError:
+        return "search_wikipedia", fallback_query, "gather background and examples"

server/app.py CHANGED Viewed

@@ -29,9 +29,7 @@ Usage:
 """
 import os
-import sys
 from contextlib import asynccontextmanager
-from pathlib import Path
 try:
     from openenv.core.env_server.http_server import create_app
@@ -50,11 +48,10 @@ except ImportError:
 def _build_dashboard_tab(*_args, **_kwargs):
     """Return the project dashboard as an OpenEnv custom web-interface tab."""
-    project_root = Path(__file__).resolve().parents[2]
-    if str(project_root) not in sys.path:
-        sys.path.insert(0, str(project_root))
-    from dashboard import build_ui
     return build_ui()

 """
 import os
 from contextlib import asynccontextmanager
 try:
     from openenv.core.env_server.http_server import create_app
 def _build_dashboard_tab(*_args, **_kwargs):
     """Return the project dashboard as an OpenEnv custom web-interface tab."""
+    try:
+        from ..dashboard import build_ui
+    except ImportError:  # pragma: no cover - supports uvicorn server.app:app
+        from dashboard import build_ui
     return build_ui()