Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- README.md +1 -3
- __marimo__/session/a.py.json +0 -0
- a.py +141 -112
- dashboard.py +900 -0
- dashboard_prompts.py +215 -0
- server/app.py +4 -7
README.md
CHANGED
|
@@ -14,9 +14,7 @@ tags:
|
|
| 14 |
---
|
| 15 |
|
| 16 |
<p align="center">
|
| 17 |
-
|
| 18 |
-
https://kgdrathan-explainer-env-dashboard.hf.space/
|
| 19 |
-
</a>
|
| 20 |
</p>
|
| 21 |
|
| 22 |
<p align="center">
|
|
|
|
| 14 |
---
|
| 15 |
|
| 16 |
<p align="center">
|
| 17 |
+
The dashboard is served by this Space at <code>/web/</code> in the custom tab.
|
|
|
|
|
|
|
| 18 |
</p>
|
| 19 |
|
| 20 |
<p align="center">
|
__marimo__/session/a.py.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
a.py
CHANGED
|
@@ -1,164 +1,193 @@
|
|
| 1 |
-
import marimo
|
| 2 |
-
import numpy as np
|
| 3 |
-
import pandas as pd
|
| 4 |
-
import matplotlib.pyplot as plt
|
| 5 |
-
from matplotlib.patches import Rectangle
|
| 6 |
|
| 7 |
-
|
| 8 |
-
app =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
@app.cell
|
| 12 |
def _(mo):
|
| 13 |
mo.md("""
|
| 14 |
-
#
|
| 15 |
|
| 16 |
-
|
| 17 |
|
| 18 |
-
|
|
|
|
| 19 |
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
6. **Policy (π)**: Strategy that agents use to decide actions
|
| 26 |
-
7. **Value Function (V)**: How good it is to be in a state
|
| 27 |
-
8. **Q-Function (Q)**: How good it is to take an action in a state
|
| 28 |
-
9. **Bellman Equation**: Relationship between value functions at different time steps
|
| 29 |
""")
|
| 30 |
return
|
| 31 |
|
| 32 |
|
| 33 |
-
@app.cell
|
| 34 |
-
def _(mo):
|
| 35 |
-
# Simple grid world example
|
| 36 |
-
grid_size = 4
|
| 37 |
-
start = (0, 0)
|
| 38 |
-
goal = (3, 3)
|
| 39 |
-
obstacles = [(1, 1), (1, 2)]
|
| 40 |
-
|
| 41 |
-
# Create a simple visualization
|
| 42 |
-
_fig, _ax = plt.subplots(figsize=(6, 6))
|
| 43 |
-
_ax.set_xlim(0, grid_size)
|
| 44 |
-
_ax.set_ylim(0, grid_size)
|
| 45 |
-
_ax.set_xticks(range(grid_size))
|
| 46 |
-
_ax.set_yticks(range(grid_size))
|
| 47 |
-
_ax.grid(True)
|
| 48 |
-
|
| 49 |
-
# Draw obstacles
|
| 50 |
-
for obs in obstacles:
|
| 51 |
-
rect = Rectangle((obs[0], obs[1]), 1, 1, facecolor="black", alpha=0.7)
|
| 52 |
-
_ax.add_patch(rect)
|
| 53 |
-
|
| 54 |
-
# Draw start and goal
|
| 55 |
-
start_rect = Rectangle(start, 1, 1, facecolor="green", alpha=0.7)
|
| 56 |
-
goal_rect = Rectangle(goal, 1, 1, facecolor="red", alpha=0.7)
|
| 57 |
-
_ax.add_patch(start_rect)
|
| 58 |
-
_ax.add_patch(goal_rect)
|
| 59 |
-
|
| 60 |
-
_ax.text(start[0] + 0.5, start[1] + 0.5, "Start", ha="center", va="center")
|
| 61 |
-
_ax.text(goal[0] + 0.5, goal[1] + 0.5, "Goal", ha="center", va="center")
|
| 62 |
-
|
| 63 |
-
_ax.set_title("Simple Grid World Example")
|
| 64 |
-
_ax.invert_yaxis() # To match standard grid coordinates
|
| 65 |
-
|
| 66 |
-
mo.ui.matplotlib(_fig)
|
| 67 |
-
plt.close(_fig)
|
| 68 |
-
return
|
| 69 |
-
|
| 70 |
-
|
| 71 |
@app.cell
|
| 72 |
def _(mo):
|
| 73 |
mo.md("""
|
| 74 |
-
##
|
| 75 |
-
|
| 76 |
-
The agent interacts with the environment in episodes:
|
| 77 |
-
|
| 78 |
-
1. **Observe State (s)**: Agent senses its current situation
|
| 79 |
-
2. **Choose Action (a)**: Based on policy π(a|s)
|
| 80 |
-
3. **Environment Transitions**: Move to new state s'
|
| 81 |
-
4. **Receive Reward (r)**: Immediate feedback
|
| 82 |
-
5. **Update Knowledge**: Learn from experience
|
| 83 |
-
|
| 84 |
-
The goal is to maximize expected cumulative discounted reward:
|
| 85 |
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
-
|
| 89 |
""")
|
| 90 |
return
|
| 91 |
|
| 92 |
|
| 93 |
@app.cell
|
| 94 |
def _(mo):
|
| 95 |
-
#
|
| 96 |
-
|
| 97 |
-
#
|
|
|
|
| 98 |
|
| 99 |
-
|
|
|
|
|
|
|
| 100 |
|
| 101 |
-
|
|
|
|
| 102 |
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
-
|
| 106 |
|
| 107 |
-
|
| 108 |
-
""")
|
| 109 |
-
return
|
| 110 |
|
|
|
|
| 111 |
|
| 112 |
-
|
| 113 |
-
def _(mo):
|
| 114 |
-
# Bellman Equation explanation
|
| 115 |
-
mo.md("""
|
| 116 |
-
### Bellman Equation
|
| 117 |
|
| 118 |
-
|
| 119 |
|
| 120 |
-
|
| 121 |
|
| 122 |
-
|
|
|
|
|
|
|
| 123 |
|
| 124 |
-
$Q(s,a) = \sum_{s'} P(s'|s,a)[r(s,a,s') + \gamma \max_{a'} Q(s',a')]$
|
| 125 |
|
| 126 |
-
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
return
|
| 129 |
|
| 130 |
|
| 131 |
@app.cell
|
| 132 |
def _(mo):
|
| 133 |
-
# Policy definition
|
| 134 |
mo.md("""
|
| 135 |
-
##
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
""")
|
| 145 |
return
|
| 146 |
|
| 147 |
|
| 148 |
@app.cell
|
| 149 |
-
def _(
|
| 150 |
-
|
| 151 |
-
mo.md("""
|
| 152 |
-
## Try It Yourself!
|
| 153 |
-
|
| 154 |
-
Below is an interactive grid world. You can visualize how an agent might navigate from start to goal while avoiding obstacles.
|
| 155 |
|
| 156 |
-
### Next Steps
|
| 157 |
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
- Study how value functions converge over time
|
| 161 |
-
""")
|
| 162 |
return
|
| 163 |
|
| 164 |
|
|
|
|
| 1 |
+
import marimo
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
+
__generated_with = "0.23.3"
|
| 4 |
+
app = marimo.App()
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@app.cell
|
| 8 |
+
def _():
|
| 9 |
+
import marimo as mo
|
| 10 |
+
import numpy as np
|
| 11 |
+
import matplotlib.pyplot as plt
|
| 12 |
+
from matplotlib.patches import Rectangle
|
| 13 |
+
|
| 14 |
+
return Rectangle, mo, plt
|
| 15 |
|
| 16 |
|
| 17 |
@app.cell
|
| 18 |
def _(mo):
|
| 19 |
mo.md("""
|
| 20 |
+
# Bayes' Theorem: Updating Beliefs with Evidence
|
| 21 |
|
| 22 |
+
Bayes' Theorem is a fundamental concept in probability theory that helps us update our beliefs based on new evidence.
|
| 23 |
|
| 24 |
+
The formula is:
|
| 25 |
+
$$P(A|B) = \\frac{P(B|A)P(A)}{P(B)}$$
|
| 26 |
|
| 27 |
+
Where:
|
| 28 |
+
- $P(A|B)$ is the **posterior probability**: the probability of event A given that B occurred
|
| 29 |
+
- $P(B|A)$ is the **likelihood**: the probability of observing B given that A is true
|
| 30 |
+
- $P(A)$ is the **prior probability**: our initial belief about A before seeing B
|
| 31 |
+
- $P(B)$ is the **evidence**: the overall probability of observing B
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
""")
|
| 33 |
return
|
| 34 |
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
@app.cell
|
| 37 |
def _(mo):
|
| 38 |
mo.md("""
|
| 39 |
+
## Medical Testing Example
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
+
Let's say we're testing for a rare disease:
|
| 42 |
+
- Prevalence (prior probability): 0.1% of the population has the disease
|
| 43 |
+
- Test accuracy:
|
| 44 |
+
- If you have the disease, the test correctly identifies it 99% of the time (true positive rate)
|
| 45 |
+
- If you don't have the disease, the test incorrectly says you do 5% of the time (false positive rate)
|
| 46 |
|
| 47 |
+
What's the probability that someone who tests positive actually has the disease?
|
| 48 |
""")
|
| 49 |
return
|
| 50 |
|
| 51 |
|
| 52 |
@app.cell
|
| 53 |
def _(mo):
|
| 54 |
+
# Define the parameters
|
| 55 |
+
prior_disease = 0.001 # P(Disease)
|
| 56 |
+
sensitivity = 0.99 # P(Test+ | Disease)
|
| 57 |
+
false_positive_rate = 0.05 # P(Test+ | No Disease)
|
| 58 |
|
| 59 |
+
# Calculate the components
|
| 60 |
+
# P(Test+)
|
| 61 |
+
p_test_positive = sensitivity * prior_disease + false_positive_rate * (1 - prior_disease)
|
| 62 |
|
| 63 |
+
# Apply Bayes' Theorem
|
| 64 |
+
posterior_disease = (sensitivity * prior_disease) / p_test_positive
|
| 65 |
|
| 66 |
+
# Display results
|
| 67 |
+
mo.md(f"""
|
| 68 |
+
Given:
|
| 69 |
+
- Prior probability of disease: {prior_disease * 100:.1f}%
|
| 70 |
+
- Sensitivity (true positive rate): {sensitivity * 100}%
|
| 71 |
+
- False positive rate: {false_positive_rate * 100}%
|
| 72 |
|
| 73 |
+
Using Bayes' theorem:
|
| 74 |
|
| 75 |
+
P(Disease|Test+) = [P(Test+|Disease) × P(Disease)] / P(Test+)
|
|
|
|
|
|
|
| 76 |
|
| 77 |
+
P(Test+) = P(Test+|Disease) × P(Disease) + P(Test+|No Disease) × P(No Disease)
|
| 78 |
|
| 79 |
+
P(Test+) = {sensitivity:.2f} × {prior_disease:.3f} + {false_positive_rate:.2f} × {(1 - prior_disease):.3f}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
+
P(Test+) = {p_test_positive:.4f}
|
| 82 |
|
| 83 |
+
P(Disease|Test+) = ({sensitivity:.2f} × {prior_disease:.3f}) / {p_test_positive:.4f} = {posterior_disease:.3f}
|
| 84 |
|
| 85 |
+
**Only {posterior_disease * 100:.1f}% of people who test positive actually have the disease!**
|
| 86 |
+
""")
|
| 87 |
+
return
|
| 88 |
|
|
|
|
| 89 |
|
| 90 |
+
@app.cell
|
| 91 |
+
def _(Rectangle, mo, plt):
|
| 92 |
+
# Create a visualization showing the four categories
|
| 93 |
+
fig, ax = plt.subplots(figsize=(10, 6))
|
| 94 |
+
|
| 95 |
+
# Set up the grid
|
| 96 |
+
ax.set_xlim(0, 10)
|
| 97 |
+
ax.set_ylim(0, 8)
|
| 98 |
+
|
| 99 |
+
# Draw rectangles for different categories
|
| 100 |
+
# Population (1000 people)
|
| 101 |
+
ax.add_patch(Rectangle((1, 1), 8, 6, fill=False, edgecolor="black", linewidth=2))
|
| 102 |
+
ax.text(5, 7.2, "Population (1000 people)", ha="center", va="bottom")
|
| 103 |
+
|
| 104 |
+
# Disease (0.1%)
|
| 105 |
+
disease_count = 1000 * 0.001
|
| 106 |
+
no_disease_count = 1000 - disease_count
|
| 107 |
+
|
| 108 |
+
# Draw disease area
|
| 109 |
+
ax.add_patch(Rectangle((1, 1), 8, disease_count / 1000 * 6, facecolor="red", alpha=0.5))
|
| 110 |
+
ax.text(
|
| 111 |
+
5,
|
| 112 |
+
1.5 + disease_count / 1000 * 3,
|
| 113 |
+
f"Disease ({disease_count:.0f} people)",
|
| 114 |
+
ha="center",
|
| 115 |
+
va="center",
|
| 116 |
+
color="white",
|
| 117 |
+
weight="bold",
|
| 118 |
+
)
|
| 119 |
+
|
| 120 |
+
# Draw non-disease area
|
| 121 |
+
ax.add_patch(
|
| 122 |
+
Rectangle((1, 1 + disease_count / 1000 * 6), 8, no_disease_count / 1000 * 6, facecolor="blue", alpha=0.5)
|
| 123 |
+
)
|
| 124 |
+
ax.text(
|
| 125 |
+
5,
|
| 126 |
+
1.5 + disease_count / 1000 * 6 + no_disease_count / 1000 * 3,
|
| 127 |
+
f"No Disease ({no_disease_count:.0f} people)",
|
| 128 |
+
ha="center",
|
| 129 |
+
va="center",
|
| 130 |
+
color="white",
|
| 131 |
+
weight="bold",
|
| 132 |
+
)
|
| 133 |
+
|
| 134 |
+
# Add labels
|
| 135 |
+
ax.text(0.5, 4, "True Positive\n(99% of Diseased)", ha="right", va="center")
|
| 136 |
+
ax.text(0.5, 2, "False Positive\n(5% of Non-Diseased)", ha="right", va="center")
|
| 137 |
+
|
| 138 |
+
# Add test results
|
| 139 |
+
tp = disease_count * 0.99 # True positives
|
| 140 |
+
fp = no_disease_count * 0.05 # False positives
|
| 141 |
+
|
| 142 |
+
ax.add_patch(Rectangle((2, 3.5), 2, 1, facecolor="green", alpha=0.7))
|
| 143 |
+
ax.text(3, 4, f"True Positive\n({tp:.0f})", ha="center", va="center")
|
| 144 |
+
|
| 145 |
+
ax.add_patch(Rectangle((2, 1.5), 2, 1, facecolor="orange", alpha=0.7))
|
| 146 |
+
ax.text(3, 2, f"False Positive\n({fp:.0f})", ha="center", va="center")
|
| 147 |
+
|
| 148 |
+
# Add total positive tests
|
| 149 |
+
total_positives = tp + fp
|
| 150 |
+
ax.add_patch(Rectangle((5, 1.5), 3, 2, facecolor="yellow", alpha=0.5))
|
| 151 |
+
ax.text(6.5, 2.5, f"Total Positive\nTests ({total_positives:.0f})", ha="center", va="center")
|
| 152 |
+
|
| 153 |
+
ax.set_xticks([])
|
| 154 |
+
ax.set_yticks([])
|
| 155 |
+
ax.set_title("Bayes' Theorem Visualization: Medical Testing")
|
| 156 |
+
|
| 157 |
+
mo.ui.matplotlib(plt.gca())
|
| 158 |
return
|
| 159 |
|
| 160 |
|
| 161 |
@app.cell
|
| 162 |
def _(mo):
|
|
|
|
| 163 |
mo.md("""
|
| 164 |
+
## Why This Matters
|
| 165 |
+
|
| 166 |
+
This example shows why Bayes' Theorem is important:
|
| 167 |
+
|
| 168 |
+
1. **High false positive rate** combined with **low prevalence** leads to counterintuitive results
|
| 169 |
+
2. **95% accurate tests** can still give misleading results when the condition is rare
|
| 170 |
+
3. **Bayes' Theorem forces us to think about**:
|
| 171 |
+
- Our initial beliefs (prior probability)
|
| 172 |
+
- How likely we are to observe evidence given our beliefs
|
| 173 |
+
- How to update our beliefs in light of new evidence
|
| 174 |
+
|
| 175 |
+
This same logic applies to:
|
| 176 |
+
- Spam detection
|
| 177 |
+
- Financial risk assessment
|
| 178 |
+
- Scientific hypothesis testing
|
| 179 |
+
- Machine learning classification
|
| 180 |
""")
|
| 181 |
return
|
| 182 |
|
| 183 |
|
| 184 |
@app.cell
|
| 185 |
+
def _():
|
| 186 |
+
return
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
|
|
|
| 188 |
|
| 189 |
+
@app.cell
|
| 190 |
+
def _():
|
|
|
|
|
|
|
| 191 |
return
|
| 192 |
|
| 193 |
|
dashboard.py
ADDED
|
@@ -0,0 +1,900 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Gradio UI for the Research → Interactive Explainer Environment.
|
| 3 |
+
|
| 4 |
+
Two modes:
|
| 5 |
+
1. LLM Mode: LLM drives exploration + generation, human watches step-by-step
|
| 6 |
+
2. Human Mode: human types queries and code, sees rewards in real-time
|
| 7 |
+
|
| 8 |
+
Environment service is the same OpenEnv server that hosts this UI.
|
| 9 |
+
LLM configuration is resolved from API_URL, HF_TOKEN/API_KEY, and MODEL_NAME.
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import ast
|
| 13 |
+
import json
|
| 14 |
+
import os
|
| 15 |
+
import re
|
| 16 |
+
import uuid
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
from typing import Any
|
| 19 |
+
|
| 20 |
+
import gradio as gr
|
| 21 |
+
from dotenv import load_dotenv
|
| 22 |
+
|
| 23 |
+
# Load .env from project root
|
| 24 |
+
PROJECT_ROOT = Path(__file__).parent
|
| 25 |
+
|
| 26 |
+
load_dotenv(PROJECT_ROOT / ".env")
|
| 27 |
+
|
| 28 |
+
try:
|
| 29 |
+
from .client import ExplainerEnv
|
| 30 |
+
from .constants import SUCCESS_SCORE_THRESHOLD, normalized_episode_score
|
| 31 |
+
from .dashboard_prompts import (
|
| 32 |
+
SYSTEM_PROMPT,
|
| 33 |
+
build_explore_prompt,
|
| 34 |
+
build_generate_prompt,
|
| 35 |
+
build_repair_prompt,
|
| 36 |
+
parse_explore_response,
|
| 37 |
+
parse_generate_response,
|
| 38 |
+
)
|
| 39 |
+
from .models import ExplainerAction
|
| 40 |
+
from .task_bank import ALL_TASKS
|
| 41 |
+
except ImportError: # pragma: no cover - supports direct execution from env root
|
| 42 |
+
from client import ExplainerEnv
|
| 43 |
+
from constants import SUCCESS_SCORE_THRESHOLD, normalized_episode_score
|
| 44 |
+
from dashboard_prompts import (
|
| 45 |
+
SYSTEM_PROMPT,
|
| 46 |
+
build_explore_prompt,
|
| 47 |
+
build_generate_prompt,
|
| 48 |
+
build_repair_prompt,
|
| 49 |
+
parse_explore_response,
|
| 50 |
+
parse_generate_response,
|
| 51 |
+
)
|
| 52 |
+
from models import ExplainerAction
|
| 53 |
+
from task_bank import ALL_TASKS
|
| 54 |
+
|
| 55 |
+
# Loopback URL of the server hosting this UI; the port is read from PORT (default "8000").
SELF_ENV_BASE_URL = f"http://127.0.0.1:{os.getenv('PORT', '8000')}"
# Model used when MODEL_NAME is not set in the environment.
DEFAULT_MODEL_NAME = "bedrock-qwen3-coder-30b-a3b"


# ---------------------------------------------------------------------------
# Task catalog (reference only)
# ---------------------------------------------------------------------------

# Dropdown entries: a "(random)" sentinel followed by one label per task, in ALL_TASKS order.
TASK_CHOICES = [
    "(random)",
    *(f"{task.topic} [{task.difficulty}, {task.tier}]" for task in ALL_TASKS),
]
# Map dropdown label -> topic name for reset(topic=...)
_TASK_LABEL_TO_TOPIC: dict[str, str] = {
    f"{task.topic} [{task.difficulty}, {task.tier}]": task.topic
    for task in ALL_TASKS
}
|
| 66 |
+
|
| 67 |
+
# ---------------------------------------------------------------------------
|
| 68 |
+
# Session manager
|
| 69 |
+
# ---------------------------------------------------------------------------
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
class SessionManager:
    """Module-level registry mapping session_id -> connected ExplainerEnv client.

    Each session owns at most one client. The base URL the client was created
    for is remembered (with trailing slashes removed) so that a request for the
    same session against a different server replaces the old connection.
    """

    def __init__(self):
        self._clients: dict[str, ExplainerEnv] = {}
        self._urls: dict[str, str] = {}

    async def get_or_create(self, session_id: str, base_url: str) -> ExplainerEnv:
        """Return the connected client for *session_id*, creating it if needed.

        Args:
            session_id: Key identifying the dashboard session.
            base_url: Environment server URL; trailing slashes are ignored.

        Returns:
            The cached client when one exists for the same URL, otherwise a
            newly connected ``ExplainerEnv``.
        """
        # Compare and store the normalized URL: the client itself is built from
        # the stripped form, so "http://host/" and "http://host" must not be
        # treated as different servers (that would drop a live session).
        normalized_url = base_url.rstrip("/")
        if session_id in self._clients and self._urls.get(session_id) != normalized_url:
            await self.close(session_id)
        if session_id not in self._clients:
            client = ExplainerEnv(base_url=normalized_url)
            await client.connect()
            # Register only after connect() succeeded so a failed attempt is
            # never cached.
            self._clients[session_id] = client
            self._urls[session_id] = normalized_url
        return self._clients[session_id]

    async def close(self, session_id: str) -> None:
        """Forget *session_id* and disconnect its client, if any.

        Disconnecting is best effort: a failure is logged at debug level and
        never raised, and the session is removed from the registry either way.
        """
        client = self._clients.pop(session_id, None)
        self._urls.pop(session_id, None)
        if client is None:
            return
        try:
            await client.disconnect()
        except Exception:
            import logging

            logging.getLogger(__name__).debug(
                "Ignoring error while disconnecting session %s", session_id, exc_info=True
            )


SESSION_MGR = SessionManager()
|
| 100 |
+
|
| 101 |
+
# ---------------------------------------------------------------------------
|
| 102 |
+
# Helpers
|
| 103 |
+
# ---------------------------------------------------------------------------
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def _resolve_env_url() -> str:
    """Return the environment server base URL (always ``SELF_ENV_BASE_URL``)."""
    env_url = SELF_ENV_BASE_URL
    return env_url
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def _resolve_llm() -> "tuple[str, str | None, str]":
    """Read the LLM endpoint configuration from environment variables.

    Returns:
        A ``(api_url, api_key, model)`` tuple where

        * ``api_url`` comes from ``API_URL`` (falling back to ``API_BASE_URL``)
          with trailing slashes removed, or ``""`` when neither is set;
        * ``api_key`` comes from ``HF_TOKEN`` (falling back to ``API_KEY``) and
          is ``None`` when neither is set;
        * ``model`` comes from ``MODEL_NAME``, defaulting to
          ``DEFAULT_MODEL_NAME``.

    Empty environment values are treated the same as unset ones.
    """
    api_url = (os.getenv("API_URL") or os.getenv("API_BASE_URL") or "").rstrip("/")
    # No "" fallback here on purpose: callers receive None when no key is configured.
    api_key = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
    model = os.getenv("MODEL_NAME") or DEFAULT_MODEL_NAME
    return api_url, api_key, model
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def call_llm_or_raise(client: Any, user_prompt: str, *, model: str, max_tokens: int) -> str:
    """Run one non-streaming chat completion and return the reply text.

    The request carries ``SYSTEM_PROMPT`` as the system message followed by
    *user_prompt*, at temperature 0.7. Nothing is caught here, so any error
    raised by the provider client propagates to the caller.

    Returns:
        The stripped content of the first choice, or ``""`` when it is empty.
    """
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_prompt},
    ]
    response = client.chat.completions.create(
        model=model,
        messages=conversation,
        temperature=0.7,
        max_tokens=max_tokens,
        stream=False,
    )
    reply = response.choices[0].message.content
    return (reply or "").strip()
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def _format_llm_exception(exc: Exception, api_url: str, model: str) -> str:
    """Describe an LLM failure as ``<ExcType> from <url> using model <model>: <detail>``.

    The detail is the message of the explicit cause (``__cause__``) when there
    is one, otherwise the message of *exc*; if that message is blank the
    exception class name is used instead.
    """
    kind = exc.__class__.__name__
    origin = getattr(exc, "__cause__", None) or exc
    message = str(origin).strip()
    if not message:
        message = kind
    return f"{kind} from {api_url} using model {model}: {message}"
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def empty_state() -> dict[str, Any]:
    """Return a fresh dashboard state with a new random ``session_id``.

    Every call builds new list objects, so states never share mutable values.
    """
    return dict(
        session_id=str(uuid.uuid4()),
        obs=None,
        step=0,
        rewards=[],
        reward_details=[],
        log=[],
        done=False,
        phase="not_started",
        explored_context="",
        topic="",
        tier="",
        keywords="",
        content="",
        data_available=False,
        last_code="",
        last_format="marimo",
        generated_response="",
        parsed_response="",
        top_chunks=[],
    )
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def build_reward_matrix(reward_details: list[dict[str, Any]]) -> gr.update:
    """Build a reward matrix with reward names as rows and steps as columns.

    Rows appear in the order their reward name is first seen; columns are the
    distinct step values in ascending order. A cell is blank when that reward
    was not reported for that step. An entry without components still yields a
    blank ``total`` row so its step column is shown.
    """
    step_columns = sorted({detail["step"] for detail in reward_details})
    row_labels: list[str] = []
    lookup: dict[tuple[str, int], Any] = {}

    for detail in reward_details:
        step_no = detail["step"]
        parts = detail.get("components", {}) or {"total": ""}
        for label, score in parts.items():
            if label not in row_labels:
                row_labels.append(label)
            lookup[(label, step_no)] = score

    def render_cell(label: str, step_no: int) -> str:
        # Missing and explicitly blank cells both render as an empty string.
        score = lookup.get((label, step_no), "")
        return _fmt_component(score) if score != "" else ""

    header_row = ["Reward"] + [f"Step {step_no}" for step_no in step_columns]
    table = [
        [label] + [render_cell(label, step_no) for step_no in step_columns]
        for label in row_labels
    ]

    return gr.update(
        headers=header_row,
        value=table,
        column_count=(len(header_row), "fixed"),
    )
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
def build_reward_summary(reward_details: list[dict[str, Any]]) -> str:
    """Render one bold markdown line per reward entry with its phase total.

    The total is the first of ``explore_total``, ``generate_total`` or
    ``repair_total`` present in the entry's components, or ``n/a`` when none
    is. Lines are separated by blank lines; with no entries a placeholder
    message is returned.
    """
    if not reward_details:
        return "*No rewards yet.*"

    total_keys = ("explore_total", "generate_total", "repair_total")
    lines = []
    for detail in reward_details:
        phase_total = _first_present(detail.get("components", {}), total_keys, default="n/a")
        lines.append(f"**Step {detail['step']} · {detail['phase']} · total {_fmt_component(phase_total)}**")
    return "\n\n".join(lines)
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def build_top_chunks_df(chunks: list[dict[str, Any]]) -> list[list[Any]]:
    """Convert up to the first five chunks into table rows.

    Each row is ``[rank, source, title, score, url, snippet]``; missing fields
    become ``""`` and the snippet is whitespace-collapsed and cut to 700
    characters.
    """
    plain_fields = ("rank", "source", "title", "score", "url")
    return [
        [
            *(chunk.get(field, "") for field in plain_fields),
            _trim_display_text(str(chunk.get("snippet", "")), 700),
        ]
        for chunk in chunks[:5]
    ]
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
def extract_top_chunks(obs_dict: dict[str, Any], search_results: str) -> list[dict[str, Any]]:
    """Return the structured chunks of an observation, or parse them from text.

    Lookup order: ``obs_dict["top_chunks"]``, then
    ``obs_dict["metadata"]["top_chunks"]``; when both are missing or empty the
    rendered *search_results* text is parsed instead.
    """
    meta = obs_dict.get("metadata") or {}
    structured = obs_dict.get("top_chunks") or meta.get("top_chunks") or []
    if structured:
        return structured
    return parse_rendered_chunks(search_results)
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
def parse_rendered_chunks(search_results: str) -> list[dict[str, Any]]:
    """Fallback parser for rendered research results if structured fields are absent.

    The text is split on ``---`` separators surrounded by blank lines. A part
    is kept only when its first non-blank line looks like
    ``[<rank>] <source>: <title>``; an optional ``URL:`` line may follow, and
    the remaining non-blank lines form the snippet. At most five chunks are
    returned, each with an empty ``score``.
    """
    header_pattern = r"\[(\d+)\]\s+([^:]+):\s+(.+)"
    parsed: list[dict[str, Any]] = []

    for block_text in re.split(r"\n\n---\n\n", search_results or ""):
        content = [ln for ln in block_text.splitlines() if ln.strip()]
        header = re.match(header_pattern, content[0]) if content else None
        if header is None:
            continue

        has_url = len(content) > 1 and content[1].startswith("URL:")
        link = content[1].removeprefix("URL:").strip() if has_url else ""
        body = content[2:] if has_url else content[1:]

        rank, source, title = header.groups()
        parsed.append({
            "rank": int(rank),
            "source": source.strip(),
            "title": title.strip(),
            "url": link,
            "score": "",
            "snippet": "\n".join(body).strip(),
        })

    return parsed[:5]
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
def _trim_display_text(text: str, max_chars: int) -> str:
    """Collapse whitespace runs to single spaces and cap the length.

    Text longer than *max_chars* is cut to that many characters, stripped of
    trailing whitespace and suffixed with ``...``.
    """
    collapsed = re.sub(r"\s+", " ", text).strip()
    if len(collapsed) > max_chars:
        return collapsed[:max_chars].rstrip() + "..."
    return collapsed
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
def _first_present(mapping: dict[str, Any], keys: tuple[str, ...], default: Any = None) -> Any:
    """Return the value of the first key in *keys* that exists in *mapping*.

    Presence is what counts, not truthiness: a stored ``0`` or ``None`` is
    returned as is. *default* is returned when none of the keys is present.
    """
    return next((mapping[key] for key in keys if key in mapping), default)
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
def _fmt_component(value: Any) -> str:
    """Format floats with three decimals; stringify everything else unchanged."""
    if isinstance(value, float):
        return format(value, ".3f")
    return str(value)
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
# Metadata keys that are never treated as reward components, even when numeric.
_NON_REWARD_METADATA_KEYS = frozenset((
    "step",
    "phase",
    "tool",
    "source_count",
    "error",
    "explore_steps_used",
    "repair_steps_used",
    "sandbox_message",
    "error_codes",
))

# Reward components shown per phase, in display order.
_VISIBLE_REWARD_COMPONENTS = dict(
    explore=(
        "query_quality",
        "evidence_quality",
        "information_gain",
        "efficiency",
        "explore_total",
    ),
    generate=(
        "validity",
        "task_alignment",
        "structure",
        "research_usage",
        "generate_total",
    ),
    repair=(
        "repair_success",
        "fixed_prior_errors",
        "changed_code",
        "repair_total",
    ),
)
|
| 305 |
+
|
| 306 |
+
|
| 307 |
+
def parse_reward_components(feedback: str) -> dict[str, Any]:
|
| 308 |
+
"""Fallback parser for older observations that lack reward metadata."""
|
| 309 |
+
dict_match = re.search(r"Reward:\s*(\{.+\})", feedback)
|
| 310 |
+
if dict_match:
|
| 311 |
+
try:
|
| 312 |
+
parsed = ast.literal_eval(dict_match.group(1))
|
| 313 |
+
except (SyntaxError, ValueError):
|
| 314 |
+
pass
|
| 315 |
+
else:
|
| 316 |
+
if isinstance(parsed, dict):
|
| 317 |
+
return {k: v for k, v in parsed.items() if k not in ("step", "phase")}
|
| 318 |
+
|
| 319 |
+
kv_match = re.search(r"Reward:\s*(.+)", feedback)
|
| 320 |
+
if kv_match:
|
| 321 |
+
return _parse_key_value_components(kv_match.group(1))
|
| 322 |
+
return {}
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
def _parse_key_value_components(text: str) -> dict[str, Any]:
    """Parse a comma-separated ``key=value`` list into a dict.

    Fragments without ``=`` are skipped. Keys and values are stripped; values
    that parse as a float are stored as floats, all others as strings.
    """
    result: dict[str, Any] = {}
    for fragment in text.split(","):
        name, separator, raw = fragment.strip().partition("=")
        if not separator:
            continue
        name, raw = name.strip(), raw.strip()
        try:
            result[name] = float(raw)
        except ValueError:
            result[name] = raw
    return result
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
def reward_components(obs_dict: dict[str, Any], feedback: str) -> dict[str, Any]:
    """Collect the reward components to display for one observation.

    Numeric (non-bool) metadata values whose key is not in
    ``_NON_REWARD_METADATA_KEYS`` are the candidates. If the phase (from the
    metadata, else the observation) has a visible-component list and at least
    one listed component is present, only those are returned, in list order.
    Otherwise all candidates are returned, or, when there are none, whatever
    can be parsed from the *feedback* text.
    """
    meta = obs_dict.get("metadata") or {}

    numeric: dict[str, Any] = {}
    for name, score in meta.items():
        if name in _NON_REWARD_METADATA_KEYS:
            continue
        # bool is a subclass of int, so it has to be excluded explicitly.
        if isinstance(score, bool) or not isinstance(score, (int, float)):
            continue
        numeric[name] = score

    stage = meta.get("phase") or obs_dict.get("phase")
    whitelist = _VISIBLE_REWARD_COMPONENTS.get(str(stage))
    if whitelist:
        shown = {name: numeric[name] for name in whitelist if name in numeric}
        if shown:
            return shown

    if numeric:
        return numeric
    return parse_reward_components(feedback)
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
def to_obs_dict(obs: Any) -> dict[str, Any]:
    """Convert an observation object into a plain dict.

    Args:
        obs: A mapping, an object exposing ``model_dump()``, or any object
            with a ``__dict__``.

    Returns:
        A dict of the observation's fields. Mappings and ``__dict__``-based
        objects are shallow-copied so later edits do not alias the source.
    """
    if isinstance(obs, dict):
        # vars() raises TypeError on a plain dict, so handle it explicitly.
        return dict(obs)
    if hasattr(obs, "model_dump"):
        return obs.model_dump()
    return dict(vars(obs))
|
| 356 |
+
|
| 357 |
+
|
| 358 |
+
def fmt_log(log_entries: list[str]) -> str:
    """Render the event log as a fenced ``text`` Markdown block.

    Returns an italic placeholder when there are no entries.
    """
    if log_entries:
        body = "\n".join(log_entries)
        return f"```text\n{body}\n```"
    return "*No events yet.*"
|
| 362 |
+
|
| 363 |
+
|
| 364 |
+
def obs_summary(obs: dict[str, Any]) -> str:
    """Format the headline observation fields as bold-labelled Markdown lines.

    Missing keys fall back to an empty string, ``0`` for the remaining
    explore steps, and ``False`` for data availability.
    """
    fields = (
        ("Topic", "topic", ""),
        ("Tier", "tier", ""),
        ("Phase", "phase", ""),
        ("Explore steps left", "explore_steps_left", 0),
        ("Keywords", "keywords", ""),
        ("Data available", "data_available", False),
    )
    return "\n".join(f"**{label}:** {obs.get(key, default)}" for label, key, default in fields)
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
def fenced_json(data: dict[str, Any]) -> str:
    """Serialise *data* as indented JSON inside a ``json`` Markdown fence.

    Non-ASCII characters are kept as-is rather than escaped.
    """
    payload = json.dumps(data, indent=2, ensure_ascii=False)
    return f"```json\n{payload}\n```"
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
def format_explore_action_md(tool: str, query: str, intent: str) -> str:
    """Render an explore action (tool, query, intent) as a fenced JSON block."""
    action = {"tool": tool, "query": query, "intent": intent}
    return fenced_json(action)
|
| 381 |
+
|
| 382 |
+
|
| 383 |
+
def format_code_text(code: str) -> str:
    """Return *code* unchanged, mapping any falsy value to an empty string."""
    if code:
        return code
    return ""
|
| 385 |
+
|
| 386 |
+
|
| 387 |
+
def common_outputs(
    state: dict[str, Any],
    status: str = "",
    obs_md: str = "",
    feedback: str = "",
    search: str = "",
) -> tuple[dict[str, Any], str, str, str, str, str, str, list[list[Any]], str, Any]:
    """Assemble the ten-value tuple returned by every dashboard handler.

    Args:
        state: Session state; ``log`` and ``reward_details`` must be present.
        status: Accepted for call-site compatibility; not part of the result.
        obs_md: Markdown summary of the current observation.
        feedback: Latest feedback text.
        search: Latest search-results text.

    Returns:
        ``(state, log markdown, obs_md, feedback, generated response,
        parsed response, search, top-chunk rows, reward summary,
        reward matrix)``.
    """
    log_md = fmt_log(state["log"])
    generated = state.get("generated_response", "")
    parsed = state.get("parsed_response", "")
    chunk_rows = build_top_chunks_df(state.get("top_chunks", []))
    details = state["reward_details"]
    summary_md = build_reward_summary(details)
    matrix = build_reward_matrix(details)
    return (
        state,
        log_md,
        obs_md,
        feedback,
        generated,
        parsed,
        search,
        chunk_rows,
        summary_md,
        matrix,
    )
|
| 406 |
+
|
| 407 |
+
|
| 408 |
+
def llm_outputs(
    state: dict[str, Any],
    status: str = "",
    obs_md: str = "",
    feedback: str = "",
    search: str = "",
) -> tuple[dict[str, Any], str, str, str, str, str, str, list[list[Any]], str, Any]:
    """Build handler outputs for LLM-driven steps.

    Delegates to ``common_outputs`` with the same arguments, so both kinds
    of handler return an identically shaped tuple.
    """
    forwarded = {
        "status": status,
        "obs_md": obs_md,
        "feedback": feedback,
        "search": search,
    }
    return common_outputs(state, **forwarded)
|
| 416 |
+
|
| 417 |
+
|
| 418 |
+
async def do_reset(task_label, state):
    """Reset the environment and start a new episode.

    Closes the session recorded in the incoming state (if any), creates a
    fresh state, resets the environment and copies the first observation
    into the state.

    Args:
        task_label: Dropdown label; forwarded as ``topic`` only when it maps
            to one in ``_TASK_LABEL_TO_TOPIC``.
        state: Previous session state.

    Returns:
        The handler output tuple built by ``common_outputs``.
    """
    previous_sid = state.get("session_id", "")
    if previous_sid:
        await SESSION_MGR.close(previous_sid)

    state = empty_state()
    sid = state["session_id"]
    env_url = _resolve_env_url()

    # Forward a topic only when a specific task was selected.
    topic = _TASK_LABEL_TO_TOPIC.get(task_label)
    reset_kwargs: dict[str, Any] = {"topic": topic} if topic else {}

    try:
        env = await SESSION_MGR.get_or_create(sid, env_url)
        result = await env.reset(**reset_kwargs)
    except Exception as e:
        state["log"].append(f"[ERROR] Connection/reset failed: {e}")
        return common_outputs(state, status=f"Error: {e}")

    obs = result.observation
    obs_dict = to_obs_dict(obs)
    state["obs"] = obs_dict
    for field in ("phase", "topic", "tier", "keywords", "content", "data_available"):
        state[field] = getattr(obs, field)
    state.update(
        generated_response="",
        parsed_response="",
        last_code="",
        top_chunks=[],
    )
    state["log"].append(f"[START] topic={obs.topic} tier={obs.tier} phase={obs.phase}")

    return common_outputs(
        state,
        status=f"Reset OK — assigned: {obs.topic} [{obs.tier}]",
        obs_md=obs_summary(obs_dict),
        feedback=obs.feedback,
    )
|
| 463 |
+
|
| 464 |
+
|
| 465 |
+
async def do_explore(tool, query, intent, state):
    """Execute an explore step.

    Args:
        tool: Research tool name forwarded to the environment.
        query: Search query. ``None`` or blank means nothing is sent.
        intent: What the caller wants from the source; ``None`` is allowed.
        state: Session state, updated in place on success.

    Returns:
        The handler output tuple built by ``common_outputs``. Connection,
        action-construction and step failures are reported as an
        ``[ERROR]`` log entry instead of being raised.
    """
    if state.get("done"):
        state["log"].append("[WARN] Episode already done.")
        return common_outputs(state, status="Episode already done.", feedback="Episode already done.")

    # Values parsed from an LLM reply may be None or non-string.
    query = str(query or "").strip()
    intent = str(intent or "").strip()
    if not query:
        return common_outputs(state, status="Empty query — nothing sent.")

    sid = state.get("session_id", "")
    env_url = _resolve_env_url()
    try:
        env = await SESSION_MGR.get_or_create(sid, env_url)
    except Exception as e:
        state["log"].append(f"[ERROR] Connection failed: {e}")
        return common_outputs(state, status=f"Error: {e}")

    try:
        # Action construction is guarded too, on the assumption that
        # ExplainerAction validates its fields (e.g. the tool name).
        action = ExplainerAction(
            action_type="explore",
            tool=tool,
            query=query,
            intent=intent,
        )
        result = await env.step(action)
    except Exception as e:
        state["log"].append(f"[ERROR] Explore step failed: {e}")
        return common_outputs(state, status=f"Error: {e}")

    obs = result.observation
    reward = result.reward or 0.0
    obs_dict = to_obs_dict(obs)
    state["step"] += 1
    state["rewards"].append(reward)
    state["obs"] = obs_dict
    state["phase"] = obs.phase
    state["done"] = result.done
    state["explored_context"] = obs.explored_context
    state["parsed_response"] = format_explore_action_md(tool, query, intent)
    state["top_chunks"] = extract_top_chunks(obs_dict, obs.search_results)

    components = reward_components(obs_dict, obs.feedback)
    state["reward_details"].append({
        "step": state["step"],
        "phase": "explore",
        "components": components,
    })
    state["log"].append(
        f'[STEP] step={state["step"]} action=explore:{tool}:"{query[:60]}" reward={reward:.3f} done={result.done}'
    )

    status = f"Step {state['step']} explore — reward: {reward:.3f}"
    return common_outputs(
        state,
        status=status,
        obs_md=obs_summary(obs_dict),
        feedback=obs.feedback,
        search=obs.search_results,
    )
|
| 519 |
+
|
| 520 |
+
|
| 521 |
+
async def do_generate(fmt, code, narration, state):
    """Execute a generate step, or a repair step when the phase is ``repair``.

    Args:
        fmt: Artifact format forwarded to the environment.
        code: Complete source to submit; ``None`` is treated as empty.
        narration: Optional narration text.
        state: Session state, updated in place on success.

    Returns:
        The handler output tuple built by ``common_outputs``. Connection and
        step failures are reported as an ``[ERROR]`` log entry instead of
        being raised.
    """
    if state.get("done"):
        state["log"].append("[WARN] Episode already done.")
        return common_outputs(state, status="Episode already done.", feedback="Episode already done.")

    sid = state.get("session_id", "")
    env_url = _resolve_env_url()
    try:
        env = await SESSION_MGR.get_or_create(sid, env_url)
    except Exception as e:
        state["log"].append(f"[ERROR] Connection failed: {e}")
        return common_outputs(state, status=f"Error: {e}")

    code = code or ""
    action_type = "repair" if state.get("phase") == "repair" else "generate"
    try:
        action = ExplainerAction(
            action_type=action_type,
            format=fmt,
            code=code,
            narration=narration,
        )
        result = await env.step(action)
    except Exception as e:
        state["log"].append(f"[ERROR] {action_type.capitalize()} step failed: {e}")
        return common_outputs(state, status=f"Error: {e}")

    obs = result.observation
    reward = result.reward or 0.0
    obs_dict = to_obs_dict(obs)
    state["step"] += 1
    state["rewards"].append(reward)
    state["obs"] = obs_dict
    state["phase"] = obs.phase
    state["done"] = result.done
    state["last_code"] = code
    state["last_format"] = fmt
    state["generated_response"] = format_code_text(code)
    state["parsed_response"] = fenced_json({
        "action_type": action_type,
        "format": fmt,
        "code_len": len(code),
        "narration_len": len(narration or ""),
    })

    components = reward_components(obs_dict, obs.feedback)
    state["reward_details"].append({
        "step": state["step"],
        "phase": action_type,
        "components": components,
    })

    state["log"].append(
        f"[STEP] step={state['step']} action={action_type}:{fmt} reward={reward:.3f} done={result.done}"
    )

    if result.done:
        # The episode summary belongs only to the step that ends the episode;
        # a failed generation may be followed by repair steps.
        total_score = normalized_episode_score(sum(state["rewards"]))
        rewards_csv = ",".join(f"{r:.2f}" for r in state["rewards"])
        state["log"].append(
            f"[END] success={total_score >= SUCCESS_SCORE_THRESHOLD} steps={state['step']} "
            f"score={total_score:.3f} rewards={rewards_csv}"
        )
        status = f"Episode done — score: {total_score:.3f} (generate reward: {reward:.3f})"
    else:
        status = f"Step {state['step']} {action_type} — reward: {reward:.3f} (next phase: {obs.phase})"

    return common_outputs(
        state,
        status=status,
        obs_md=obs_summary(obs_dict),
        feedback=obs.feedback,
    )
|
| 586 |
+
|
| 587 |
+
|
| 588 |
+
def _llm_error_outputs(state: dict[str, Any], message: str):
    """Record an LLM failure and return outputs showing the last observation.

    Appends an ``[ERROR]`` entry to the log, stores the message as the parsed
    response, and re-renders the stored observation (if any) so the panels
    keep their content.
    """
    state["log"].append(f"[ERROR] {message}")
    state["parsed_response"] = f"**LLM error:** {message}"

    last_obs = state.get("obs") or {}
    summary = obs_summary(last_obs) if last_obs else ""
    return llm_outputs(
        state,
        obs_md=summary,
        feedback=last_obs.get("feedback", ""),
    )
|
| 596 |
+
|
| 597 |
+
|
| 598 |
+
async def do_llm_step(state):
    """Let the LLM take the next step (explore or generate).

    Builds the prompt for the current phase, asks the configured model for a
    reply, parses it, and forwards the action to ``do_explore`` or
    ``do_generate``.

    Args:
        state: Session state; mutated in place.

    Returns:
        The handler output tuple. Configuration problems, LLM failures and an
        unknown phase are reported as an ``[ERROR]`` log entry rather than
        raised.
    """
    if state.get("done"):
        state["log"].append("[WARN] Episode already done.")
        return llm_outputs(
            state,
            feedback="Episode already done.",
        )

    import asyncio

    from openai import OpenAI

    api_url, api_key, model = _resolve_llm()
    if not api_url:
        return _llm_error_outputs(state, "API_URL is not configured.")
    if not api_key:
        return _llm_error_outputs(state, "HF_TOKEN or API_KEY is not configured.")
    if not model:
        return _llm_error_outputs(state, "MODEL_NAME is not configured.")

    client = OpenAI(base_url=api_url, api_key=api_key, timeout=60.0)
    obs_data = state.get("obs") or {}
    phase = state.get("phase", "explore")

    if phase == "explore":
        prompt = build_explore_prompt(
            topic=state["topic"],
            content=state["content"],
            tier=state["tier"],
            keywords=state["keywords"],
            step=state["step"] + 1,
            steps_left=obs_data.get("explore_steps_left", 0),
            explored_context=state.get("explored_context", ""),
            feedback=obs_data.get("feedback", ""),
        )
        max_tokens = 256
    elif phase == "repair":
        prompt = build_repair_prompt(
            topic=state["topic"],
            tier=state["tier"],
            fmt=state.get("last_format", "marimo"),
            previous_code=state.get("last_code", ""),
            last_errors=obs_data.get("last_errors", ""),
        )
        max_tokens = 4096
    elif phase in ("generate", "done"):
        prompt = build_generate_prompt(
            topic=state["topic"],
            content=state["content"],
            tier=state["tier"],
            keywords=state["keywords"],
            data_available=state.get("data_available", False),
            explored_context=state.get("explored_context", ""),
        )
        max_tokens = 4096
    else:
        # Surface the problem instead of silently doing nothing.
        state["log"].append(f"[ERROR] Unknown phase {phase!r}; reset the episode.")
        return llm_outputs(state)

    try:
        # The client call is synchronous; run it in a worker thread so the
        # event loop stays responsive while waiting for the model.
        llm_response = await asyncio.to_thread(
            call_llm_or_raise, client, prompt, model=model, max_tokens=max_tokens
        )
    except Exception as exc:
        return _llm_error_outputs(state, _format_llm_exception(exc, api_url, model))
    if not llm_response:
        return _llm_error_outputs(
            state,
            f"LLM call failed or returned an empty response from {api_url} using model {model}.",
        )

    if phase != "explore":
        fmt, code, narration = parse_generate_response(llm_response)
        code = code or ""
        narration = narration or ""
        state["generated_response"] = format_code_text(code)
        state["parsed_response"] = fenced_json({
            "format": fmt,
            "code_len": len(code),
            "narration_len": len(narration),
        })
        state["log"].append(f"[LLM] Generate: format={fmt}, code_len={len(code)}")
        return await do_generate(fmt, code, narration, state)

    if llm_response.strip().upper() == "SKIP":
        state["log"].append("[LLM] Decided to skip exploration. Moving to generate.")
        state["phase"] = "generate"
        state["generated_response"] = llm_response
        state["parsed_response"] = "`SKIP`"
        return llm_outputs(
            state,
            obs_md=obs_summary(obs_data),
            feedback=obs_data.get("feedback", ""),
        )

    tool, query, intent = parse_explore_response(llm_response, state["topic"])
    state["generated_response"] = llm_response
    state["parsed_response"] = format_explore_action_md(tool, query, intent)
    state["log"].append(f'[LLM] Explore tool={tool} query="{str(query)[:80]}"')
    return await do_explore(tool, query, intent, state)
|
| 755 |
+
|
| 756 |
+
|
| 757 |
+
async def do_llm_auto(state):
    """Run full episode automatically with LLM (explore + generate).

    Repeats ``do_llm_step`` until the episode is done, a step ends with an
    ``[ERROR]`` log entry, or a step changes neither the step counter nor
    the phase. The last condition prevents an endless loop.

    Args:
        state: Session state.

    Returns:
        The output tuple of the last step, or a fresh one when no step ran.
    """
    outputs = None
    while not state.get("done"):
        before = (state.get("step"), state.get("phase"))
        outputs = await do_llm_step(state)
        state = outputs[0]
        if state.get("log") and str(state["log"][-1]).startswith("[ERROR]"):
            break
        if not state.get("done") and (state.get("step"), state.get("phase")) == before:
            state["log"].append("[WARN] Auto run stopped: the last step made no progress.")
            # Re-render the log slot so the warning is visible.
            outputs = (outputs[0], fmt_log(state["log"]), *outputs[2:])
            break
    return outputs if outputs else llm_outputs(state, status="No steps taken.")
|
| 766 |
+
|
| 767 |
+
|
| 768 |
+
# ---------------------------------------------------------------------------
|
| 769 |
+
# Gradio UI
|
| 770 |
+
# ---------------------------------------------------------------------------
|
| 771 |
+
|
| 772 |
+
|
| 773 |
+
def build_ui():
    """Build the Gradio Blocks app for inspecting an explainer episode.

    The layout has a task dropdown, three action buttons (reset, single LLM
    step, automatic run), observation / LLM / research / reward panels and a
    timeline. Every button writes to the same ordered list of output
    components.

    Returns:
        The configured ``gr.Blocks`` instance; it is not launched here.
    """
    with gr.Blocks(title="Explainer Env — Interactive Runner") as demo:
        # NOTE(review): empty_state() runs once here; confirm each browser
        # session gets its own session_id before the first reset.
        session_state = gr.State(empty_state())

        # Header
        gr.Markdown("# Explainer Episode Inspector")

        # =====================================================================
        # Controls
        # =====================================================================
        with gr.Row(equal_height=True):
            task_dd = gr.Dropdown(
                choices=TASK_CHOICES,
                value="(random)",
                label="Task",
                scale=1,
            )

        with gr.Row(equal_height=True):
            reset_btn = gr.Button("Reset Episode", variant="primary")
            llm_step_btn = gr.Button("Next Step", variant="secondary")
            llm_auto_btn = gr.Button("Auto Run", variant="primary")

        # =====================================================================
        # Inspector panels
        # =====================================================================
        with gr.Row(equal_height=False):
            with gr.Column(scale=1):
                with gr.Group():
                    gr.Markdown("### Observation")
                    obs_md = gr.Markdown("*Click Reset Episode to begin.*")
                    feedback_box = gr.Textbox(
                        label="Latest feedback",
                        lines=8,
                        max_lines=8,
                        interactive=False,
                    )
            with gr.Column(scale=1):
                with gr.Group():
                    gr.Markdown("### LLM")
                    with gr.Tabs():
                        with gr.Tab("Parsed"):
                            parsed_response_box = gr.Markdown("*No parsed response yet.*")
                        with gr.Tab("Response / code"):
                            generated_response_box = gr.Textbox(
                                label="Raw response or generated code",
                                value="No response yet.",
                                lines=16,
                                max_lines=16,
                                interactive=False,
                                buttons=["copy"],
                            )

        with gr.Row(equal_height=False):
            with gr.Column(scale=1):
                with gr.Group():
                    gr.Markdown("### Research")
                    search_box = gr.Textbox(
                        label="Latest search results",
                        lines=8,
                        max_lines=8,
                        interactive=False,
                    )
                    top_chunks_table = gr.Dataframe(
                        headers=["Rank", "Source", "Title", "Score", "URL", "Snippet"],
                        interactive=False,
                        column_count=(6, "fixed"),
                        label="Top chunks",
                    )
            with gr.Column(scale=1):
                with gr.Group():
                    gr.Markdown("### Rewards")
                    reward_summary = gr.Markdown("*No rewards yet.*")
                    rewards_table = gr.Dataframe(
                        headers=["Reward"],
                        interactive=False,
                        column_count=(1, "fixed"),
                        label="Reward matrix",
                    )

        # =====================================================================
        # Timeline
        # =====================================================================
        with gr.Group():
            gr.Markdown("### Timeline")
            log_box = gr.Markdown("*No events yet.*")

        # =====================================================================
        # Wiring
        # =====================================================================
        # Common outputs, in the order the handlers return them: state, log,
        # observation, feedback, generated response, parsed response, search
        # results, top chunks, reward summary, reward matrix.
        common_output_components = [
            session_state,
            log_box,
            obs_md,
            feedback_box,
            generated_response_box,
            parsed_response_box,
            search_box,
            top_chunks_table,
            reward_summary,
            rewards_table,
        ]

        reset_btn.click(
            fn=do_reset,
            inputs=[task_dd, session_state],
            outputs=common_output_components,
        )

        llm_step_btn.click(
            fn=do_llm_step,
            inputs=[session_state],
            outputs=common_output_components,
        )

        llm_auto_btn.click(
            fn=do_llm_auto,
            inputs=[session_state],
            outputs=common_output_components,
        )

    return demo
|
| 896 |
+
|
| 897 |
+
|
| 898 |
+
# Script entry point: build the dashboard and serve it on every network
# interface at port 7860.
if __name__ == "__main__":
    demo = build_ui()
    demo.launch(server_name="0.0.0.0", server_port=7860)
|
dashboard_prompts.py
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Prompt builders for the Gradio dashboard."""
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import textwrap
|
| 5 |
+
|
| 6 |
+
try:
|
| 7 |
+
from .constants import MAX_EXPLORE_STEPS, MAX_REPAIR_STEPS
|
| 8 |
+
except ImportError: # pragma: no cover
|
| 9 |
+
from constants import MAX_EXPLORE_STEPS, MAX_REPAIR_STEPS
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# System prompt shared by every dashboard LLM call. The step limits are
# substituted with str.replace rather than str.format because the prompt
# contains literal JSON braces.
SYSTEM_PROMPT = textwrap.dedent("""\
    You are an expert educator that creates interactive explanations of technical topics.

    You interact with an environment in up to three phases:

    ## Phase 1: EXPLORE
    Search for relevant information. You'll be given a topic + tier (beginner/intermediate/advanced).
    - Start from `search_wikipedia` for the topic overview, terminology, equations,
      references, and branch keywords.
    - Then use what you learned from Wikipedia/top chunks to choose the next search
      avenue: arXiv/Scholar/HF papers for deeper sources, `fetch_docs` for
      Marimo/Manim/API/code patterns, and HF Hub for model/dataset/examples.
    - Decide search queries to gather relevant material
    - Choose one explicit research tool:
      - search_wikipedia: fundamentals and beginner explanations
      - search_hf_papers: ML/AI papers from Hugging Face Papers
      - search_arxiv: scientific/math/ML papers from arXiv
      - search_scholar: paper metadata, abstracts, citations
      - fetch_docs: library/API documentation for code, plots, Marimo, Manim
      - search_hf_hub: model cards, datasets, Spaces, examples
    - Explore for what the generated code needs: formulas, pseudocode, visual intuition,
      implementation examples, and Marimo/Manim/API patterns.
    - Use `fetch_docs` when you need code examples or interactive artifact patterns.
      Do not repeat broad Wikipedia/paper overview searches when code-oriented context is missing.
    - You have up to {MAX_EXPLORE_STEPS} explore steps. Stop early if you have enough info.

    ## Phase 2: GENERATE
    Produce a complete, runnable Python file in one of two formats:

    ### marimo notebook format (STRICT)
    First line: `import marimo`
    Second line: `app = marimo.App()`
    Use `@app.cell` functions, import shared libraries in the first cell, return shared
    variables explicitly, and use underscore-prefixed scratch variables by default to
    avoid MB002. Last line: `if __name__ == "__main__": app.run()`.

    ### manim animation format
    Use a Scene class with `construct()`, `self.play()`, and `self.wait()`.

    ## Phase 3: REPAIR
    If validation fails, submit a revised complete file using the exact error feedback.
    You have up to {MAX_REPAIR_STEPS} repair attempts.

    For EXPLORE actions, respond with a JSON object:
    ```json
    {
      "tool": "search_wikipedia | search_hf_papers | search_arxiv | search_scholar | fetch_docs | search_hf_hub",
      "query": "search query",
      "intent": "what you need from this source"
    }
    ```
    For GENERATE actions, respond with a JSON object:
    ```json
    {
      "format": "marimo" or "manim",
      "code": "complete Python source code",
      "narration": "scene narration (manim only, empty string for marimo)"
    }
    ```
""").replace("{MAX_EXPLORE_STEPS}", str(MAX_EXPLORE_STEPS)).replace(
    "{MAX_REPAIR_STEPS}",
    str(MAX_REPAIR_STEPS),
)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def build_explore_prompt(
|
| 77 |
+
topic: str,
|
| 78 |
+
content: str,
|
| 79 |
+
tier: str,
|
| 80 |
+
keywords: str,
|
| 81 |
+
step: int,
|
| 82 |
+
steps_left: int,
|
| 83 |
+
explored_context: str,
|
| 84 |
+
feedback: str,
|
| 85 |
+
) -> str:
|
| 86 |
+
return textwrap.dedent(f"""\
|
| 87 |
+
TOPIC: {topic}
|
| 88 |
+
TIER: {tier}
|
| 89 |
+
KEYWORDS: {keywords}
|
| 90 |
+
DESCRIPTION: {content}
|
| 91 |
+
|
| 92 |
+
PHASE: EXPLORE (step {step}, {steps_left} steps left)
|
| 93 |
+
PREVIOUS RESEARCH:
|
| 94 |
+
{explored_context or "(none yet)"}
|
| 95 |
+
|
| 96 |
+
FEEDBACK: {feedback}
|
| 97 |
+
|
| 98 |
+
Provide a search query to find relevant information about this topic.
|
| 99 |
+
If this is the first explore step, use `search_wikipedia` for the starting overview.
|
| 100 |
+
On later explore steps, use prior research/top chunks to branch into papers, docs,
|
| 101 |
+
examples, references, or APIs. Prefer queries/intents that will help write the final
|
| 102 |
+
interactive code: equations, pseudocode, visual examples, implementation details,
|
| 103 |
+
or Marimo/Manim docs.
|
| 104 |
+
If you already have enough context, respond with just: SKIP
|
| 105 |
+
Otherwise respond with the JSON object described in the system prompt.
|
| 106 |
+
""")
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def build_generate_prompt(
|
| 110 |
+
topic: str,
|
| 111 |
+
content: str,
|
| 112 |
+
tier: str,
|
| 113 |
+
keywords: str,
|
| 114 |
+
data_available: bool,
|
| 115 |
+
explored_context: str,
|
| 116 |
+
) -> str:
|
| 117 |
+
format_hint = ""
|
| 118 |
+
if data_available:
|
| 119 |
+
format_hint = "This topic has associated data - consider marimo with data visualizations."
|
| 120 |
+
return textwrap.dedent(f"""\
|
| 121 |
+
TOPIC: {topic}
|
| 122 |
+
TIER: {tier}
|
| 123 |
+
KEYWORDS: {keywords}
|
| 124 |
+
DESCRIPTION: {content}
|
| 125 |
+
DATA AVAILABLE: {data_available}
|
| 126 |
+
{format_hint}
|
| 127 |
+
|
| 128 |
+
ACCUMULATED RESEARCH:
|
| 129 |
+
{explored_context or "(no research done)"}
|
| 130 |
+
|
| 131 |
+
PHASE: GENERATE
|
| 132 |
+
Create a complete, runnable interactive explanation. Choose the best format (marimo or manim).
|
| 133 |
+
|
| 134 |
+
Respond with a JSON object:
|
| 135 |
+
```json
|
| 136 |
+
{{
|
| 137 |
+
"format": "marimo" or "manim",
|
| 138 |
+
"code": "complete Python source code here",
|
| 139 |
+
"narration": "scene-by-scene narration (manim only, empty for marimo)"
|
| 140 |
+
}}
|
| 141 |
+
```
|
| 142 |
+
|
| 143 |
+
Requirements:
|
| 144 |
+
- For marimo: first line `import marimo`, second line `app = marimo.App()`,
|
| 145 |
+
every cell has an explicit return, scratch variables use underscore prefixes,
|
| 146 |
+
and the file ends with `if __name__ == "__main__": app.run()`.
|
| 147 |
+
- For manim: Scene class with construct(), self.play() animations, MathTex for math.
|
| 148 |
+
- Cover the key concepts from the keywords.
|
| 149 |
+
- Match the depth to the tier level ({tier}).
|
| 150 |
+
- Incorporate findings from the research above.
|
| 151 |
+
""")
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def build_repair_prompt(
    topic: str,
    tier: str,
    fmt: str,
    previous_code: str,
    last_errors: str,
) -> str:
    """Build the user prompt for a repair step.

    The template is dedented before the values are inserted. Source code
    always has lines starting at column 0, which would otherwise prevent
    ``textwrap.dedent`` from stripping the template's indentation.

    Args:
        topic: Topic assigned for the episode.
        tier: Difficulty tier.
        fmt: Format of the artifact being repaired.
        previous_code: The source that failed validation.
        last_errors: Validation error feedback.

    Returns:
        The prompt text, ending with a newline.
    """
    template = textwrap.dedent("""\
        TOPIC: {topic}
        TIER: {tier}
        FORMAT: {fmt}

        The previous generated artifact failed validation.

        ERROR FEEDBACK:
        {last_errors}

        PREVIOUS CODE:
        ```python
        {previous_code}
        ```

        Submit a corrected complete Python file. Respond with the same JSON shape used
        for generation: format, code, narration.

        If the error is MB002, do a full-file variable audit before answering. Fix the
        assignment names and loop variable names, not just the return values.
    """)
    return template.format(
        topic=topic,
        tier=tier,
        fmt=fmt,
        last_errors=last_errors,
        previous_code=previous_code,
    )
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
def parse_generate_response(response: str) -> tuple[str, str, str]:
    """Extract ``(format, code, narration)`` from an LLM generate reply.

    The reply is expected to be a JSON object, optionally inside a Markdown
    fence. When no JSON object can be decoded, the reply is treated as raw
    source code and the format is guessed from its content.

    Args:
        response: Raw model output; ``None`` is treated as empty.

    Returns:
        A tuple of three strings. Missing or ``null`` JSON fields default to
        ``"marimo"``, ``""`` and ``""``.
    """
    raw = response or ""
    text = raw.strip()

    fence = None
    if "```json" in text:
        fence = "```json"
    elif "```" in text:
        fence = "```"

    # Try the bare text first: unfenced JSON may carry ``` inside "code".
    candidates = [text]
    if fence is not None:
        after = text.split(fence, 1)[1]
        candidates.append(after.split("```", 1)[0].strip())
        if after.count("```") > 1:
            # The payload itself contains fences; close on the last one.
            candidates.append(after.rpartition("```")[0].strip())

    for candidate in candidates:
        try:
            data = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(data, dict):
            return (
                str(data.get("format") or "marimo"),
                str(data.get("code") or ""),
                str(data.get("narration") or ""),
            )

    code = raw
    if fence == "```":
        # A plain fenced code block: drop the fences and any language tag so
        # the result is source code rather than Markdown.
        inner = candidates[-1]
        first_line, _, rest = inner.partition("\n")
        if first_line.strip().lower() in ("python", "py", "python3"):
            inner = rest.strip()
        if inner:
            code = inner

    if "from manim" in code or ("class " in code and "Scene" in code):
        return "manim", code, ""
    return "marimo", code, ""
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
def parse_explore_response(response: str, fallback_query: str) -> tuple[str, str, str]:
    """Extract ``(tool, query, intent)`` from an LLM explore reply.

    The reply is expected to be a JSON object, optionally inside a Markdown
    fence. Each field independently falls back to its default when it is
    missing, ``null``, blank, or not a string.

    Args:
        response: Raw model output; ``None`` is treated as empty.
        fallback_query: Query used when the reply supplies none.

    Returns:
        A tuple of three strings. The defaults are ``"search_wikipedia"``,
        *fallback_query* and ``"gather background and examples"``.
    """
    default_tool = "search_wikipedia"
    default_intent = "gather background and examples"

    text = (response or "").strip()
    candidates = [text]
    if "```json" in text:
        candidates.append(text.split("```json", 1)[1].split("```", 1)[0].strip())
    elif "```" in text:
        candidates.append(text.split("```", 1)[1].split("```", 1)[0].strip())

    data = None
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            data = parsed
            break
    if data is None:
        return default_tool, fallback_query, default_intent

    def _text_or(value, default):
        # Callers strip these values, so only non-blank strings pass through.
        if isinstance(value, str) and value.strip():
            return value.strip()
        return default

    return (
        _text_or(data.get("tool"), default_tool),
        _text_or(data.get("query"), fallback_query),
        _text_or(data.get("intent"), default_intent),
    )
|
server/app.py
CHANGED
|
@@ -29,9 +29,7 @@ Usage:
|
|
| 29 |
"""
|
| 30 |
|
| 31 |
import os
|
| 32 |
-
import sys
|
| 33 |
from contextlib import asynccontextmanager
|
| 34 |
-
from pathlib import Path
|
| 35 |
|
| 36 |
try:
|
| 37 |
from openenv.core.env_server.http_server import create_app
|
|
@@ -50,11 +48,10 @@ except ImportError:
|
|
| 50 |
|
| 51 |
def _build_dashboard_tab(*_args, **_kwargs):
|
| 52 |
"""Return the project dashboard as an OpenEnv custom web-interface tab."""
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
from dashboard import build_ui
|
| 58 |
|
| 59 |
return build_ui()
|
| 60 |
|
|
|
|
| 29 |
"""
|
| 30 |
|
| 31 |
import os
|
|
|
|
| 32 |
from contextlib import asynccontextmanager
|
|
|
|
| 33 |
|
| 34 |
try:
|
| 35 |
from openenv.core.env_server.http_server import create_app
|
|
|
|
| 48 |
|
| 49 |
def _build_dashboard_tab(*_args, **_kwargs):
|
| 50 |
"""Return the project dashboard as an OpenEnv custom web-interface tab."""
|
| 51 |
+
try:
|
| 52 |
+
from ..dashboard import build_ui
|
| 53 |
+
except ImportError: # pragma: no cover - supports uvicorn server.app:app
|
| 54 |
+
from dashboard import build_ui
|
|
|
|
| 55 |
|
| 56 |
return build_ui()
|
| 57 |
|