Spaces:
Sleeping
Sleeping
Sibam committed on
Commit ·
8fbc201
1
Parent(s): c3314b1
fix: update Guided Demo output to show Anthropic dataset instead of synthetic placeholder
Browse files
- server/app.py +3 -3
server/app.py
CHANGED
|
@@ -190,17 +190,17 @@ if ENABLE_WEB_INTERFACE:
|
|
| 190 |
import pandas as pd
|
| 191 |
# Step 1
|
| 192 |
df1 = pd.DataFrame([{"Step": 1, "Reward": 0.2}])
|
| 193 |
-
yield df1, "Poor response, lacks relevance", "Focus on correctness", "### Agent Process (Demo):\n- Parsing standard input features\n- Decision strongly contradicted gold labels\n- Issuing negative reinforcement penalty", "Dataset: <b>
|
| 194 |
time.sleep(2)
|
| 195 |
|
| 196 |
# Step 2
|
| 197 |
df2 = pd.DataFrame([{"Step": 1, "Reward": 0.2}, {"Step": 2, "Reward": 0.55}])
|
| 198 |
-
yield df2, "Decent response but can be improved in clarity", "Improve structure and clarity", "### Agent Process (Demo):\n- Compared Response A and B against Gold Standard\n- Decision showed partial alignment\n- Issuing moderate reinforcement", "Dataset: <b>
|
| 199 |
time.sleep(2)
|
| 200 |
|
| 201 |
# Step 3
|
| 202 |
df3 = pd.DataFrame([{"Step": 1, "Reward": 0.2}, {"Step": 2, "Reward": 0.55}, {"Step": 3, "Reward": 0.99}])
|
| 203 |
-
yield df3, "High quality response, well aligned with user intent", "Try making the response more concise", "### Agent Process (Demo):\n- Evaluated response on 4 heuristic axes
|
| 204 |
|
| 205 |
demo_btn.click(
|
| 206 |
fn=run_demo_mode,
|
|
|
|
| 190 |
import pandas as pd
|
| 191 |
# Step 1
|
| 192 |
df1 = pd.DataFrame([{"Step": 1, "Reward": 0.2}])
|
| 193 |
+
yield df1, "Poor response, lacks relevance", "Focus on correctness", "### Agent Process (Demo):\n- Parsing standard input features\n- Decision strongly contradicted gold labels\n- Issuing negative reinforcement penalty", "Dataset: <b>ANTHROPIC/HH-RLHF</b>", "### Episode Summary\n- **Final Reward:** 0.20\n- **Improvement:** 0.0%\n- **Steps:** 1", "### Best Reward: 0.20", "### Recent Delta: --", "### Confidence: 20%"
|
| 194 |
time.sleep(2)
|
| 195 |
|
| 196 |
# Step 2
|
| 197 |
df2 = pd.DataFrame([{"Step": 1, "Reward": 0.2}, {"Step": 2, "Reward": 0.55}])
|
| 198 |
+
yield df2, "Decent response but can be improved in clarity", "Improve structure and clarity", "### Agent Process (Demo):\n- Compared Response A and B against Gold Standard\n- Decision showed partial alignment\n- Issuing moderate reinforcement", "Dataset: <b>ANTHROPIC/HH-RLHF</b>", "### Episode Summary\n- **Final Reward:** 0.55\n- **Improvement:** +175.0%\n- **Steps:** 2", "### Best Reward: 0.55", "### Recent Delta: +0.35", "### Confidence: 60%"
|
| 199 |
time.sleep(2)
|
| 200 |
|
| 201 |
# Step 3
|
| 202 |
df3 = pd.DataFrame([{"Step": 1, "Reward": 0.2}, {"Step": 2, "Reward": 0.55}, {"Step": 3, "Reward": 0.99}])
|
| 203 |
+
yield df3, "High quality response, well aligned with user intent", "Try making the response more concise", "### Agent Process (Demo):\n- Evaluated response on 4 heuristic axes\n- Decision matched gold labels almost perfectly\n- Issuing high positive reinforcement", "Dataset: <b>ANTHROPIC/HH-RLHF</b>", "### Episode Summary\n- **Final Reward:** 0.99\n- **Improvement:** +395.0%\n- **Steps:** 3", "### Best Reward: 0.99", "### Recent Delta: +0.44", "### Confidence: 95%"
|
| 204 |
|
| 205 |
demo_btn.click(
|
| 206 |
fn=run_demo_mode,
|