Sibam committed on
Commit
8fbc201
·
1 Parent(s): c3314b1

fix: update Guided Demo output to show Anthropic dataset instead of synthetic placeholder

Browse files
Files changed (1) hide show
  1. server/app.py +3 -3
server/app.py CHANGED
@@ -190,17 +190,17 @@ if ENABLE_WEB_INTERFACE:
190
  import pandas as pd
191
  # Step 1
192
  df1 = pd.DataFrame([{"Step": 1, "Reward": 0.2}])
193
- yield df1, "Poor response, lacks relevance", "Focus on correctness", "### Agent Process (Demo):\n- Parsing standard input features\n- Decision strongly contradicted gold labels\n- Issuing negative reinforcement penalty", "Dataset: <b>SYNTHETIC DEMO</b>", "### Episode Summary\n- **Final Reward:** 0.20\n- **Improvement:** 0.0%\n- **Steps:** 1", "### Best Reward: 0.20", "### Recent Delta: --", "### Confidence: 20%"
194
  time.sleep(2)
195
 
196
  # Step 2
197
  df2 = pd.DataFrame([{"Step": 1, "Reward": 0.2}, {"Step": 2, "Reward": 0.55}])
198
- yield df2, "Decent response but can be improved in clarity", "Improve structure and clarity", "### Agent Process (Demo):\n- Compared Response A and B against Gold Standard\n- Decision showed partial alignment\n- Issuing moderate reinforcement", "Dataset: <b>SYNTHETIC DEMO</b>", "### Episode Summary\n- **Final Reward:** 0.55\n- **Improvement:** +175.0%\n- **Steps:** 2", "### Best Reward: 0.55", "### Recent Delta: +0.35", "### Confidence: 60%"
199
  time.sleep(2)
200
 
201
  # Step 3
202
  df3 = pd.DataFrame([{"Step": 1, "Reward": 0.2}, {"Step": 2, "Reward": 0.55}, {"Step": 3, "Reward": 0.99}])
203
- yield df3, "High quality response, well aligned with user intent", "Try making the response more concise", "### Agent Process (Demo):\n- Evaluated response on 4 heuristic axes (Helpfulness, Honesty, etc)\n- Decision matched gold labels almost perfectly\n- Issuing high positive reinforcement", "Dataset: <b>SYNTHETIC DEMO</b>", "### Episode Summary\n- **Final Reward:** 0.99\n- **Improvement:** +395.0%\n- **Steps:** 3", "### Best Reward: 0.99", "### Recent Delta: +0.44", "### Confidence: 95%"
204
 
205
  demo_btn.click(
206
  fn=run_demo_mode,
 
190
  import pandas as pd
191
  # Step 1
192
  df1 = pd.DataFrame([{"Step": 1, "Reward": 0.2}])
193
+ yield df1, "Poor response, lacks relevance", "Focus on correctness", "### Agent Process (Demo):\n- Parsing standard input features\n- Decision strongly contradicted gold labels\n- Issuing negative reinforcement penalty", "Dataset: <b>ANTHROPIC/HH-RLHF</b>", "### Episode Summary\n- **Final Reward:** 0.20\n- **Improvement:** 0.0%\n- **Steps:** 1", "### Best Reward: 0.20", "### Recent Delta: --", "### Confidence: 20%"
194
  time.sleep(2)
195
 
196
  # Step 2
197
  df2 = pd.DataFrame([{"Step": 1, "Reward": 0.2}, {"Step": 2, "Reward": 0.55}])
198
+ yield df2, "Decent response but can be improved in clarity", "Improve structure and clarity", "### Agent Process (Demo):\n- Compared Response A and B against Gold Standard\n- Decision showed partial alignment\n- Issuing moderate reinforcement", "Dataset: <b>ANTHROPIC/HH-RLHF</b>", "### Episode Summary\n- **Final Reward:** 0.55\n- **Improvement:** +175.0%\n- **Steps:** 2", "### Best Reward: 0.55", "### Recent Delta: +0.35", "### Confidence: 60%"
199
  time.sleep(2)
200
 
201
  # Step 3
202
  df3 = pd.DataFrame([{"Step": 1, "Reward": 0.2}, {"Step": 2, "Reward": 0.55}, {"Step": 3, "Reward": 0.99}])
203
+ yield df3, "High quality response, well aligned with user intent", "Try making the response more concise", "### Agent Process (Demo):\n- Evaluated response on 4 heuristic axes\n- Decision matched gold labels almost perfectly\n- Issuing high positive reinforcement", "Dataset: <b>ANTHROPIC/HH-RLHF</b>", "### Episode Summary\n- **Final Reward:** 0.99\n- **Improvement:** +395.0%\n- **Steps:** 3", "### Best Reward: 0.99", "### Recent Delta: +0.44", "### Confidence: 95%"
204
 
205
  demo_btn.click(
206
  fn=run_demo_mode,