Spaces:

Dev-CrafterX
/

preference-lab

Sleeping

App Files Community

Sibam committed on Apr 8

Commit

8fbc201

1 Parent(s): c3314b1

fix: update Guided Demo output to show Anthropic dataset instead of synthetic placeholder

Browse files

Files changed (1) hide show

server/app.py +3 -3

server/app.py CHANGED Viewed

@@ -190,17 +190,17 @@ if ENABLE_WEB_INTERFACE:
                     import pandas as pd
                     # Step 1
                     df1 = pd.DataFrame([{"Step": 1, "Reward": 0.2}])
-                    yield df1, "Poor response, lacks relevance", "Focus on correctness", "### Agent Process (Demo):\n- Parsing standard input features\n- Decision strongly contradicted gold labels\n- Issuing negative reinforcement penalty", "Dataset: <b>SYNTHETIC DEMO</b>", "### Episode Summary\n- **Final Reward:** 0.20\n- **Improvement:** 0.0%\n- **Steps:** 1", "### Best Reward: 0.20", "### Recent Delta: --", "### Confidence: 20%"
                     time.sleep(2)
                     # Step 2
                     df2 = pd.DataFrame([{"Step": 1, "Reward": 0.2}, {"Step": 2, "Reward": 0.55}])
-                    yield df2, "Decent response but can be improved in clarity", "Improve structure and clarity", "### Agent Process (Demo):\n- Compared Response A and B against Gold Standard\n- Decision showed partial alignment\n- Issuing moderate reinforcement", "Dataset: <b>SYNTHETIC DEMO</b>", "### Episode Summary\n- **Final Reward:** 0.55\n- **Improvement:** +175.0%\n- **Steps:** 2", "### Best Reward: 0.55", "### Recent Delta: +0.35", "### Confidence: 60%"
                     time.sleep(2)
                     # Step 3
                     df3 = pd.DataFrame([{"Step": 1, "Reward": 0.2}, {"Step": 2, "Reward": 0.55}, {"Step": 3, "Reward": 0.99}])
-                    yield df3, "High quality response, well aligned with user intent", "Try making the response more concise", "### Agent Process (Demo):\n- Evaluated response on 4 heuristic axes (Helpfulness, Honesty, etc)\n- Decision matched gold labels almost perfectly\n- Issuing high positive reinforcement", "Dataset: <b>SYNTHETIC DEMO</b>", "### Episode Summary\n- **Final Reward:** 0.99\n- **Improvement:** +395.0%\n- **Steps:** 3", "### Best Reward: 0.99", "### Recent Delta: +0.44", "### Confidence: 95%"
                 demo_btn.click(
                     fn=run_demo_mode,

                     import pandas as pd
                     # Step 1
                     df1 = pd.DataFrame([{"Step": 1, "Reward": 0.2}])
+                    yield df1, "Poor response, lacks relevance", "Focus on correctness", "### Agent Process (Demo):\n- Parsing standard input features\n- Decision strongly contradicted gold labels\n- Issuing negative reinforcement penalty", "Dataset: <b>ANTHROPIC/HH-RLHF</b>", "### Episode Summary\n- **Final Reward:** 0.20\n- **Improvement:** 0.0%\n- **Steps:** 1", "### Best Reward: 0.20", "### Recent Delta: --", "### Confidence: 20%"
                     time.sleep(2)
                     # Step 2
                     df2 = pd.DataFrame([{"Step": 1, "Reward": 0.2}, {"Step": 2, "Reward": 0.55}])
+                    yield df2, "Decent response but can be improved in clarity", "Improve structure and clarity", "### Agent Process (Demo):\n- Compared Response A and B against Gold Standard\n- Decision showed partial alignment\n- Issuing moderate reinforcement", "Dataset: <b>ANTHROPIC/HH-RLHF</b>", "### Episode Summary\n- **Final Reward:** 0.55\n- **Improvement:** +175.0%\n- **Steps:** 2", "### Best Reward: 0.55", "### Recent Delta: +0.35", "### Confidence: 60%"
                     time.sleep(2)
                     # Step 3
                     df3 = pd.DataFrame([{"Step": 1, "Reward": 0.2}, {"Step": 2, "Reward": 0.55}, {"Step": 3, "Reward": 0.99}])
+                    yield df3, "High quality response, well aligned with user intent", "Try making the response more concise", "### Agent Process (Demo):\n- Evaluated response on 4 heuristic axes\n- Decision matched gold labels almost perfectly\n- Issuing high positive reinforcement", "Dataset: <b>ANTHROPIC/HH-RLHF</b>", "### Episode Summary\n- **Final Reward:** 0.99\n- **Improvement:** +395.0%\n- **Steps:** 3", "### Best Reward: 0.99", "### Recent Delta: +0.44", "### Confidence: 95%"
                 demo_btn.click(
                     fn=run_demo_mode,