Spaces:

akshaypulla
/

procure-rl

Sleeping

App Files Files Community

akshaypulla committed on Apr 8

Commit

e3cc9b2

verified ·

1 Parent(s): 39f9ab0

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

server/app.py +42 -130

server/app.py CHANGED Viewed

@@ -36,6 +36,17 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 try:
     from openenv.core.env_server.http_server import create_app
 except Exception as e:
     raise ImportError(
         "openenv is required for the web interface. Install dependencies with '\n    uv sync\n'"
@@ -62,7 +73,6 @@ def build_custom_gradio_ui(
     readme_content = _load_readme_content(metadata)
     display_title = metadata.name if metadata else title
-    # Custom Quick Start (override OpenEnv default which uses port 8000)
     custom_quick_start_md = """### Connect to this environment
 Connect from Python using `ProcureRLEnv`:
@@ -86,15 +96,6 @@ Access the visual playground at `/web` to:
 - **Watch Agent**: See a strategic agent negotiate step-by-step
 - **Instructions**: Learn how to play and what each field means
-### API Endpoints
-| Endpoint | Method | Description |
-|----------|--------|-------------|
-| `/reset` | POST | Reset environment with task_id and seed |
-| `/step` | POST | Execute an action |
-| `/state` | GET | Get current negotiation state |
-| `/health` | GET | Health check |
 ### Quick Tips
 - Use **collaborative language** ("partnership", "mutual") to increase rapport
@@ -102,7 +103,6 @@ Access the visual playground at `/web` to:
 - In **adversarial**, avoid 2+ consecutive concessions or opponent hardens
 """
-    # Example actions for the Example tab
     EXAMPLE_1 = {
         "move_type": "make_offer",
         "terms": {"price": 48000},
@@ -115,28 +115,11 @@ Access the visual playground at `/web` to:
         "message": "We appreciate your flexibility. Here's our counter-offer to move us closer to a mutual agreement.",
     }
-    # Agent strategies for auto-play
     AGENT_STRATEGY = [
-        (
-            "make_offer",
-            {"price": 48000},
-            "I value our partnership and believe we can reach a fair agreement together.",
-        ),
-        (
-            "make_offer",
-            {"price": 46000},
-            "I appreciate your movement. Let's see if we can meet in the middle.",
-        ),
-        (
-            "make_offer",
-            {"price": 44000},
-            "We're getting closer. I think we can finalize this at a fair price for both parties.",
-        ),
-        (
-            "make_offer",
-            {"price": 42000},
-            "I believe we've found a good deal. Let's accept these terms.",
-        ),
         ("accept", {}, ""),
     ]
@@ -160,7 +143,6 @@ Access the visual playground at `/web` to:
             action_data = {"move_type": move_type, "terms": terms, "message": message}
             data = await web_manager.step_environment(action_data)
-            # Update conversation
             new_conv = conversation_state.copy() if conversation_state else []
             new_conv.append(
                 {
@@ -169,6 +151,7 @@ Access the visual playground at `/web` to:
                     "terms": terms,
                 }
             )
             if not data.get("observation", {}).get("done"):
                 supplier_msg = data.get("observation", {}).get("supplier_message", "")
                 new_conv.append(
@@ -179,11 +162,8 @@ Access the visual playground at `/web` to:
                     }
                 )
-            # Get price info for chart
             obs = data.get("observation", {})
             current_price = obs.get("current_offer", {}).get("price", 0)
-            opponent_opening = 52000  # Will be extracted from state
             reward = obs.get("reward")
             done = obs.get("done", False)
             status_msg = f"Step complete! Round {obs.get('round_number', 0)}/{obs.get('max_rounds', 6)}"
@@ -207,9 +187,7 @@ Access the visual playground at `/web` to:
     async def run_agent_example(task_id="single_issue", seed=42):
         try:
-            # Reset first
             await web_manager.reset_environment({"task_id": task_id, "seed": seed})
             conv = []
             steps_log = []
             price_points = []
@@ -225,7 +203,6 @@ Access the visual playground at `/web` to:
                 current_price = obs.get("current_offer", {}).get("price", 0)
                 price_points.append(current_price)
                 conv.append(
                     {
                         "role": "you",
@@ -265,17 +242,9 @@ Access the visual playground at `/web` to:
                 "✅ Agent demo complete!",
             )
         except Exception as e:
-            return f"Error: {e}", "", f"Error: {e}", ""
-    def apply_example(example_data):
-        return (
-            example_data["move_type"],
-            json.dumps(example_data["terms"]),
-            example_data["message"],
-        )
     def _format_observation_full(data):
-        """Format observation as rich markdown."""
         if not data:
             return "No data"
         obs = data.get("observation", data)
@@ -319,7 +288,6 @@ Access the visual playground at `/web` to:
         return "😐"
     def _build_conversation_hist(conv):
-        """Build conversation history HTML."""
         if not conv:
             return "**Conversation will appear here...**\n\nMake your first offer to start the negotiation!"
         lines = ["## 💬 Conversation History\n"]
@@ -333,7 +301,6 @@ Access the visual playground at `/web` to:
         return "\n".join(lines)
     def _build_price_display(round_num, current_price, target, opening):
-        """Build price tracker display."""
         range_price = opening - target
         progress = (
             ((opening - current_price) / range_price * 100) if range_price > 0 else 0
@@ -351,10 +318,11 @@ Access the visual playground at `/web` to:
         return "\n".join(lines)
     def _build_agent_demo_result(steps_log, conv, price_points):
-        """Build agent demo result display."""
-        lines = ["## 🤖 Agent Negotiation Demo\n"]
-        lines.append("Watch how a strategic agent negotiates:\n")
-        lines.append("### 📜 Steps:")
         lines.extend(steps_log)
         lines.append("\n### 💬 Full Conversation:")
         for msg in conv:
@@ -371,14 +339,11 @@ Access the visual playground at `/web` to:
         gr.Markdown(f"# 🤝 {display_title}")
         gr.Markdown("### Interactive Procurement Negotiation Simulation")
-        with gr.Tabs() as tabs:
             with gr.TabItem("🎮 Play Now"):
-                """Interactive tab where user plays against the opponent."""
                 with gr.Row():
                     with gr.Column(scale=2):
-                        conversation_display = gr.Markdown(
-                            "*Click Reset to start a new negotiation!*"
-                        )
                         price_tracker = gr.Markdown(
                             "## 📊 Price Tracker\n*Reset to see price tracker*"
                         )
@@ -389,27 +354,21 @@ Access the visual playground at `/web` to:
                             choices=["single_issue", "multi_issue", "adversarial"],
                             value="single_issue",
                             label="Task",
-                            info="Choose which negotiation scenario",
-                        )
-                        seed_input = gr.Number(
-                            value=42,
-                            label="Seed",
-                            info="Random seed for reproducibility",
                         )
                         move_type_input = gr.Textbox(
                             label="Move Type",
-                            info="make_offer | accept | reject | bundle",
                             value="make_offer",
                         )
                         terms_input = gr.Textbox(
                             label="Terms (JSON)",
-                            info='Example: {"price": 45000}',
                             value='{"price": 48000}',
                         )
                         message_input = gr.Textbox(
                             label="Your Message",
-                            info="Be collaborative for better rapport!",
-                            value="I value our partnership and believe we can reach a fair agreement.",
                             lines=2,
                         )
@@ -433,27 +392,25 @@ Access the visual playground at `/web` to:
                         status_output = gr.Textbox(
                             label="Status", interactive=False, lines=1
                         )
                         with gr.Accordion("📋 Raw JSON", open=False):
                             raw_json = gr.Code(
                                 label="", language="json", interactive=False, lines=10
                             )
-                # Example messages for quick fill
                 FRIENDLY_EX = (
                     "make_offer",
                     '{"price": 48000}',
-                    "I truly value our partnership and believe we can find a fair solution that benefits both parties. I'm flexible and want to work with you.",
                 )
                 PROF_EX = (
                     "make_offer",
                     '{"price": 46000}',
-                    "Based on market research and our long-term relationship potential, I believe $46,000 is a fair price. What do you think?",
                 )
                 COUNTER_EX = (
                     "make_offer",
                     '{"price": 44000}',
-                    "We've made good progress. I can meet you at $44,000 if you can agree to these terms today.",
                 )
                 def get_friendly():
@@ -470,8 +427,7 @@ Access the visual playground at `/web` to:
                     outputs=[move_type_input, terms_input, message_input],
                 )
                 eg2_btn.click(
-                    fn=get_prof,
-                    outputs=[move_type_input, terms_input, message_input],
                 )
                 eg3_btn.click(
                     fn=get_counter,
@@ -523,10 +479,9 @@ Access the visual playground at `/web` to:
                 )
             with gr.TabItem("🤖 Watch Agent"):
-                """Example tab showing agent negotiation demo."""
                 gr.Markdown("### Watch a Strategic Agent Negotiate")
                 gr.Markdown(
-                    "This demo shows how an LLM agent would approach the negotiation with collaborative language and strategic pricing."
                 )
                 with gr.Row():
                     task_selector = gr.Dropdown(
@@ -556,7 +511,6 @@ Access the visual playground at `/web` to:
                 )
             with gr.TabItem("📖 Instructions"):
-                """Instructions tab."""
                 gr.Markdown("""
                 ## 🎮 How to Play
@@ -566,34 +520,28 @@ Access the visual playground at `/web` to:
                 - **adversarial**: Negotiate price + payment + support (hardest)
                 ### 2. Make Offers
-                - **Move Type**: `make_offer` to propose terms, `accept` to take current deal, `reject` to walk away
-                - **Terms**: JSON with your offered price (and payment_days for multi_issue/adversarial)
-                - **Message**: Be collaborative! Use words like "partnership", "mutual", "flexible" to increase rapport
                 ### 3. Watch the Response
-                - The supplier will counter-offer or accept
-                - Your **rapport** changes based on your language quality
                 - Higher rapport → opponent gives better concessions
                 ### 4. Goal
-                - Get the price as close to your target (shown in observations) as possible
-                - Use fewer rounds for a better efficiency score
                 - **Don't make 2+ consecutive concessions** in adversarial mode!
                 ## 🎯 Quick Tips
                 | Do | Don't |
                 |---|---|
-                | Use collaborative language | Use aggressive language ("final offer", "ultimatum") |
-                | Make strategic concessions | Concede every round (adversarial mode) |
-                | Offer Net-30 payment (multi_issue) | Ignore payment terms |
-                | Accept when terms are good | Wait until max rounds |
-                ## 🤖 Agent Demo
-                The "Watch Agent" tab shows how a strategic agent negotiates step-by-step.
                 """)
-        # Quick Start and README accordions
         with gr.Accordion("📘 Quick Start Guide", open=False):
             gr.Markdown(custom_quick_start_md)
         with gr.Accordion("📚 Full README", open=False):
@@ -603,7 +551,6 @@ Access the visual playground at `/web` to:
 def _load_readme_content(metadata):
-    """Load README content from metadata or filesystem."""
     if metadata and hasattr(metadata, "readme_content") and metadata.readme_content:
         return metadata.readme_content
     try:
@@ -617,41 +564,6 @@ def _load_readme_content(metadata):
     return "No README available."
-def _format_observation(data):
-    """Format observation as markdown for display."""
-    if not data:
-        return "No data"
-    obs = data.get("observation", data)
-    lines = []
-    task_id = obs.get("task_id", "")
-    round_num = obs.get("round_number", 0)
-    max_rounds = obs.get("max_rounds", 0)
-    done = obs.get("done", False)
-    reward = obs.get("reward")
-    lines.append(f"### Round {round_num}/{max_rounds}")
-    lines.append(f"**Task:** {task_id}")
-    lines.append(f"**Done:** {done}")
-    if reward is not None:
-        lines.append(f"**Reward:** {reward:.4f}")
-    supplier_msg = obs.get("supplier_message", "")
-    if supplier_msg:
-        lines.append(f"\n**Supplier:** {supplier_msg}")
-    current_offer = obs.get("current_offer", {})
-    if current_offer:
-        lines.append(f"\n**Current Offer:** {json.dumps(current_offer)}")
-    rapport = obs.get("rapport_hint", "neutral")
-    lines.append(f"\n**Rapport:** {rapport}")
-    return "\n".join(lines)
 app = create_app(
     lambda: _env_instance,
     NegotiationAction,

 try:
     from openenv.core.env_server.http_server import create_app
+    import openenv.core.env_server.web_interface as _mod
+    _orig = _mod.get_quick_start_markdown
+    def _fixed(md, ac, oc):
+        return _orig(md, ac, oc).replace(
+            "http://localhost:8000", "http://localhost:7860"
+        )
+    _mod.get_quick_start_markdown = _fixed
 except Exception as e:
     raise ImportError(
         "openenv is required for the web interface. Install dependencies with '\n    uv sync\n'"
     readme_content = _load_readme_content(metadata)
     display_title = metadata.name if metadata else title
     custom_quick_start_md = """### Connect to this environment
 Connect from Python using `ProcureRLEnv`:
 - **Watch Agent**: See a strategic agent negotiate step-by-step
 - **Instructions**: Learn how to play and what each field means
 ### Quick Tips
 - Use **collaborative language** ("partnership", "mutual") to increase rapport
 - In **adversarial**, avoid 2+ consecutive concessions or opponent hardens
 """
     EXAMPLE_1 = {
         "move_type": "make_offer",
         "terms": {"price": 48000},
         "message": "We appreciate your flexibility. Here's our counter-offer to move us closer to a mutual agreement.",
     }
     AGENT_STRATEGY = [
+        ("make_offer", {"price": 48000}, "I value our partnership."),
+        ("make_offer", {"price": 46000}, "I appreciate your movement."),
+        ("make_offer", {"price": 44000}, "We're getting closer."),
+        ("make_offer", {"price": 42000}, "I believe we've found a good deal."),
         ("accept", {}, ""),
     ]
             action_data = {"move_type": move_type, "terms": terms, "message": message}
             data = await web_manager.step_environment(action_data)
             new_conv = conversation_state.copy() if conversation_state else []
             new_conv.append(
                 {
                     "terms": terms,
                 }
             )
             if not data.get("observation", {}).get("done"):
                 supplier_msg = data.get("observation", {}).get("supplier_message", "")
                 new_conv.append(
                     }
                 )
             obs = data.get("observation", {})
             current_price = obs.get("current_offer", {}).get("price", 0)
             reward = obs.get("reward")
             done = obs.get("done", False)
             status_msg = f"Step complete! Round {obs.get('round_number', 0)}/{obs.get('max_rounds', 6)}"
     async def run_agent_example(task_id="single_issue", seed=42):
         try:
             await web_manager.reset_environment({"task_id": task_id, "seed": seed})
             conv = []
             steps_log = []
             price_points = []
                 current_price = obs.get("current_offer", {}).get("price", 0)
                 price_points.append(current_price)
                 conv.append(
                     {
                         "role": "you",
                 "✅ Agent demo complete!",
             )
         except Exception as e:
+            return f"Error: {e}", "", f"Error: {e}"
     def _format_observation_full(data):
         if not data:
             return "No data"
         obs = data.get("observation", data)
         return "😐"
     def _build_conversation_hist(conv):
         if not conv:
             return "**Conversation will appear here...**\n\nMake your first offer to start the negotiation!"
         lines = ["## 💬 Conversation History\n"]
         return "\n".join(lines)
     def _build_price_display(round_num, current_price, target, opening):
         range_price = opening - target
         progress = (
             ((opening - current_price) / range_price * 100) if range_price > 0 else 0
         return "\n".join(lines)
     def _build_agent_demo_result(steps_log, conv, price_points):
+        lines = [
+            "## 🤖 Agent Negotiation Demo\n",
+            "Watch how a strategic agent negotiates:\n",
+            "### 📜 Steps:",
+        ]
         lines.extend(steps_log)
         lines.append("\n### 💬 Full Conversation:")
         for msg in conv:
         gr.Markdown(f"# 🤝 {display_title}")
         gr.Markdown("### Interactive Procurement Negotiation Simulation")
+        with gr.Tabs():
             with gr.TabItem("🎮 Play Now"):
                 with gr.Row():
                     with gr.Column(scale=2):
+                        conversation_display = gr.Markdown("*Click Reset to start!*")
                         price_tracker = gr.Markdown(
                             "## 📊 Price Tracker\n*Reset to see price tracker*"
                         )
                             choices=["single_issue", "multi_issue", "adversarial"],
                             value="single_issue",
                             label="Task",
                         )
+                        seed_input = gr.Number(value=42, label="Seed")
                         move_type_input = gr.Textbox(
                             label="Move Type",
                             value="make_offer",
+                            info="make_offer | accept | reject | bundle",
                         )
                         terms_input = gr.Textbox(
                             label="Terms (JSON)",
                             value='{"price": 48000}',
+                            info='Example: {"price": 45000}',
                         )
                         message_input = gr.Textbox(
                             label="Your Message",
+                            value="I value our partnership.",
                             lines=2,
                         )
                         status_output = gr.Textbox(
                             label="Status", interactive=False, lines=1
                         )
                         with gr.Accordion("📋 Raw JSON", open=False):
                             raw_json = gr.Code(
                                 label="", language="json", interactive=False, lines=10
                             )
                 FRIENDLY_EX = (
                     "make_offer",
                     '{"price": 48000}',
+                    "I truly value our partnership and believe we can find a fair solution.",
                 )
                 PROF_EX = (
                     "make_offer",
                     '{"price": 46000}',
+                    "Based on market research and our long-term relationship, I believe $46,000 is fair.",
                 )
                 COUNTER_EX = (
                     "make_offer",
                     '{"price": 44000}',
+                    "We've made good progress. I can meet you at $44,000.",
                 )
                 def get_friendly():
                     outputs=[move_type_input, terms_input, message_input],
                 )
                 eg2_btn.click(
+                    fn=get_prof, outputs=[move_type_input, terms_input, message_input]
                 )
                 eg3_btn.click(
                     fn=get_counter,
                 )
             with gr.TabItem("🤖 Watch Agent"):
                 gr.Markdown("### Watch a Strategic Agent Negotiate")
                 gr.Markdown(
+                    "This demo shows how a strategic agent approaches the negotiation."
                 )
                 with gr.Row():
                     task_selector = gr.Dropdown(
                 )
             with gr.TabItem("📖 Instructions"):
                 gr.Markdown("""
                 ## 🎮 How to Play
                 - **adversarial**: Negotiate price + payment + support (hardest)
                 ### 2. Make Offers
+                - **Move Type**: `make_offer` to propose, `accept` to take deal, `reject` to walk away
+                - **Terms**: JSON with your offered price
+                - **Message**: Be collaborative for better rapport!
                 ### 3. Watch the Response
+                - Your **rapport** changes based on language quality
                 - Higher rapport → opponent gives better concessions
                 ### 4. Goal
+                - Get price close to your target
+                - Use fewer rounds for better efficiency score
                 - **Don't make 2+ consecutive concessions** in adversarial mode!
                 ## 🎯 Quick Tips
                 | Do | Don't |
                 |---|---|
+                | Use collaborative language | Use aggressive language |
+                | Make strategic concessions | Concede every round |
+                | Offer Net-30 payment | Ignore payment terms |
                 """)
         with gr.Accordion("📘 Quick Start Guide", open=False):
             gr.Markdown(custom_quick_start_md)
         with gr.Accordion("📚 Full README", open=False):
 def _load_readme_content(metadata):
     if metadata and hasattr(metadata, "readme_content") and metadata.readme_content:
         return metadata.readme_content
     try:
     return "No README available."
 app = create_app(
     lambda: _env_instance,
     NegotiationAction,