Spaces:
Runtime error
Runtime error
Ram Narayanan Ananthakrishnapuram Sampath
Added dashboard and rendered with a local LLM to validate Env interaction
8918e76 | import json | |
| from client import CustomerEnv, CustomerAction | |
| import gradio as gr | |
| LOCAL_ENV_URL = "http://localhost:8000" | |
def _parse_agent_action(user_message):
    """Split the raw agent message into the fields of an environment action.

    A message whose first non-blank character is ``{`` is tried as a JSON
    object with the optional keys ``action_type``, ``content``, ``tool_args``
    and ``thinking``. Any other message, including malformed JSON, is treated
    as a plain ``speak`` action whose content is the message itself.

    Returns:
        A ``(action_type, content, tool_args, thinking_text)`` tuple.
    """
    action_type = "speak"
    content = user_message
    tool_args = {}
    thinking_text = "N/A"

    if user_message.strip().startswith("{"):
        try:
            data = json.loads(user_message)
        except json.JSONDecodeError:
            # Deliberate best-effort: text that merely looks like JSON is
            # sent to the environment verbatim as a "speak" action.
            pass
        else:
            action_type = data.get("action_type", "speak")
            content = data.get("content", "")
            # An explicit `"tool_args": null` must not reach the action as None.
            tool_args = data.get("tool_args") or {}
            thinking_text = data.get("thinking", "No explicit thinking provided.")

    return action_type, content, tool_args, thinking_text


def _format_env_reply(cust_reply, tool_resp, done):
    """Build the markdown chat bubble describing the environment's response."""
    env_chat_bubble = ""
    if cust_reply:
        env_chat_bubble += f"π£οΈ **Customer:** {cust_reply}\n\n"
    if tool_resp:
        env_chat_bubble += f"π» **Sandbox API:** `{tool_resp}`\n\n"
    if not env_chat_bubble:
        env_chat_bubble = "*(No response from environment)*"
    if done:
        env_chat_bubble += "\nπ **Episode Complete!**"
    return env_chat_bubble


def interact_with_env(user_message, history, cum_reward, step_count, current_customer_state, current_db_state, progress=gr.Progress()):
    """Send one agent action to the customer environment and build the UI update.

    Args:
        user_message: Plain text (sent as a ``speak`` action) or a JSON object
            string with ``action_type`` / ``content`` / ``tool_args`` /
            ``thinking`` keys.
        history: Transcript so far as a list of ``{"role", "content"}`` dicts.
            An empty (or missing) history starts a new episode: the
            environment is reset and the reward/step counters restart at zero.
        cum_reward: Reward accumulated over the previous steps of the episode.
        step_count: Number of steps already taken in the episode.
        current_customer_state: Last known customer state; returned unchanged
            when the step fails.
        current_db_state: Last known database state; returned unchanged when
            the step fails.
        progress: Progress callback, invoked as ``progress(fraction, desc=...)``.

    Returns:
        A 12-tuple ``(history, history, thinking_text, tool_text,
        accuracy_text, cust_state_md, db_state, scorecard_md, cum_reward,
        step_count, customer_state, db_state)``. On failure the text slots
        hold ``"Error"`` and the transcript gains a "System Error" turn.
    """
    # Work on a copy so the caller's list is never mutated, and tolerate a
    # missing history (the error handler below must be able to append to it).
    history = list(history or [])

    try:
        # --- 1. LAYER 1: Parse Agent Policy (Belief State & Action) ---
        progress(0.1, desc="Parsing your action...")
        action_type, content, tool_args, thinking_text = _parse_agent_action(user_message)

        if action_type == "tool_call":
            tool_text = f"π§ Tool: {content}\nπ¦ Args: {json.dumps(tool_args)}"
        else:
            tool_text = "No tools invoked."

        action = CustomerAction(action_type=action_type, content=content, tool_args=tool_args)

        # --- 2. ORCHESTRATOR: Run the step ---
        # NOTE(review): a new client is opened on every call and reset() only
        # runs for an empty history; this presumes the server keeps episode
        # state between connections -- confirm against the environment server.
        with CustomerEnv(base_url=LOCAL_ENV_URL) as env:
            if not history:
                env.reset()
                cum_reward = 0.0
                step_count = 0

            if action_type == "end_call":
                progress(0.5, desc="π§ Asking the Judge LLM to grade the episode...")
            elif action_type == "speak":
                progress(0.5, desc="π£οΈ Waiting for the simulated customer to reply...")
            else:
                progress(0.5, desc="π» Pinging the Sandbox Database...")

            result = env.step(action)

            # --- 4. Extract Hidden States (mocked fallback if env doesn't expose them yet) ---
            updated_customer_state = getattr(env, 'customer_state', {"Intent": "Dispute Fee", "Satisfaction": "Frustrated (3/10)", "Cooperation": "Low"})
            updated_db_state = getattr(env, 'db_state', {"Account": "Active", "Balance": "$450.00", "Recent_Txn": "Overdraft Fee ($35)"})

        # --- 3. LAYER 2: Get Customer Agent & Sandbox Responses ---
        progress(0.9, desc="Formatting results...")
        env_chat_bubble = _format_env_reply(
            result.observation.customer_reply,
            result.observation.tool_response,
            result.done,
        )

        cust_state_md = (
            f"**Hidden Intent:** {updated_customer_state.get('Intent', 'Unknown')}\n\n"
            f"**Satisfaction Tracker:** {updated_customer_state.get('Satisfaction', 'Neutral')}\n\n"
            f"**Cooperation Level:** {updated_customer_state.get('Cooperation', 'Moderate')}"
        )

        # --- 5. REWARD MODULE: Scorecard ---
        # A step may carry no reward; count it as zero instead of failing.
        step_reward = float(result.reward) if result.reward is not None else 0.0
        cum_reward += step_reward
        step_count += 1

        accuracy_text = f"Step Reward: {step_reward:.2f} | Cumulative: {cum_reward:.2f}"
        scorecard_md = "\n".join([
            "",
            "### π Reward Module Output",
            f"* **Turn Count:** {step_count}",
            f"* **Tool Use Efficiency:** {'Active' if action_type == 'tool_call' else 'Pending'}",
            f"* **Resolution State:** {'Completed' if result.done else 'In Progress'}",
            f"* **Total Episode Score:** {cum_reward:.2f}",
            "",
        ])

        # Add to transcript
        agent_chat_bubble = f"[{action_type.upper()}] {content}"
        history.append({"role": "user", "content": agent_chat_bubble})
        history.append({"role": "assistant", "content": env_chat_bubble.strip()})

        return (history, history, thinking_text, tool_text, accuracy_text,
                cust_state_md, updated_db_state, scorecard_md,
                cum_reward, step_count, updated_customer_state, updated_db_state)

    except Exception as e:
        # UI boundary: surface any failure in the transcript rather than
        # crashing the app. Use the same role/content message format as the
        # success path so the transcript never mixes entry shapes.
        history.append({"role": "user", "content": user_message})
        history.append({"role": "assistant", "content": f"β System Error: {e}"})
        return (history, history, "Error", "Error", "Error", "Error",
                current_db_state, "Error", cum_reward, step_count,
                current_customer_state, current_db_state)
def reset_ui():
    """Return the placeholder values that put the dashboard back to its idle state.

    The result is a 12-tuple: two empty lists, a zero reward (``0.0``), a zero
    step count, three empty strings, the placeholder customer-state markdown,
    the placeholder database dict, the placeholder scorecard markdown, the
    placeholder customer dict, and the same database dict once more.
    """
    customer_placeholder = dict(Intent="Waiting...", Satisfaction="N/A")
    db_placeholder = dict(Status="Waiting for start")

    customer_panel_md = "\n\n".join((
        "**Hidden Intent:** Waiting...",
        "**Satisfaction Tracker:** N/A",
        "**Cooperation Level:** N/A",
    ))
    scorecard_md = "### π Reward Module Output\nWaiting..."

    # Two separate empty lists: the first two slots must not share one object.
    empty_chat = []
    empty_transcript = []
    blank = ""

    return (
        empty_chat,
        empty_transcript,
        0.0,
        0,
        blank,
        blank,
        blank,
        customer_panel_md,
        db_placeholder,
        scorecard_md,
        customer_placeholder,
        db_placeholder,
    )