# voice_agent/ui/logic.py
# Ram Narayanan Ananthakrishnapuram Sampath
# Added dashboard and rendered with a local LLM to validate Env interaction
# 8918e76
import json
import logging

import gradio as gr

from client import CustomerEnv, CustomerAction
LOCAL_ENV_URL = "http://localhost:8000"
_logger = logging.getLogger(__name__)


def _parse_agent_message(user_message):
    """Split the raw user input into the fields of an agent action.

    Plain text becomes a ``speak`` action whose content is the text itself.
    Input that starts with ``{`` is tried as a JSON object with the optional
    keys ``action_type``, ``content``, ``tool_args`` and ``thinking``; if it
    does not parse, it is treated as plain speech.

    Returns:
        tuple: ``(action_type, content, tool_args, thinking_text)``.
    """
    action_type, content, tool_args, thinking_text = "speak", user_message, {}, "N/A"
    if not user_message.strip().startswith("{"):
        return action_type, content, tool_args, thinking_text

    try:
        data = json.loads(user_message)
    except json.JSONDecodeError:
        # Deliberate best effort: malformed JSON is sent verbatim as speech.
        return action_type, content, tool_args, thinking_text

    # ``or`` also covers explicit JSON nulls, which ``dict.get`` defaults miss.
    action_type = data.get("action_type") or "speak"
    content = data.get("content") or ""
    tool_args = data.get("tool_args") or {}
    thinking_text = data.get("thinking", "No explicit thinking provided.")
    return action_type, content, tool_args, thinking_text


def _render_env_reply(observation, done):
    """Build the markdown chat bubble for one environment observation."""
    bubble = ""
    if observation.customer_reply:
        bubble += f"🗣️ **Customer:** {observation.customer_reply}\n\n"
    if observation.tool_response:
        bubble += f"💻 **Sandbox API:** `{observation.tool_response}`\n\n"
    if not bubble:
        bubble = "*(No response from environment)*"
    if done:
        bubble += "\n🏁 **Episode Complete!**"
    return bubble.strip()


def interact_with_env(user_message, history, cum_reward, step_count,
                      current_customer_state, current_db_state,
                      progress=gr.Progress()):
    """Send one agent action to the environment and build the dashboard outputs.

    Args:
        user_message: Raw input; plain text (spoken to the customer) or a JSON
            object describing the action (see ``_parse_agent_message``).
        history: Chat transcript as a list of ``{"role", "content"}`` dicts.
            It is extended in place; ``None`` is treated as an empty list.
        cum_reward: Reward accumulated so far in the current episode.
        step_count: Number of successful steps so far in the current episode.
        current_customer_state: Previous customer state, returned unchanged
            if the step fails.
        current_db_state: Previous database state, returned unchanged if the
            step fails.
        progress: Gradio progress tracker used to report the current phase.

    Returns:
        tuple: Twelve values, in order: ``history`` (twice), thinking text,
        tool text, reward summary text, customer-state markdown, database
        state, scorecard markdown, cumulative reward, step count, customer
        state, database state. On failure the text fields are ``"Error"`` and
        the counters/states are the values that were in effect when the error
        occurred.
    """
    if history is None:
        history = []

    try:
        # --- 1. LAYER 1: Parse Agent Policy (Belief State & Action) ---
        progress(0.1, desc="Parsing your action...")
        action_type, content, tool_args, thinking_text = _parse_agent_message(user_message)
        if action_type == "tool_call":
            tool_text = f"🔧 Tool: {content}\n📦 Args: {json.dumps(tool_args)}"
        else:
            tool_text = "No tools invoked."
        action = CustomerAction(action_type=action_type, content=content, tool_args=tool_args)

        # --- 2. ORCHESTRATOR: Run the step ---
        # NOTE(review): a new connection is opened for every step; this assumes
        # the server keeps episode state across connections -- confirm.
        with CustomerEnv(base_url=LOCAL_ENV_URL) as env:
            # ``step_count`` is checked as well as ``history`` because a failed
            # first step leaves error messages in the transcript; without it
            # the environment would never be reset after such a failure.
            if not history or not step_count:
                env.reset()
                cum_reward = 0.0
                step_count = 0

            if action_type == "end_call":
                progress(0.5, desc="🧠 Asking the Judge LLM to grade the episode...")
            elif action_type == "speak":
                progress(0.5, desc="🗣️ Waiting for the simulated customer to reply...")
            else:
                progress(0.5, desc="💻 Pinging the Sandbox Database...")
            result = env.step(action)

            # --- 4. Extract Hidden States (mocked fallback if the env doesn't
            # expose them yet). Read while the client is still open.
            updated_customer_state = getattr(
                env, 'customer_state',
                {"Intent": "Dispute Fee", "Satisfaction": "Frustrated (3/10)", "Cooperation": "Low"})
            updated_db_state = getattr(
                env, 'db_state',
                {"Account": "Active", "Balance": "$450.00", "Recent_Txn": "Overdraft Fee ($35)"})

        # --- 3. LAYER 2: Get Customer Agent & Sandbox Responses ---
        progress(0.9, desc="Formatting results...")
        env_chat_bubble = _render_env_reply(result.observation, result.done)

        cust_state_md = (
            f"**Hidden Intent:** {updated_customer_state.get('Intent', 'Unknown')}\n\n"
            f"**Satisfaction Tracker:** {updated_customer_state.get('Satisfaction', 'Neutral')}\n\n"
            f"**Cooperation Level:** {updated_customer_state.get('Cooperation', 'Moderate')}"
        )

        # --- 5. REWARD MODULE: Scorecard ---
        # A step may carry no reward; count it as zero instead of failing.
        step_reward = float(result.reward) if result.reward is not None else 0.0
        cum_reward += step_reward
        step_count += 1
        accuracy_text = f"Step Reward: {step_reward:.2f} | Cumulative: {cum_reward:.2f}"
        # Built line by line (no leading indentation) so markdown renders it
        # as a heading and bullet list rather than a code block.
        scorecard_md = (
            "\n"
            "### 🏆 Reward Module Output\n"
            f"* **Turn Count:** {step_count}\n"
            f"* **Tool Use Efficiency:** {'Active' if action_type == 'tool_call' else 'Pending'}\n"
            f"* **Resolution State:** {'Completed' if result.done else 'In Progress'}\n"
            f"* **Total Episode Score:** {cum_reward:.2f}\n"
        )

        # Add to transcript
        history.append({"role": "user", "content": f"[{action_type.upper()}] {content}"})
        history.append({"role": "assistant", "content": env_chat_bubble})

        return (history, history, thinking_text, tool_text, accuracy_text,
                cust_state_md, updated_db_state, scorecard_md,
                cum_reward, step_count, updated_customer_state, updated_db_state)
    except Exception as e:
        # Top-level UI boundary: keep the traceback in the log and surface the
        # error in the transcript using the same message format as the success
        # path (dicts, not a tuple).
        _logger.exception("Environment step failed")
        history.append({"role": "user", "content": user_message})
        history.append({"role": "assistant", "content": f"❌ System Error: {e}"})
        return (history, history, "Error", "Error", "Error", "Error",
                current_db_state, "Error", cum_reward, step_count,
                current_customer_state, current_db_state)
def reset_ui():
    """Return the initial value for every dashboard output.

    Returns:
        tuple: Twelve values, in order: two separate empty lists, the
        cumulative reward ``0.0``, the step count ``0``, three empty strings,
        the placeholder customer-state markdown, the placeholder database
        state, the placeholder scorecard markdown, the placeholder customer
        state, and an independent copy of the placeholder database state.
    """
    # "Cooperation" is included so the state dict matches the placeholder
    # markdown below, which is rendered from these same values.
    init_cust = {"Intent": "Waiting...", "Satisfaction": "N/A", "Cooperation": "N/A"}
    init_db = {"Status": "Waiting for start"}

    cust_state_md = (
        f"**Hidden Intent:** {init_cust['Intent']}\n\n"
        f"**Satisfaction Tracker:** {init_cust['Satisfaction']}\n\n"
        f"**Cooperation Level:** {init_cust['Cooperation']}"
    )
    scorecard_md = "### 🏆 Reward Module Output\nWaiting..."

    # The database state is returned twice; the second is a copy so the two
    # outputs never share one mutable dict.
    return ([], [], 0.0, 0, "", "", "",
            cust_state_md,
            init_db, scorecard_md, init_cust, dict(init_db))