Spaces:

ramnarayanan747
/

voice_agent

Runtime error

Ram Narayanan Ananthakrishnapuram Sampath

Added dashboard and rendered with a local LLM to validate Env interaction

8918e76 about 1 month ago

5.18 kB

	import json
	from client import CustomerEnv, CustomerAction
	import gradio as gr

	# Base URL handed to CustomerEnv(base_url=...) each time the dashboard
	# performs an environment step.
	LOCAL_ENV_URL = "http://localhost:8000"

	def _parse_agent_action(user_message: str) -> tuple:
	    """Split the raw textbox input into (action_type, content, tool_args, thinking).

	    Plain text is a "speak" action whose content is the text itself. Input
	    that starts with "{" is tried as a JSON object with optional keys
	    "action_type", "content", "tool_args" and "thinking"; if it does not
	    parse, it is treated as plain text.
	    """
	    action_type, content, tool_args, thinking_text = "speak", user_message, {}, "N/A"

	    if user_message.strip().startswith("{"):
	        try:
	            data = json.loads(user_message)
	        except json.JSONDecodeError:
	            # Looked like JSON but is not: fall back to speaking the raw text.
	            data = None

	        if isinstance(data, dict):
	            # `or` also covers explicit JSON nulls, which would otherwise
	            # reach .upper() / the action constructor as None.
	            action_type = data.get("action_type") or "speak"
	            content = data.get("content") or ""
	            tool_args = data.get("tool_args") or {}
	            thinking_text = data.get("thinking", "No explicit thinking provided.")

	    return action_type, content, tool_args, thinking_text


	def _format_env_reply(observation, done) -> str:
	    """Build the environment's chat bubble from a step observation.

	    Reads `customer_reply` and `tool_response` from the observation and
	    appends an end-of-episode marker when `done` is truthy.
	    """
	    parts = []
	    if observation.customer_reply:
	        parts.append(f"🗣️ Customer: {observation.customer_reply}\n\n")
	    if observation.tool_response:
	        parts.append(f"💻 Sandbox API: `{observation.tool_response}`\n\n")

	    bubble = "".join(parts) or "(No response from environment)"
	    if done:
	        bubble += "\n🏁 Episode Complete!"
	    return bubble.strip()


	def interact_with_env(user_message, history, cum_reward, step_count, current_customer_state, current_db_state, progress=gr.Progress()):
	    """Send one agent action to the environment and refresh every dashboard panel.

	    Args:
	        user_message: Raw textbox input; plain text or a JSON action object.
	        history: Chat transcript as a list of {"role", "content"} dicts. An
	            empty (or missing) transcript starts a new episode: the
	            environment is reset and the reward/step counters restart.
	        cum_reward: Reward accumulated so far in the episode.
	        step_count: Number of steps taken so far in the episode.
	        current_customer_state: Last customer state; passed back on error.
	        current_db_state: Last database state; passed back on error.
	        progress: Gradio progress tracker (kept as a default argument).

	    Returns:
	        A 12-tuple: (history, history, thinking_text, tool_text,
	        accuracy_text, customer_state_markdown, db_state, scorecard_markdown,
	        cum_reward, step_count, customer_state, db_state). On any exception
	        the text panels read "Error", the incoming states are passed through
	        and the failure is appended to the transcript.
	    """
	    if history is None:
	        # Otherwise the error handler below would itself fail on .append().
	        history = []

	    try:
	        # --- 1. LAYER 1: Parse Agent Policy (Belief State & Action) ---
	        progress(0.1, desc="Parsing your action...")
	        action_type, content, tool_args, thinking_text = _parse_agent_action(user_message)

	        if action_type == "tool_call":
	            tool_text = f"🔧 Tool: {content}\n📦 Args: {json.dumps(tool_args)}"
	        else:
	            tool_text = "No tools invoked."
	        action = CustomerAction(action_type=action_type, content=content, tool_args=tool_args)

	        # --- 2. ORCHESTRATOR: Run the step ---
	        # A new client is opened for every call.
	        # NOTE(review): this relies on the server keeping episode state
	        # across separate client connections - confirm.
	        with CustomerEnv(base_url=LOCAL_ENV_URL) as env:
	            if not history:
	                env.reset()
	                cum_reward = 0.0
	                step_count = 0

	            waiting_on = {
	                "end_call": "🧠 Asking the Judge LLM to grade the episode...",
	                "speak": "🗣️ Waiting for the simulated customer to reply...",
	            }.get(action_type, "💻 Pinging the Sandbox Database...")
	            progress(0.5, desc=waiting_on)

	            result = env.step(action)

	            # --- 4. Extract Hidden States (mocked fallback if the env doesn't expose them) ---
	            # Read while the client is still open; placeholders are used when
	            # the attribute is missing or None.
	            updated_customer_state = getattr(env, 'customer_state', None)
	            if updated_customer_state is None:
	                updated_customer_state = {"Intent": "Dispute Fee", "Satisfaction": "Frustrated (3/10)", "Cooperation": "Low"}
	            updated_db_state = getattr(env, 'db_state', None)
	            if updated_db_state is None:
	                updated_db_state = {"Account": "Active", "Balance": "$450.00", "Recent_Txn": "Overdraft Fee ($35)"}

	        # --- 3. LAYER 2: Get Customer Agent & Sandbox Responses ---
	        progress(0.9, desc="Formatting results...")
	        env_chat_bubble = _format_env_reply(result.observation, result.done)

	        cust_state_md = (
	            f"Hidden Intent: {updated_customer_state.get('Intent', 'Unknown')}\n\n"
	            f"Satisfaction Tracker: {updated_customer_state.get('Satisfaction', 'Neutral')}\n\n"
	            f"Cooperation Level: {updated_customer_state.get('Cooperation', 'Moderate')}"
	        )

	        # --- 5. REWARD MODULE: Scorecard ---
	        # A step may carry no reward; count it as zero rather than raising.
	        step_reward = float(result.reward) if result.reward is not None else 0.0
	        cum_reward += step_reward
	        step_count += 1
	        accuracy_text = f"Step Reward: {step_reward:.2f} | Cumulative: {cum_reward:.2f}"

	        tool_efficiency = 'Active' if action_type == 'tool_call' else 'Pending'
	        resolution = 'Completed' if result.done else 'In Progress'
	        # Assembled line by line so source indentation cannot leak into the Markdown.
	        scorecard_md = (
	            "\n### 🏆 Reward Module Output\n"
	            f"* Turn Count: {step_count}\n"
	            f"* Tool Use Efficiency: {tool_efficiency}\n"
	            f"* Resolution State: {resolution}\n"
	            f"* Total Episode Score: {cum_reward:.2f}\n"
	        )

	        # Add to transcript
	        history.append({"role": "user", "content": f"[{action_type.upper()}] {content}"})
	        history.append({"role": "assistant", "content": env_chat_bubble})

	        return (history, history, thinking_text, tool_text, accuracy_text,
	                cust_state_md, updated_db_state, scorecard_md,
	                cum_reward, step_count, updated_customer_state, updated_db_state)

	    except Exception as e:
	        # UI boundary: show the failure in the transcript instead of crashing
	        # the app. Same {"role", "content"} shape as the success path, so the
	        # transcript never mixes message formats.
	        history.append({"role": "user", "content": user_message or ""})
	        history.append({"role": "assistant", "content": f"❌ System Error: {str(e)}"})
	        return (history, history, "Error", "Error", "Error", "Error",
	                current_db_state, "Error", cum_reward, step_count,
	                current_customer_state, current_db_state)

	def reset_ui():
	    """Return the initial value for every dashboard component and state slot.

	    Returns:
	        A 12-tuple: two separate empty lists, cumulative reward 0.0, step
	        count 0, three empty strings, the placeholder customer-state
	        Markdown, the placeholder database state, the placeholder scorecard
	        Markdown, the placeholder customer state, and the same placeholder
	        database state object again.
	    """
	    init_cust = {"Intent": "Waiting...", "Satisfaction": "N/A"}
	    init_db = {"Status": "Waiting for start"}
	    cust_state_md = "Hidden Intent: Waiting...\n\nSatisfaction Tracker: N/A\n\nCooperation Level: N/A"
	    scorecard_md = "### 🏆 Reward Module Output\nWaiting..."

	    return (
	        [], [], 0.0, 0, "", "", "",
	        cust_state_md,
	        init_db, scorecard_md, init_cust, init_db,
	    )