Spaces:
Sleeping
Sleeping
"""
ESCTR — Research Interface
Dark-themed Gradio UI with IBM Plex Mono typography.
Tabs: Blog · Playground · Leaderboard
"""
import math
import random

import gradio as gr

from .environment import ESCTREnvironment
from .models import ESCTRAction
from .ui_styles import (
    INJECT_CSS, HEADER_HTML, ARCH_SVG, EPISODE_SVG,
    BLOG_HTML, LEADERBOARD_HTML,
)
# ── State management ─────────────────────────────────────────────────────────
def create_env():
    """Build and return a brand-new ``ESCTREnvironment`` instance."""
    environment = ESCTREnvironment()
    return environment
def reset_episode(task_name, seed_text):
    """Start a fresh episode and return the initial values for the Playground UI.

    Args:
        task_name: Task identifier forwarded to ``env.reset``.
        seed_text: Seed typed by the user. Blank (or ``None``) draws a random
            seed in ``[0, 99999]``. Text that is not an integer also falls
            back to a random seed, and the log records the substitution
            instead of the handler raising ``ValueError``.

    Returns:
        A 10-tuple: the new environment, the log text, the reward text, the
        status text, the active seed as a string, the step counter (``0``),
        and four ``gr.update(interactive=True)`` values (one per tool button
        listed in the reset handler's outputs).
    """
    env = create_env()

    seed_note = ""
    text = (seed_text or "").strip()
    if not text:
        seed = random.randint(0, 99999)
    else:
        try:
            seed = int(text)
        except ValueError:
            # Keep the UI usable on a typo: pick a seed and say so in the log.
            seed = random.randint(0, 99999)
            seed_note = f">> Invalid seed {text!r}; using random seed {seed}.\n\n"

    obs = env.reset(task_name=task_name, seed=seed)

    heavy_rule = "=" * 60
    # NOTE(review): the 'β' characters below look like mis-decoded dash /
    # box-drawing glyphs; they are kept as found — confirm the intended ones.
    log = "".join([
        f"{heavy_rule}\n",
        " ESCTR β New Episode\n",
        f" Task: {task_name} | Seed: {seed}\n",
        f"{heavy_rule}\n\n",
        seed_note,
        f"BRIEFING:\n{obs.system_response}\n\n",
        f"{'β' * 60}\n",
    ])
    status = "Step 0 | Reward: 0.00 | Investigating..."

    return (
        env, log, "0.00", status, str(seed), 0,
        gr.update(interactive=True),
        gr.update(interactive=True),
        gr.update(interactive=True),
        gr.update(interactive=True),
    )
def execute_tool(env, log, step_count, action_type, **kwargs):
    """Run one tool action against the environment and append the result to the log.

    Args:
        env: Active environment, or ``None`` when no episode has been started.
        log: Investigation log text accumulated so far.
        step_count: Number of steps already taken in this episode.
        action_type: Tool name passed to ``ESCTRAction``.
        **kwargs: Extra fields passed to ``ESCTRAction`` and echoed in the log.

    Returns:
        ``(env, log, reward_text, status_text, step_count)``. The step count
        is incremented only when the action ran without raising. Responses
        longer than 1500 characters are truncated in the log.
    """
    if env is None:
        return env, log + "\n>> Reset the environment first.\n", "0.00", "Not started", step_count

    # Broad catch is deliberate: this is the UI boundary, and any failure in
    # action construction or the environment step is reported in the log.
    try:
        action = ESCTRAction(action_type=action_type, **kwargs)
        obs = env.step(action)
    except Exception as e:
        log += f"\nERROR: {e}\n"
        return env, log, "0.00", "Error", step_count

    step_count += 1
    # NOTE(review): treats a missing reward as 0.0 so the ':.4f' formatting
    # below cannot raise — confirm whether the environment ever returns None.
    reward = obs.reward if obs.reward is not None else 0.0
    done = obs.done

    # Omit only absent/blank arguments; a legitimate falsy value such as an
    # adjustment amount of 0.0 must still appear in the log.
    param_str = ", ".join(
        f'{k}="{v}"' for k, v in kwargs.items() if v is not None and v != ""
    )

    response = obs.system_response
    if len(response) > 1500:
        response = response[:1500] + "\n... [truncated]"

    # NOTE(review): 'β' looks like a mis-decoded rule character; kept as found.
    light_rule = "β" * 40
    log += f"\n[Step {step_count}] {action_type}({param_str})\n"
    log += f"{light_rule}\n"
    log += f"{response}\n"
    log += f"{light_rule}\n"

    if done:
        heavy_rule = "=" * 60
        log += f"\n{heavy_rule}\n"
        log += " EPISODE COMPLETE\n"
        log += f" Final Reward: {reward:.4f}\n"
        log += f" Steps Used: {step_count}\n"
        log += f"{heavy_rule}\n"
        status = f"Done in {step_count} steps | Final Reward: {reward:.4f}"
    else:
        status = f"Step {step_count} | Reward: {reward:.4f} | Investigating..."

    return env, log, f"{reward:.4f}", status, step_count
def query_db(env, log, step_count, table):
    """Run the ``query_database`` tool for *table*, or prompt when none is chosen.

    Returns the same 5-tuple shape as ``execute_tool``.
    """
    if table:
        return execute_tool(
            env, log, step_count, "query_database",
            query_parameters={"table": table},
        )

    prompt_log = log + "\n>> Select a table.\n"
    return env, prompt_log, "0.00", "Select a table", step_count
def read_doc(env, log, step_count, doc_id):
    """Run the ``read_document`` tool for *doc_id*, or prompt when it is blank.

    Args:
        env: Active environment (may be ``None``; ``execute_tool`` handles it).
        log: Investigation log text accumulated so far.
        step_count: Number of steps already taken in this episode.
        doc_id: Document ID typed by the user; ``None`` and whitespace-only
            values are treated as blank.

    Returns:
        The same 5-tuple shape as ``execute_tool``.
    """
    # A cleared textbox may deliver None; normalise before stripping.
    cleaned_id = (doc_id or "").strip()
    if not cleaned_id:
        log += "\n>> Enter a document ID.\n"
        return env, log, "0.00", "Enter ID", step_count

    return execute_tool(env, log, step_count, "read_document",
                        document_id=cleaned_id)
def contact_vendor(env, log, step_count, message):
    """Run the ``communicate_vendor`` tool with *message*, or prompt when it is blank.

    Args:
        env: Active environment (may be ``None``; ``execute_tool`` handles it).
        log: Investigation log text accumulated so far.
        step_count: Number of steps already taken in this episode.
        message: Message typed by the user; ``None`` and whitespace-only
            values are treated as blank.

    Returns:
        The same 5-tuple shape as ``execute_tool``.
    """
    # A cleared textbox may deliver None; normalise before stripping.
    cleaned_message = (message or "").strip()
    if not cleaned_message:
        log += "\n>> Enter a message.\n"
        return env, log, "0.00", "Enter message", step_count

    return execute_tool(env, log, step_count, "communicate_vendor",
                        message_content=cleaned_message)
def submit_decision(env, log, step_count, amount, reason):
    """Validate the adjustment and run the ``submit_financial_decision`` tool.

    Args:
        env: Active environment (may be ``None``; ``execute_tool`` handles it).
        log: Investigation log text accumulated so far.
        step_count: Number of steps already taken in this episode.
        amount: Adjustment amount as typed by the user. It must parse to a
            finite float; ``"nan"``, ``"inf"``, blank, ``None`` and
            non-numeric text are rejected with a prompt in the log.
        reason: Free-text justification; a blank or ``None`` reason is
            replaced with a generic default.

    Returns:
        The same 5-tuple shape as ``execute_tool``.
    """
    try:
        amt = float(amount)
    except (ValueError, TypeError):
        amt = None

    # float() accepts "nan" and "inf"; neither is a usable monetary amount.
    if amt is None or not math.isfinite(amt):
        log += "\n>> Enter a valid numeric amount.\n"
        return env, log, "0.00", "Invalid amount", step_count

    cleaned_reason = (reason or "").strip()
    if not cleaned_reason:
        cleaned_reason = "Financial adjustment based on investigation"

    return execute_tool(env, log, step_count, "submit_financial_decision",
                        adjustment_amount=amt, adjustment_reason=cleaned_reason)
# ── Build UI ─────────────────────────────────────────────────────────────────
def build_gradio_app():
    """Assemble the three-tab Gradio Blocks app and return it.

    The page starts with the injected CSS and header HTML, followed by the
    tabs "Blog", "Playground" and "Leaderboard". The Playground tab holds the
    episode controls, a status row, the tool panel and the investigation log;
    its buttons are wired to ``reset_episode``, ``query_db``, ``read_doc``,
    ``contact_vendor`` and ``submit_decision``.

    NOTE(review): the nesting below was reconstructed from syntax because the
    captured source had lost its indentation — confirm against the deployed
    layout. The 'β' glyphs inside labels look like mis-decoded dash / arrow /
    icon characters and are reproduced unchanged.
    """
    # Static option lists and HTML fragments, declared up front so the layout
    # code below reads as structure only.
    task_choices = [
        ("Procurement Reconciliation (Easy)", "procurement_reconciliation"),
        ("SLA Enforcement (Medium)", "sla_enforcement"),
        ("Adversarial Auditing (Hard)", "adversarial_auditing"),
    ]
    table_choices = [
        "purchase_orders", "invoices",
        "shipping_logs", "sla_contracts",
        "warehouse_logs",
    ]
    intro_html = '<p style="text-align:center;font-family:IBM Plex Mono,monospace;font-size:0.85rem;color:#64748b;font-style:italic;margin:0 0 1rem">Select a task and seed below, then use the tools to investigate. Click submit when ready.</p>'
    tools_heading_html = '<p style="font-family:IBM Plex Mono,monospace;font-weight:600;font-size:0.9rem;color:#0f172a;margin:0.5rem 0">Tools</p>'
    divider_html = '<hr style="border:none;border-top:1px solid #e2e8f0;margin:0.5rem 0">'

    with gr.Blocks(title="ESCTR Environment") as app:
        # Per-session hidden state: the environment object and the step count.
        session_env = gr.State(None)
        steps_taken = gr.State(0)

        # Dark theme CSS + page header.
        gr.HTML(INJECT_CSS)
        gr.HTML(HEADER_HTML)

        with gr.Tabs():
            # Tab 1: Blog
            with gr.Tab("Blog"):
                gr.HTML(BLOG_HTML)

            # Tab 2: Playground
            with gr.Tab("Playground"):
                gr.HTML(EPISODE_SVG)
                gr.HTML(intro_html)

                # Row 1: task, seed, start button.
                with gr.Row():
                    task_picker = gr.Dropdown(
                        choices=task_choices,
                        value="procurement_reconciliation",
                        label="Task",
                        scale=2,
                    )
                    seed_box = gr.Textbox(
                        label="Seed",
                        placeholder="random",
                        value="",
                        scale=1,
                    )
                    start_button = gr.Button(
                        "βΆ Start Episode",
                        variant="primary",
                        scale=1,
                    )

                # Status bar: read-only status, reward and active seed.
                with gr.Row():
                    status_box = gr.Textbox(
                        label="Status",
                        value="Click 'βΆ Start Episode' to begin",
                        interactive=False,
                        scale=3,
                    )
                    reward_box = gr.Textbox(
                        label="Reward",
                        value="β",
                        interactive=False,
                        scale=1,
                    )
                    active_seed_box = gr.Textbox(
                        label="Active Seed",
                        value="β",
                        interactive=False,
                        scale=1,
                    )

                # Row 2: tools on the left, investigation log on the right.
                with gr.Row():
                    with gr.Column(scale=1, min_width=300):
                        gr.HTML(tools_heading_html)

                        # Tool 1: query_database
                        table_picker = gr.Dropdown(
                            choices=table_choices,
                            label="query_database β table",
                            value="purchase_orders",
                        )
                        run_query_button = gr.Button("Run Query")

                        # Tool 2: read_document
                        doc_id_box = gr.Textbox(
                            label="read_document β ID",
                            placeholder="PO-2025-1234",
                        )
                        read_button = gr.Button("Read Document")

                        # Tool 3: communicate_vendor
                        vendor_message_box = gr.Textbox(
                            label="communicate_vendor β message",
                            placeholder="We reject your settlement...",
                            lines=2,
                        )
                        send_button = gr.Button("Send Message")

                        # Tool 4: submit_financial_decision
                        gr.HTML(divider_html)
                        amount_box = gr.Textbox(
                            label="submit_financial_decision β amount ($)",
                            placeholder="-450.00",
                        )
                        reason_box = gr.Textbox(
                            label="Reason",
                            placeholder="Overcharge on line items",
                        )
                        submit_button = gr.Button(
                            "β‘ Submit Decision", variant="stop",
                        )

                    with gr.Column(scale=2):
                        log_box = gr.Textbox(
                            label="Investigation Log",
                            value="Waiting for episode...",
                            lines=28,
                            max_lines=50,
                            interactive=False,
                        )

            # Tab 3: Leaderboard
            with gr.Tab("Leaderboard"):
                gr.HTML(LEADERBOARD_HTML)

        # Event handlers. Starting an episode refreshes every display and
        # marks the four tool buttons interactive.
        start_button.click(
            fn=reset_episode,
            inputs=[task_picker, seed_box],
            outputs=[
                session_env, log_box, reward_box, status_box,
                active_seed_box, steps_taken,
                run_query_button, read_button, send_button, submit_button,
            ],
        )

        # Every tool handler takes (env, log, step count, *tool inputs) and
        # writes back the same five outputs, so the bindings share one loop.
        shared_inputs = [session_env, log_box, steps_taken]
        shared_outputs = [
            session_env, log_box, reward_box,
            status_box, steps_taken,
        ]
        tool_bindings = (
            (run_query_button, query_db, [table_picker]),
            (read_button, read_doc, [doc_id_box]),
            (send_button, contact_vendor, [vendor_message_box]),
            (submit_button, submit_decision, [amount_box, reason_box]),
        )
        for button, handler, extra_inputs in tool_bindings:
            button.click(
                fn=handler,
                inputs=shared_inputs + extra_inputs,
                outputs=shared_outputs,
            )

    return app