Spaces:
Running
Running
| # Copyright (c) Meta Platforms, Inc. and affiliates. | |
| # All rights reserved. | |
| # | |
| # This source code is licensed under the BSD-style license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| """Custom Gradio UI for coding_tools_env.""" | |
| from __future__ import annotations | |
| import json | |
| from typing import Any, Optional | |
| import gradio as gr | |
| TOOL_SPECS: dict[str, dict[str, Any]] = { | |
| "bash": { | |
| "icon": "π₯οΈ", | |
| "description": "Execute a bash command inside the E2B sandbox.", | |
| "signature": "bash(command: str, timeout: float = 30)", | |
| }, | |
| "read": { | |
| "icon": "π", | |
| "description": "Read a file from the sandbox. Optional `offset` and `limit` slice the lines.", | |
| "signature": "read(file_path: str, offset?: int, limit?: int)", | |
| }, | |
| "write": { | |
| "icon": "βοΈ", | |
| "description": "Create or overwrite a file with `content`.", | |
| "signature": "write(file_path: str, content: str)", | |
| }, | |
| "edit": { | |
| "icon": "π§", | |
| "description": "Find-and-replace `old_string` with `new_string` in a file. Use `replace_all` for every occurrence.", | |
| "signature": "edit(file_path, old_string, new_string, replace_all=False)", | |
| }, | |
| "multi_edit": { | |
| "icon": "π", | |
| "description": "Apply a list of {old_string, new_string, replace_all?} edits sequentially in one file.", | |
| "signature": "multi_edit(file_path, edits: list[dict])", | |
| }, | |
| "glob": { | |
| "icon": "π", | |
| "description": "Glob for files matching a shell-style pattern, optionally rooted at `path`.", | |
| "signature": "glob(pattern: str, path?: str)", | |
| }, | |
| "grep": { | |
| "icon": "π§΅", | |
| "description": "Regex search across files. `include` is an optional file glob filter.", | |
| "signature": "grep(pattern: str, path?: str, include?: str)", | |
| }, | |
| "ls": { | |
| "icon": "π", | |
| "description": "List a directory. `ignore` is a comma-separated list of patterns to skip.", | |
| "signature": "ls(path: str = '.', ignore?: list[str])", | |
| }, | |
| "todo_write": { | |
| "icon": "ποΈ", | |
| "description": "Replace the agent's todo list. Each item: {id, content, status, priority}. Only one `in_progress` at a time.", | |
| "signature": "todo_write(todos: list[dict])", | |
| }, | |
| "submit_solution": { | |
| "icon": "π", | |
| "description": "Run the configured verify commands and finalise the episode with a reward.", | |
| "signature": "submit_solution()", | |
| }, | |
| } | |
| TOOL_CHOICES = [(f"{spec['icon']} {name}", name) for name, spec in TOOL_SPECS.items()] | |
| def _lines(value: str) -> list[str]: | |
| return [line.strip() for line in value.splitlines() if line.strip()] | |
| def _csv_list(value: str) -> list[str]: | |
| return [item.strip() for item in value.split(",") if item.strip()] | |
| def _extract_tool_text(result: dict[str, Any]) -> str: | |
| obs = result.get("observation", {}) | |
| if not isinstance(obs, dict): | |
| return "" | |
| tool_result = obs.get("result", {}) | |
| if not isinstance(tool_result, dict): | |
| return "" | |
| content = tool_result.get("content", []) | |
| if isinstance(content, list) and content: | |
| first = content[0] | |
| if isinstance(first, dict): | |
| return str(first.get("text", "")) | |
| return str(tool_result.get("data", "")) | |
| def _extract_tool_error(result: dict[str, Any]) -> bool: | |
| obs = result.get("observation", {}) | |
| if not isinstance(obs, dict): | |
| return False | |
| tool_result = obs.get("result", {}) | |
| if not isinstance(tool_result, dict): | |
| return False | |
| return bool(tool_result.get("is_error", False)) | |
| def _format_status(state: dict[str, Any]) -> str: | |
| if not state: | |
| return "**No active session.** Configure setup/verify and click *Reset sandbox*." | |
| sandbox_id = state.get("sandbox_id") or "β" | |
| step_count = state.get("step_count", 0) | |
| submitted = state.get("submitted", False) | |
| last_reward = state.get("last_reward") | |
| reward_str = "β" if last_reward is None else f"{last_reward}" | |
| submitted_str = "β submitted" if submitted else "β³ in progress" | |
| return ( | |
| f"**Session active** Β· sandbox `{sandbox_id}` Β· " | |
| f"steps `{step_count}` Β· {submitted_str} Β· reward `{reward_str}`" | |
| ) | |
| def _format_history(state: dict[str, Any]) -> list[list[str]]: | |
| history = state.get("tool_history", []) or [] | |
| rows: list[list[str]] = [] | |
| for entry in history[-25:][::-1]: | |
| if not isinstance(entry, dict): | |
| continue | |
| tool = str(entry.get("tool", "")) | |
| ok = "β " if entry.get("ok") else "β" | |
| output = str(entry.get("output") or entry.get("error") or "") | |
| if len(output) > 140: | |
| output = output[:137] + "β¦" | |
| rows.append([ok, tool, output]) | |
| return rows | |
| def coding_tools_ui_builder( | |
| web_manager, | |
| action_fields: list[dict[str, Any]], | |
| metadata: Optional[Any], | |
| is_chat_env: bool, | |
| title: str, | |
| quick_start_md: Optional[str], | |
| ) -> gr.Blocks: | |
| def state_payload() -> dict[str, Any]: | |
| try: | |
| return web_manager.get_state() | |
| except RuntimeError: | |
| return {} | |
| with gr.Blocks( | |
| title=f"{title} - Coding Tools", | |
| theme=gr.themes.Soft(primary_hue="blue", neutral_hue="slate"), | |
| ) as demo: | |
| gr.Markdown(f"# π§° {title}") | |
| gr.Markdown( | |
| "E2B-backed coding environment exposing a SETA-style tool surface. " | |
| "Pick a tool, fill the inputs, and inspect the output, state, and call history." | |
| ) | |
| status_md = gr.Markdown(_format_status({})) | |
| with gr.Row(): | |
| # βββββββββ Left: Session controls βββββββββ | |
| with gr.Column(scale=1, min_width=320): | |
| with gr.Accordion("Session controls", open=True): | |
| setup_input = gr.Textbox( | |
| label="Setup commands (one per line)", | |
| value="mkdir -p /home/user/work", | |
| lines=3, | |
| ) | |
| verify_input = gr.Textbox( | |
| label="Verify commands (one per line)", | |
| value="pytest -q", | |
| lines=2, | |
| ) | |
| with gr.Row(): | |
| reset_btn = gr.Button("π Reset sandbox", variant="primary") | |
| close_btn = gr.Button("π Close", variant="secondary") | |
| # βββββββββ Middle: Tool selection + dynamic inputs βββββββββ | |
| with gr.Column(scale=2, min_width=420): | |
| gr.Markdown("### Tool") | |
| tool_dropdown = gr.Dropdown( | |
| choices=TOOL_CHOICES, | |
| value="bash", | |
| label="Select a tool", | |
| interactive=True, | |
| ) | |
| tool_help = gr.Markdown( | |
| f"**{TOOL_SPECS['bash']['signature']}** \n{TOOL_SPECS['bash']['description']}" | |
| ) | |
| # One Group per tool. Visibility toggled by tool_dropdown. | |
| with gr.Group(visible=True) as g_bash: | |
| bash_command = gr.Textbox( | |
| label="command", value="pwd && ls -la", lines=3 | |
| ) | |
| bash_timeout = gr.Number(label="timeout (seconds)", value=30) | |
| with gr.Group(visible=False) as g_read: | |
| read_path = gr.Textbox( | |
| label="file_path", value="/home/user/work/main.py" | |
| ) | |
| with gr.Row(): | |
| read_offset = gr.Number(label="offset (optional)", value=None) | |
| read_limit = gr.Number(label="limit (optional)", value=None) | |
| with gr.Group(visible=False) as g_write: | |
| write_path = gr.Textbox( | |
| label="file_path", value="/home/user/work/main.py" | |
| ) | |
| write_content = gr.Textbox(label="content", lines=8) | |
| with gr.Group(visible=False) as g_edit: | |
| edit_path = gr.Textbox( | |
| label="file_path", value="/home/user/work/main.py" | |
| ) | |
| edit_old = gr.Textbox(label="old_string", lines=3) | |
| edit_new = gr.Textbox(label="new_string", lines=3) | |
| edit_replace_all = gr.Checkbox(label="replace_all", value=False) | |
| with gr.Group(visible=False) as g_multi_edit: | |
| multi_edit_path = gr.Textbox( | |
| label="file_path", value="/home/user/work/main.py" | |
| ) | |
| multi_edit_json = gr.Code( | |
| label="edits (JSON array)", | |
| language="json", | |
| value=( | |
| '[\n' | |
| ' {"old_string": "TODO", "new_string": "DONE", "replace_all": false}\n' | |
| ']' | |
| ), | |
| lines=8, | |
| ) | |
| with gr.Group(visible=False) as g_glob: | |
| glob_pattern = gr.Textbox(label="pattern", value="**/*.py") | |
| glob_path = gr.Textbox( | |
| label="path (optional)", value="/home/user/work" | |
| ) | |
| with gr.Group(visible=False) as g_grep: | |
| grep_pattern = gr.Textbox(label="pattern", value="TODO") | |
| grep_path = gr.Textbox( | |
| label="path (optional)", value="/home/user/work" | |
| ) | |
| grep_include = gr.Textbox( | |
| label="include (optional file glob)", value="" | |
| ) | |
| with gr.Group(visible=False) as g_ls: | |
| ls_path = gr.Textbox(label="path", value="/home/user/work") | |
| ls_ignore = gr.Textbox( | |
| label="ignore (comma-separated globs)", value="" | |
| ) | |
| with gr.Group(visible=False) as g_todo: | |
| todo_json = gr.Code( | |
| label="todos (JSON array)", | |
| language="json", | |
| value=( | |
| '[\n' | |
| ' {"id":"1","content":"Inspect files",' | |
| '"status":"in_progress","priority":"high"}\n' | |
| ']' | |
| ), | |
| lines=8, | |
| ) | |
| with gr.Group(visible=False) as g_submit: | |
| gr.Markdown( | |
| "_Submit runs the configured verify commands and ends the " | |
| "episode with a reward._" | |
| ) | |
| run_btn = gr.Button("βΆοΈ Run tool", variant="primary", size="lg") | |
| # βββββββββ Right: Output + State βββββββββ | |
| with gr.Column(scale=2, min_width=420): | |
| gr.Markdown("### Output") | |
| output_status = gr.Markdown("_Awaiting first call._") | |
| output_view = gr.Code( | |
| label="Tool output (text)", | |
| language=None, | |
| value="", | |
| lines=14, | |
| ) | |
| with gr.Accordion("Raw step response", open=False): | |
| raw_response = gr.Code( | |
| label="raw JSON", | |
| language="json", | |
| value="", | |
| lines=14, | |
| ) | |
| gr.Markdown("### State") | |
| with gr.Tabs(): | |
| with gr.Tab("Summary"): | |
| state_summary = gr.Markdown(_format_status({})) | |
| history_table = gr.Dataframe( | |
| headers=["β", "tool", "output"], | |
| datatype=["str", "str", "str"], | |
| row_count=(0, "dynamic"), | |
| wrap=True, | |
| label="Tool history (latest 25)", | |
| ) | |
| with gr.Tab("JSON"): | |
| state_json = gr.Code( | |
| label="full session state", | |
| language="json", | |
| value="{}", | |
| lines=18, | |
| ) | |
| # βββββββββ Tool dropdown β input form visibility βββββββββ | |
| TOOL_GROUPS = { | |
| "bash": g_bash, | |
| "read": g_read, | |
| "write": g_write, | |
| "edit": g_edit, | |
| "multi_edit": g_multi_edit, | |
| "glob": g_glob, | |
| "grep": g_grep, | |
| "ls": g_ls, | |
| "todo_write": g_todo, | |
| "submit_solution": g_submit, | |
| } | |
| group_components = list(TOOL_GROUPS.values()) | |
| def on_tool_change(tool: str): | |
| spec = TOOL_SPECS.get(tool, {}) | |
| help_md = ( | |
| f"**{spec.get('signature', tool)}** \n{spec.get('description', '')}" | |
| ) | |
| updates = [gr.update(visible=(name == tool)) for name in TOOL_GROUPS] | |
| return [help_md, *updates] | |
| tool_dropdown.change( | |
| on_tool_change, inputs=[tool_dropdown], outputs=[tool_help, *group_components] | |
| ) | |
| # βββββββββ Result rendering helper βββββββββ | |
| def render_result(tool: str, raw: dict[str, Any]) -> tuple[str, str, str, str, str, list[list[str]]]: | |
| text = _extract_tool_text(raw) | |
| is_error = _extract_tool_error(raw) or text.startswith("ERROR:") or text.startswith("Error:") | |
| badge = "β error" if is_error else "β ok" | |
| status_line = f"**{tool}** β {badge}" | |
| state = state_payload() | |
| return ( | |
| status_line, # output_status | |
| text, # output_view | |
| json.dumps(raw, indent=2), # raw_response | |
| _format_status(state), # state_summary (top + summary panel β same content) | |
| json.dumps(state, indent=2, default=str), # state_json | |
| _format_history(state), # history_table | |
| ) | |
| # βββββββββ Session handlers βββββββββ | |
| async def on_reset(setup_text: str, verify_text: str): | |
| raw = await web_manager.reset_environment( | |
| {"setup": _lines(setup_text), "verify": _lines(verify_text)} | |
| ) | |
| state = state_payload() | |
| obs = raw.get("observation", {}) if isinstance(raw, dict) else {} | |
| meta = obs.get("metadata", {}) if isinstance(obs, dict) else {} | |
| err = meta.get("error") if isinstance(meta, dict) else None | |
| sandbox_id = state.get("sandbox_id") or meta.get("sandbox_id") or "β" | |
| if err: | |
| status_line = f"**reset** β β {err}" | |
| text = err | |
| else: | |
| status_line = f"**reset** β β sandbox `{sandbox_id}` ready" | |
| text = f"Sandbox ready: {sandbox_id}" | |
| return ( | |
| _format_status(state), | |
| status_line, | |
| text, | |
| json.dumps(raw, indent=2), | |
| _format_status(state), | |
| json.dumps(state, indent=2, default=str), | |
| _format_history(state), | |
| ) | |
| async def on_close(): | |
| await web_manager._run_sync_in_thread_pool(web_manager.env.close) | |
| return ( | |
| _format_status({}), | |
| "**close** β π session closed", | |
| "Session closed.", | |
| "{}", | |
| _format_status({}), | |
| "{}", | |
| [], | |
| ) | |
| # βββββββββ Universal run handler βββββββββ | |
| async def on_run( | |
| tool: str, | |
| # bash | |
| bash_command: str, bash_timeout: float, | |
| # read | |
| read_path: str, read_offset: float | None, read_limit: float | None, | |
| # write | |
| write_path: str, write_content: str, | |
| # edit | |
| edit_path: str, edit_old: str, edit_new: str, edit_replace_all: bool, | |
| # multi_edit | |
| multi_edit_path: str, multi_edit_json: str, | |
| # glob | |
| glob_pattern: str, glob_path: str, | |
| # grep | |
| grep_pattern: str, grep_path: str, grep_include: str, | |
| # ls | |
| ls_path: str, ls_ignore: str, | |
| # todo_write | |
| todo_json: str, | |
| ): | |
| try: | |
| if tool == "bash": | |
| args = { | |
| "command": bash_command, | |
| "timeout": float(bash_timeout) if bash_timeout else 30, | |
| } | |
| elif tool == "read": | |
| args = {"file_path": read_path} | |
| if read_offset not in (None, "", 0): | |
| args["offset"] = int(read_offset) | |
| if read_limit not in (None, ""): | |
| args["limit"] = int(read_limit) | |
| elif tool == "write": | |
| args = {"file_path": write_path, "content": write_content} | |
| elif tool == "edit": | |
| args = { | |
| "file_path": edit_path, | |
| "old_string": edit_old, | |
| "new_string": edit_new, | |
| "replace_all": bool(edit_replace_all), | |
| } | |
| elif tool == "multi_edit": | |
| args = { | |
| "file_path": multi_edit_path, | |
| "edits": json.loads(multi_edit_json), | |
| } | |
| elif tool == "glob": | |
| args = {"pattern": glob_pattern} | |
| if glob_path.strip(): | |
| args["path"] = glob_path | |
| elif tool == "grep": | |
| args = {"pattern": grep_pattern} | |
| if grep_path.strip(): | |
| args["path"] = grep_path | |
| if grep_include.strip(): | |
| args["include"] = grep_include | |
| elif tool == "ls": | |
| args = {"path": ls_path or "."} | |
| ignore = _csv_list(ls_ignore) | |
| if ignore: | |
| args["ignore"] = ignore | |
| elif tool == "todo_write": | |
| args = {"todos": json.loads(todo_json)} | |
| elif tool == "submit_solution": | |
| args = {} | |
| else: | |
| return ( | |
| _format_status(state_payload()), | |
| f"**{tool}** β β unknown tool", | |
| f"unknown tool: {tool}", | |
| "{}", | |
| _format_status(state_payload()), | |
| "{}", | |
| [], | |
| ) | |
| except (json.JSONDecodeError, ValueError) as exc: | |
| state = state_payload() | |
| return ( | |
| _format_status(state), | |
| f"**{tool}** β β bad input: {exc}", | |
| f"Input parse error: {exc}", | |
| "{}", | |
| _format_status(state), | |
| json.dumps(state, indent=2, default=str), | |
| _format_history(state), | |
| ) | |
| raw = await web_manager.step_environment( | |
| {"tool_name": tool, "arguments": args} | |
| ) | |
| state = state_payload() | |
| rendered = render_result(tool, raw) | |
| return (_format_status(state), *rendered) | |
| # βββββββββ Wire up events βββββββββ | |
| all_inputs = [ | |
| tool_dropdown, | |
| bash_command, bash_timeout, | |
| read_path, read_offset, read_limit, | |
| write_path, write_content, | |
| edit_path, edit_old, edit_new, edit_replace_all, | |
| multi_edit_path, multi_edit_json, | |
| glob_pattern, glob_path, | |
| grep_pattern, grep_path, grep_include, | |
| ls_path, ls_ignore, | |
| todo_json, | |
| ] | |
| all_outputs = [ | |
| status_md, | |
| output_status, | |
| output_view, | |
| raw_response, | |
| state_summary, | |
| state_json, | |
| history_table, | |
| ] | |
| run_btn.click(on_run, inputs=all_inputs, outputs=all_outputs) | |
| reset_btn.click( | |
| on_reset, inputs=[setup_input, verify_input], outputs=all_outputs | |
| ) | |
| close_btn.click(on_close, outputs=all_outputs) | |
| return demo | |