Start replay
Browse files- app.py +33 -15
- model_replay.py +90 -0
app.py
CHANGED
|
@@ -12,7 +12,7 @@ from e2b_desktop import Sandbox
|
|
| 12 |
from smolagents import CodeAgent
|
| 13 |
from smolagents.monitoring import LogLevel
|
| 14 |
from smolagents.gradio_ui import GradioUI, stream_to_gradio
|
| 15 |
-
|
| 16 |
|
| 17 |
from e2bqwen import QwenVLAPIModel, E2BVisionAgent
|
| 18 |
|
|
@@ -29,7 +29,6 @@ if not os.path.exists(TMP_DIR):
|
|
| 29 |
hf_token = os.getenv("HUGGINGFACE_API_KEY")
|
| 30 |
login(token=hf_token)
|
| 31 |
|
| 32 |
-
|
| 33 |
custom_css = """
|
| 34 |
.sandbox-container {
|
| 35 |
position: relative;
|
|
@@ -471,6 +470,7 @@ def create_agent(data_dir, desktop):
|
|
| 471 |
planning_interval=10,
|
| 472 |
)
|
| 473 |
|
|
|
|
| 474 |
class EnrichedGradioUI(GradioUI):
|
| 475 |
def log_user_message(self, text_input):
|
| 476 |
import gradio as gr
|
|
@@ -480,7 +480,7 @@ class EnrichedGradioUI(GradioUI):
|
|
| 480 |
gr.Button(interactive=False),
|
| 481 |
)
|
| 482 |
|
| 483 |
-
def interact_with_agent(self, task_input, stored_messages, session_state, session_hash, request: gr.Request):
|
| 484 |
import gradio as gr
|
| 485 |
|
| 486 |
interaction_id = generate_interaction_id(request)
|
|
@@ -491,11 +491,14 @@ class EnrichedGradioUI(GradioUI):
|
|
| 491 |
if not os.path.exists(data_dir):
|
| 492 |
os.makedirs(data_dir)
|
| 493 |
|
| 494 |
-
|
| 495 |
if "agent" in session_state:
|
| 496 |
session_state["agent"].data_dir = data_dir # Update data dir to new interaction
|
| 497 |
else:
|
| 498 |
session_state["agent"] = create_agent(data_dir=data_dir, desktop=desktop)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 499 |
|
| 500 |
try:
|
| 501 |
stored_messages.append(gr.ChatMessage(role="user", content=task_input))
|
|
@@ -528,6 +531,8 @@ class EnrichedGradioUI(GradioUI):
|
|
| 528 |
save_final_status(data_dir, "failed", summary=[], error_message=error_message)
|
| 529 |
|
| 530 |
finally:
|
|
|
|
|
|
|
| 531 |
upload_to_hf_and_remove(data_dir)
|
| 532 |
|
| 533 |
theme = gr.themes.Default(font=["Oxanium", "sans-serif"], primary_hue="amber", secondary_hue="blue")
|
|
@@ -571,13 +576,8 @@ with gr.Blocks(theme=theme, css=custom_css, js=custom_js) as demo:
|
|
| 571 |
session_state = gr.State({})
|
| 572 |
stored_messages = gr.State([])
|
| 573 |
|
| 574 |
-
with gr.Group(visible=False) as results_container:
|
| 575 |
-
results_output = gr.Textbox(
|
| 576 |
-
label="Results",
|
| 577 |
-
interactive=False,
|
| 578 |
-
elem_id="results-output"
|
| 579 |
-
)
|
| 580 |
|
|
|
|
| 581 |
|
| 582 |
minimalist_toggle = gr.Checkbox(label="Innie/Outie", value=False)
|
| 583 |
|
|
@@ -664,8 +664,8 @@ with gr.Blocks(theme=theme, css=custom_css, js=custom_js) as demo:
|
|
| 664 |
|
| 665 |
# Function to set view-only mode
|
| 666 |
def clear_and_set_view_only(task_input, request: gr.Request):
|
| 667 |
-
#
|
| 668 |
-
return
|
| 669 |
|
| 670 |
def set_interactive(request: gr.Request):
|
| 671 |
return update_html(True, request)
|
|
@@ -676,11 +676,29 @@ with gr.Blocks(theme=theme, css=custom_css, js=custom_js) as demo:
|
|
| 676 |
view_only_event = update_btn.click(
|
| 677 |
fn=clear_and_set_view_only,
|
| 678 |
inputs=[task_input],
|
| 679 |
-
outputs=[
|
| 680 |
-
).then(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 681 |
fn=set_interactive,
|
| 682 |
inputs=[],
|
| 683 |
-
outputs=sandbox_html
|
| 684 |
)
|
| 685 |
|
| 686 |
demo.load(
|
|
|
|
| 12 |
from smolagents import CodeAgent
|
| 13 |
from smolagents.monitoring import LogLevel
|
| 14 |
from smolagents.gradio_ui import GradioUI, stream_to_gradio
|
| 15 |
+
from model_replay import FakeModelClass
|
| 16 |
|
| 17 |
from e2bqwen import QwenVLAPIModel, E2BVisionAgent
|
| 18 |
|
|
|
|
| 29 |
hf_token = os.getenv("HUGGINGFACE_API_KEY")
|
| 30 |
login(token=hf_token)
|
| 31 |
|
|
|
|
| 32 |
custom_css = """
|
| 33 |
.sandbox-container {
|
| 34 |
position: relative;
|
|
|
|
| 470 |
planning_interval=10,
|
| 471 |
)
|
| 472 |
|
| 473 |
+
|
| 474 |
class EnrichedGradioUI(GradioUI):
|
| 475 |
def log_user_message(self, text_input):
|
| 476 |
import gradio as gr
|
|
|
|
| 480 |
gr.Button(interactive=False),
|
| 481 |
)
|
| 482 |
|
| 483 |
+
def interact_with_agent(self, task_input, stored_messages, session_state, session_hash, replay_log, request: gr.Request):
|
| 484 |
import gradio as gr
|
| 485 |
|
| 486 |
interaction_id = generate_interaction_id(request)
|
|
|
|
| 491 |
if not os.path.exists(data_dir):
|
| 492 |
os.makedirs(data_dir)
|
| 493 |
|
|
|
|
| 494 |
if "agent" in session_state:
|
| 495 |
session_state["agent"].data_dir = data_dir # Update data dir to new interaction
|
| 496 |
else:
|
| 497 |
session_state["agent"] = create_agent(data_dir=data_dir, desktop=desktop)
|
| 498 |
+
|
| 499 |
+
if replay_log is not None:
|
| 500 |
+
original_model = session_state["agent"].model
|
| 501 |
+
session_state["agent"].model = FakeModelReplayLog(replay_log)
|
| 502 |
|
| 503 |
try:
|
| 504 |
stored_messages.append(gr.ChatMessage(role="user", content=task_input))
|
|
|
|
| 531 |
save_final_status(data_dir, "failed", summary=[], error_message=error_message)
|
| 532 |
|
| 533 |
finally:
|
| 534 |
+
if replay_log: # Replace the model with original model
|
| 535 |
+
session_state["agent"].model = original_model
|
| 536 |
upload_to_hf_and_remove(data_dir)
|
| 537 |
|
| 538 |
theme = gr.themes.Default(font=["Oxanium", "sans-serif"], primary_hue="amber", secondary_hue="blue")
|
|
|
|
| 576 |
session_state = gr.State({})
|
| 577 |
stored_messages = gr.State([])
|
| 578 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 579 |
|
| 580 |
+
replay_btn = gr.Button("Replay an agent run")
|
| 581 |
|
| 582 |
minimalist_toggle = gr.Checkbox(label="Innie/Outie", value=False)
|
| 583 |
|
|
|
|
| 664 |
|
| 665 |
# Function to set view-only mode
|
| 666 |
def clear_and_set_view_only(task_input, request: gr.Request):
|
| 667 |
+
# set view-only mode
|
| 668 |
+
return update_html(False, request)
|
| 669 |
|
| 670 |
def set_interactive(request: gr.Request):
|
| 671 |
return update_html(True, request)
|
|
|
|
| 676 |
view_only_event = update_btn.click(
|
| 677 |
fn=clear_and_set_view_only,
|
| 678 |
inputs=[task_input],
|
| 679 |
+
outputs=[sandbox_html]
|
| 680 |
+
).then(
|
| 681 |
+
agent_ui.interact_with_agent,
|
| 682 |
+
inputs=[task_input, stored_messages, session_state, session_hash_state, None],
|
| 683 |
+
outputs=[chatbot_display]
|
| 684 |
+
).then(
|
| 685 |
+
fn=set_interactive,
|
| 686 |
+
inputs=[],
|
| 687 |
+
outputs=[sandbox_html]
|
| 688 |
+
)
|
| 689 |
+
|
| 690 |
+
replay_btn.click(
|
| 691 |
+
fn=clear_and_set_view_only,
|
| 692 |
+
inputs=[task_input],
|
| 693 |
+
outputs=[sandbox_html]
|
| 694 |
+
).then(
|
| 695 |
+
agent_ui.interact_with_agent,
|
| 696 |
+
inputs=[task_input, stored_messages, session_state, session_hash_state, "udupp2fyavq_1743170323"],
|
| 697 |
+
outputs=[chatbot_display]
|
| 698 |
+
).then(
|
| 699 |
fn=set_interactive,
|
| 700 |
inputs=[],
|
| 701 |
+
outputs=[sandbox_html]
|
| 702 |
)
|
| 703 |
|
| 704 |
demo.load(
|
model_replay.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from smolagents.models import Model, ChatMessage, Tool, MessageRole
|
| 2 |
+
from time import time
|
| 3 |
+
|
| 4 |
+
class FakeModelClass(Model):
    """A model that replays pre-recorded responses from a logged agent run.

    Instead of making API calls, each ``__call__`` returns the next
    ``model_output_message`` recorded in ``<log_folder>/metadata.json`` of the
    ``smolagents/computer-agent-logs`` dataset. Useful for testing and for
    replaying an agent run in the UI without a live model.

    Parameters:
        log_folder (str):
            Folder inside the dataset repo that contains ``metadata.json``
            for the run to replay.
        **kwargs: Additional keyword arguments passed to the Model base class.
    """

    def __init__(self, log_folder: str, **kwargs):
        super().__init__(**kwargs)
        # BUG FIX: the original had a trailing comma here, which made
        # dataset_name a 1-tuple and would break hf_hub_download(repo_id=...).
        self.dataset_name = "smolagents/computer-agent-logs"
        self.log_folder = log_folder
        self.call_counter = 0  # index of the next recorded response to replay
        self.model_outputs = self._load_model_outputs()

    def _load_model_outputs(self) -> "list":
        """Download the run's metadata.json and extract recorded model outputs.

        Returns:
            list: the ``model_output_message`` of each step in ``tool_calls``,
            in recorded order.
        """
        # Local imports keep this module importable even before these names
        # are added to the file-level import block.
        import json

        from huggingface_hub import hf_hub_download

        # Download the metadata file for this run from the Hugging Face Hub.
        file_path = hf_hub_download(
            repo_id=self.dataset_name,
            filename=self.log_folder + "/metadata.json",
            repo_type="dataset",
        )

        with open(file_path, "r") as f:
            log_data = json.load(f)

        # Keep only the recorded assistant output of each tool-call step.
        model_outputs = [
            step["model_output_message"]
            for step in log_data.get("tool_calls", [])
            if "model_output_message" in step
        ]

        print(f"Loaded {len(model_outputs)} model outputs from log file")
        return model_outputs

    def __call__(
        self,
        messages: "list[dict[str, str]]",
        stop_sequences: "list[str] | None" = None,
        grammar: "str | None" = None,
        tools_to_call_from: "list[Tool] | None" = None,
        **kwargs,
    ) -> ChatMessage:
        """Return the next pre-recorded response from the log file.

        All arguments are accepted for interface compatibility with Model
        and are ignored, except that ``messages`` is used to approximate
        the input token count.

        Returns:
            ChatMessage: The next pre-recorded response.
        """
        # BUG FIX: the module imported `from time import time` but called
        # time.sleep(); import the module locally so sleep() resolves.
        import time

        # Small pause so the replayed run is watchable in the UI.
        time.sleep(1.0)

        # Get the next model output; after the log is exhausted, return a
        # fixed sentinel message instead of raising.
        if self.call_counter < len(self.model_outputs):
            content = self.model_outputs[self.call_counter]
            self.call_counter += 1
        else:
            content = "No more pre-recorded responses available."

        # Token counts are simulated (~4 characters per token).
        self.last_input_token_count = len(str(messages)) // 4
        self.last_output_token_count = len(content) // 4

        return ChatMessage(
            role=MessageRole.ASSISTANT,
            content=content,
            tool_calls=None,
            raw={"source": "pre-recorded log", "call_number": self.call_counter},
        )


# NOTE(review): app.py instantiates this model as FakeModelReplayLog(replay_log);
# provide that name as a backward-compatible alias so both spellings work.
FakeModelReplayLog = FakeModelClass