Spaces:
Running
on
Zero
Running
on
Zero
update app
Browse files
app.py
CHANGED
|
@@ -192,6 +192,18 @@ class FaraLocalModel(Model):
|
|
| 192 |
# SELENIUM CHROME SANDBOX
|
| 193 |
# -----------------------------------------------------------------------------
|
| 194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
class SeleniumSandbox:
|
| 196 |
def __init__(self, width=1024, height=768):
|
| 197 |
self.width = width
|
|
@@ -200,28 +212,37 @@ class SeleniumSandbox:
|
|
| 200 |
|
| 201 |
# Setup Chrome Options
|
| 202 |
chrome_opts = ChromeOptions()
|
| 203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
chrome_opts.add_argument(f"--user-data-dir={self.tmp_dir}")
|
| 205 |
chrome_opts.add_argument(f"--window-size={width},{height}")
|
| 206 |
-
chrome_opts.add_argument("--no-sandbox")
|
| 207 |
-
chrome_opts.add_argument("--disable-dev-shm-usage")
|
| 208 |
-
chrome_opts.add_argument("--disable-extensions")
|
| 209 |
chrome_opts.add_argument("--disable-gpu")
|
| 210 |
-
chrome_opts.add_argument("--
|
| 211 |
-
chrome_opts.add_argument("--hide-scrollbars")
|
| 212 |
|
| 213 |
# Initialize Driver
|
| 214 |
try:
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
|
|
|
|
|
|
| 221 |
|
| 222 |
-
|
|
|
|
| 223 |
self.driver.get("about:blank")
|
| 224 |
-
print(f"Selenium Chrome Driver started
|
|
|
|
| 225 |
except Exception as e:
|
| 226 |
print(f"Failed to initialize Selenium: {e}")
|
| 227 |
self.cleanup()
|
|
@@ -233,25 +254,17 @@ class SeleniumSandbox:
|
|
| 233 |
return Image.open(BytesIO(png_data))
|
| 234 |
|
| 235 |
def move_mouse_and_click(self, x, y, click_type="left"):
|
| 236 |
-
"""
|
| 237 |
-
Simulate mouse movement and click using ActionChains.
|
| 238 |
-
Note: Selenium tracks state, so we move relative to the 'body' tag to ensure absolute positioning simulation.
|
| 239 |
-
"""
|
| 240 |
try:
|
| 241 |
body = self.driver.find_element(By.TAG_NAME, "body")
|
| 242 |
actions = ActionChains(self.driver)
|
| 243 |
-
|
| 244 |
-
# Move to 0,0 of body, then offset
|
| 245 |
actions.move_to_element_with_offset(body, 0, 0)
|
| 246 |
actions.move_by_offset(x, y)
|
| 247 |
-
|
| 248 |
if click_type == "left":
|
| 249 |
actions.click()
|
| 250 |
elif click_type == "right":
|
| 251 |
actions.context_click()
|
| 252 |
elif click_type == "double":
|
| 253 |
actions.double_click()
|
| 254 |
-
|
| 255 |
actions.perform()
|
| 256 |
except Exception as e:
|
| 257 |
print(f"Error in move_mouse_and_click: {e}")
|
|
@@ -260,12 +273,9 @@ class SeleniumSandbox:
|
|
| 260 |
try:
|
| 261 |
body = self.driver.find_element(By.TAG_NAME, "body")
|
| 262 |
actions = ActionChains(self.driver)
|
| 263 |
-
|
| 264 |
actions.move_to_element_with_offset(body, 0, 0)
|
| 265 |
actions.move_by_offset(x1, y1)
|
| 266 |
actions.click_and_hold()
|
| 267 |
-
|
| 268 |
-
# Move relative from x1,y1 to x2,y2
|
| 269 |
actions.move_by_offset(x2 - x1, y2 - y1)
|
| 270 |
actions.release()
|
| 271 |
actions.perform()
|
|
@@ -281,13 +291,11 @@ class SeleniumSandbox:
|
|
| 281 |
try:
|
| 282 |
k = getattr(Keys, key_name.upper(), None)
|
| 283 |
if not k:
|
| 284 |
-
# Handle common overrides
|
| 285 |
if key_name.lower() == "enter": k = Keys.ENTER
|
| 286 |
elif key_name.lower() == "space": k = Keys.SPACE
|
| 287 |
elif key_name.lower() == "backspace": k = Keys.BACK_SPACE
|
| 288 |
elif key_name.lower() == "esc": k = Keys.ESCAPE
|
| 289 |
-
else: k = key_name
|
| 290 |
-
|
| 291 |
actions = ActionChains(self.driver)
|
| 292 |
actions.send_keys(k)
|
| 293 |
actions.perform()
|
|
@@ -295,9 +303,8 @@ class SeleniumSandbox:
|
|
| 295 |
print(f"Error pressing key: {e}")
|
| 296 |
|
| 297 |
def scroll(self, amount, direction="down"):
|
| 298 |
-
# Selenium doesn't have a great scroll wheel primitive, use JS
|
| 299 |
try:
|
| 300 |
-
scroll_y = amount * 100
|
| 301 |
if direction == "up":
|
| 302 |
scroll_y = -scroll_y
|
| 303 |
self.driver.execute_script(f"window.scrollBy(0, {scroll_y});")
|
|
@@ -604,7 +611,6 @@ def save_final_status(folder, status: str, summary, error_message=None) -> None:
|
|
| 604 |
# UI & APP
|
| 605 |
# -----------------------------------------------------------------------------
|
| 606 |
|
| 607 |
-
# CSS
|
| 608 |
custom_css = """
|
| 609 |
.modal-container { margin: var(--size-16) auto!important; }
|
| 610 |
.browser-container { position: relative; width: 100%; height: 600px; border: 1px solid #444; background: #222; display: flex; align-items: center; justify-content: center; overflow: hidden; }
|
|
@@ -612,13 +618,6 @@ custom_css = """
|
|
| 612 |
#chatbot { height: 800px!important; }
|
| 613 |
"""
|
| 614 |
|
| 615 |
-
# HTML Template for the output area (Replaced IFrame with simple image holder logic handled by Gradio Image)
|
| 616 |
-
browser_html = """
|
| 617 |
-
<div class="browser-container">
|
| 618 |
-
<p style="color: #888;">Browser Screenshot will appear here after steps.</p>
|
| 619 |
-
</div>
|
| 620 |
-
"""
|
| 621 |
-
|
| 622 |
class EnrichedGradioUI(GradioUI):
|
| 623 |
def interact_with_agent(
|
| 624 |
self,
|
|
@@ -632,7 +631,6 @@ class EnrichedGradioUI(GradioUI):
|
|
| 632 |
interaction_id = generate_interaction_id(session_uuid)
|
| 633 |
data_dir = os.path.join(TMP_DIR, interaction_id)
|
| 634 |
|
| 635 |
-
# Initialize Sandbox per run (clean slate)
|
| 636 |
sandbox = SeleniumSandbox(width=WIDTH, height=HEIGHT)
|
| 637 |
agent = create_agent(data_dir=data_dir, sandbox=sandbox)
|
| 638 |
session_state["agent"] = agent
|
|
@@ -641,19 +639,15 @@ class EnrichedGradioUI(GradioUI):
|
|
| 641 |
stored_messages.append(gr.ChatMessage(role="user", content=task_input))
|
| 642 |
yield stored_messages, None
|
| 643 |
|
| 644 |
-
# Initial screenshot
|
| 645 |
screenshot = sandbox.get_screenshot()
|
| 646 |
|
| 647 |
-
# Run Agent
|
| 648 |
for msg in stream_to_gradio(
|
| 649 |
agent,
|
| 650 |
task=task_input,
|
| 651 |
task_images=[screenshot],
|
| 652 |
reset_agent_memory=False,
|
| 653 |
):
|
| 654 |
-
# Update Chat
|
| 655 |
if hasattr(agent, "last_marked_screenshot") and msg.content == "-----":
|
| 656 |
-
# Add image to chat
|
| 657 |
stored_messages.append(
|
| 658 |
gr.ChatMessage(
|
| 659 |
role="assistant",
|
|
@@ -663,11 +657,10 @@ class EnrichedGradioUI(GradioUI):
|
|
| 663 |
},
|
| 664 |
)
|
| 665 |
)
|
| 666 |
-
# Yield updated chat AND the latest screenshot for the side panel
|
| 667 |
yield stored_messages, agent.last_marked_screenshot.to_string()
|
| 668 |
else:
|
| 669 |
stored_messages.append(msg)
|
| 670 |
-
yield stored_messages, None
|
| 671 |
|
| 672 |
if consent_storage:
|
| 673 |
summary = get_agent_summary_erase_images(agent)
|
|
@@ -683,10 +676,8 @@ class EnrichedGradioUI(GradioUI):
|
|
| 683 |
)
|
| 684 |
yield stored_messages, None
|
| 685 |
finally:
|
| 686 |
-
# Important: Cleanup Sandbox
|
| 687 |
sandbox.cleanup()
|
| 688 |
|
| 689 |
-
# Gradio Block Construction
|
| 690 |
theme = gr.themes.Default(
|
| 691 |
font=["Oxanium", "sans-serif"], primary_hue="amber", secondary_hue="blue"
|
| 692 |
)
|
|
@@ -697,7 +688,6 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
|
|
| 697 |
stored_messages = gr.State([])
|
| 698 |
|
| 699 |
with gr.Row():
|
| 700 |
-
# Left Sidebar: Inputs
|
| 701 |
with gr.Column(scale=1):
|
| 702 |
gr.Markdown("### Fara CUA - Chrome Agent 🌐")
|
| 703 |
|
|
@@ -708,22 +698,18 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
|
|
| 708 |
)
|
| 709 |
run_btn = gr.Button("Start Task", variant="primary")
|
| 710 |
stop_btn = gr.Button("Stop", variant="secondary")
|
| 711 |
-
|
| 712 |
consent_storage = gr.Checkbox(label="Save logs locally?", value=True)
|
| 713 |
|
| 714 |
gr.Examples(
|
| 715 |
examples=[
|
| 716 |
"Go to google.com and search for 'Hugging Face', then click the first link.",
|
| 717 |
"Go to wikipedia.org, type 'Python' in search, and click the search button.",
|
| 718 |
-
"Open huggingface.co and find the 'Spaces' link."
|
| 719 |
],
|
| 720 |
inputs=task_input
|
| 721 |
)
|
| 722 |
|
| 723 |
-
# Right Main: Chat & Live View
|
| 724 |
with gr.Column(scale=3):
|
| 725 |
with gr.Row():
|
| 726 |
-
# Side-by-side: Chat and Latest Screenshot
|
| 727 |
with gr.Column(scale=1):
|
| 728 |
chatbot_display = gr.Chatbot(
|
| 729 |
label="Agent Trace",
|
|
@@ -740,7 +726,6 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
|
|
| 740 |
height=600
|
| 741 |
)
|
| 742 |
|
| 743 |
-
# UI Handler
|
| 744 |
agent_ui = EnrichedGradioUI(CodeAgent(tools=[], model=Model(), name="init"))
|
| 745 |
|
| 746 |
def interrupt_agent(session_state):
|
|
@@ -748,7 +733,6 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
|
|
| 748 |
session_state["agent"].interrupt_switch = True
|
| 749 |
return "Interrupted"
|
| 750 |
|
| 751 |
-
# Run Logic
|
| 752 |
run_event = run_btn.click(
|
| 753 |
fn=agent_ui.interact_with_agent,
|
| 754 |
inputs=[
|
|
@@ -764,4 +748,4 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
|
|
| 764 |
stop_btn.click(fn=interrupt_agent, inputs=[session_state], outputs=[])
|
| 765 |
|
| 766 |
if __name__ == "__main__":
|
| 767 |
-
demo.launch()
|
|
|
|
| 192 |
# SELENIUM CHROME SANDBOX
|
| 193 |
# -----------------------------------------------------------------------------
|
| 194 |
|
| 195 |
+
def get_system_chrome_path():
|
| 196 |
+
# Common paths for chromium in Linux/HF Spaces
|
| 197 |
+
paths = [
|
| 198 |
+
"/usr/bin/chromium",
|
| 199 |
+
"/usr/bin/chromium-browser",
|
| 200 |
+
"/usr/bin/google-chrome",
|
| 201 |
+
]
|
| 202 |
+
for p in paths:
|
| 203 |
+
if os.path.exists(p):
|
| 204 |
+
return p
|
| 205 |
+
return None
|
| 206 |
+
|
| 207 |
class SeleniumSandbox:
|
| 208 |
def __init__(self, width=1024, height=768):
|
| 209 |
self.width = width
|
|
|
|
| 212 |
|
| 213 |
# Setup Chrome Options
|
| 214 |
chrome_opts = ChromeOptions()
|
| 215 |
+
|
| 216 |
+
# Use system binary if available (fixes status 127 in HF Spaces)
|
| 217 |
+
binary_path = get_system_chrome_path()
|
| 218 |
+
if binary_path:
|
| 219 |
+
print(f"Using system Chrome binary at: {binary_path}")
|
| 220 |
+
chrome_opts.binary_location = binary_path
|
| 221 |
+
|
| 222 |
+
chrome_opts.add_argument("--headless=new")
|
| 223 |
chrome_opts.add_argument(f"--user-data-dir={self.tmp_dir}")
|
| 224 |
chrome_opts.add_argument(f"--window-size={width},{height}")
|
| 225 |
+
chrome_opts.add_argument("--no-sandbox") # Crucial for containers
|
| 226 |
+
chrome_opts.add_argument("--disable-dev-shm-usage") # Crucial for containers
|
|
|
|
| 227 |
chrome_opts.add_argument("--disable-gpu")
|
| 228 |
+
chrome_opts.add_argument("--disable-extensions")
|
|
|
|
| 229 |
|
| 230 |
# Initialize Driver
|
| 231 |
try:
|
| 232 |
+
# Check for system driver first
|
| 233 |
+
system_driver_path = "/usr/bin/chromedriver"
|
| 234 |
+
if os.path.exists(system_driver_path):
|
| 235 |
+
print(f"Using system ChromeDriver at: {system_driver_path}")
|
| 236 |
+
service = ChromeService(executable_path=system_driver_path)
|
| 237 |
+
else:
|
| 238 |
+
print("Using webdriver_manager to install ChromeDriver...")
|
| 239 |
+
service = ChromeService(ChromeDriverManager().install())
|
| 240 |
|
| 241 |
+
self.driver = webdriver.Chrome(service=service, options=chrome_opts)
|
| 242 |
+
self.driver.set_window_size(width, height)
|
| 243 |
self.driver.get("about:blank")
|
| 244 |
+
print(f"Selenium Chrome Driver started successfully.")
|
| 245 |
+
|
| 246 |
except Exception as e:
|
| 247 |
print(f"Failed to initialize Selenium: {e}")
|
| 248 |
self.cleanup()
|
|
|
|
| 254 |
return Image.open(BytesIO(png_data))
|
| 255 |
|
| 256 |
def move_mouse_and_click(self, x, y, click_type="left"):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
try:
|
| 258 |
body = self.driver.find_element(By.TAG_NAME, "body")
|
| 259 |
actions = ActionChains(self.driver)
|
|
|
|
|
|
|
| 260 |
actions.move_to_element_with_offset(body, 0, 0)
|
| 261 |
actions.move_by_offset(x, y)
|
|
|
|
| 262 |
if click_type == "left":
|
| 263 |
actions.click()
|
| 264 |
elif click_type == "right":
|
| 265 |
actions.context_click()
|
| 266 |
elif click_type == "double":
|
| 267 |
actions.double_click()
|
|
|
|
| 268 |
actions.perform()
|
| 269 |
except Exception as e:
|
| 270 |
print(f"Error in move_mouse_and_click: {e}")
|
|
|
|
| 273 |
try:
|
| 274 |
body = self.driver.find_element(By.TAG_NAME, "body")
|
| 275 |
actions = ActionChains(self.driver)
|
|
|
|
| 276 |
actions.move_to_element_with_offset(body, 0, 0)
|
| 277 |
actions.move_by_offset(x1, y1)
|
| 278 |
actions.click_and_hold()
|
|
|
|
|
|
|
| 279 |
actions.move_by_offset(x2 - x1, y2 - y1)
|
| 280 |
actions.release()
|
| 281 |
actions.perform()
|
|
|
|
| 291 |
try:
|
| 292 |
k = getattr(Keys, key_name.upper(), None)
|
| 293 |
if not k:
|
|
|
|
| 294 |
if key_name.lower() == "enter": k = Keys.ENTER
|
| 295 |
elif key_name.lower() == "space": k = Keys.SPACE
|
| 296 |
elif key_name.lower() == "backspace": k = Keys.BACK_SPACE
|
| 297 |
elif key_name.lower() == "esc": k = Keys.ESCAPE
|
| 298 |
+
else: k = key_name
|
|
|
|
| 299 |
actions = ActionChains(self.driver)
|
| 300 |
actions.send_keys(k)
|
| 301 |
actions.perform()
|
|
|
|
| 303 |
print(f"Error pressing key: {e}")
|
| 304 |
|
| 305 |
def scroll(self, amount, direction="down"):
|
|
|
|
| 306 |
try:
|
| 307 |
+
scroll_y = amount * 100
|
| 308 |
if direction == "up":
|
| 309 |
scroll_y = -scroll_y
|
| 310 |
self.driver.execute_script(f"window.scrollBy(0, {scroll_y});")
|
|
|
|
| 611 |
# UI & APP
|
| 612 |
# -----------------------------------------------------------------------------
|
| 613 |
|
|
|
|
| 614 |
custom_css = """
|
| 615 |
.modal-container { margin: var(--size-16) auto!important; }
|
| 616 |
.browser-container { position: relative; width: 100%; height: 600px; border: 1px solid #444; background: #222; display: flex; align-items: center; justify-content: center; overflow: hidden; }
|
|
|
|
| 618 |
#chatbot { height: 800px!important; }
|
| 619 |
"""
|
| 620 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 621 |
class EnrichedGradioUI(GradioUI):
|
| 622 |
def interact_with_agent(
|
| 623 |
self,
|
|
|
|
| 631 |
interaction_id = generate_interaction_id(session_uuid)
|
| 632 |
data_dir = os.path.join(TMP_DIR, interaction_id)
|
| 633 |
|
|
|
|
| 634 |
sandbox = SeleniumSandbox(width=WIDTH, height=HEIGHT)
|
| 635 |
agent = create_agent(data_dir=data_dir, sandbox=sandbox)
|
| 636 |
session_state["agent"] = agent
|
|
|
|
| 639 |
stored_messages.append(gr.ChatMessage(role="user", content=task_input))
|
| 640 |
yield stored_messages, None
|
| 641 |
|
|
|
|
| 642 |
screenshot = sandbox.get_screenshot()
|
| 643 |
|
|
|
|
| 644 |
for msg in stream_to_gradio(
|
| 645 |
agent,
|
| 646 |
task=task_input,
|
| 647 |
task_images=[screenshot],
|
| 648 |
reset_agent_memory=False,
|
| 649 |
):
|
|
|
|
| 650 |
if hasattr(agent, "last_marked_screenshot") and msg.content == "-----":
|
|
|
|
| 651 |
stored_messages.append(
|
| 652 |
gr.ChatMessage(
|
| 653 |
role="assistant",
|
|
|
|
| 657 |
},
|
| 658 |
)
|
| 659 |
)
|
|
|
|
| 660 |
yield stored_messages, agent.last_marked_screenshot.to_string()
|
| 661 |
else:
|
| 662 |
stored_messages.append(msg)
|
| 663 |
+
yield stored_messages, None
|
| 664 |
|
| 665 |
if consent_storage:
|
| 666 |
summary = get_agent_summary_erase_images(agent)
|
|
|
|
| 676 |
)
|
| 677 |
yield stored_messages, None
|
| 678 |
finally:
|
|
|
|
| 679 |
sandbox.cleanup()
|
| 680 |
|
|
|
|
| 681 |
theme = gr.themes.Default(
|
| 682 |
font=["Oxanium", "sans-serif"], primary_hue="amber", secondary_hue="blue"
|
| 683 |
)
|
|
|
|
| 688 |
stored_messages = gr.State([])
|
| 689 |
|
| 690 |
with gr.Row():
|
|
|
|
| 691 |
with gr.Column(scale=1):
|
| 692 |
gr.Markdown("### Fara CUA - Chrome Agent 🌐")
|
| 693 |
|
|
|
|
| 698 |
)
|
| 699 |
run_btn = gr.Button("Start Task", variant="primary")
|
| 700 |
stop_btn = gr.Button("Stop", variant="secondary")
|
|
|
|
| 701 |
consent_storage = gr.Checkbox(label="Save logs locally?", value=True)
|
| 702 |
|
| 703 |
gr.Examples(
|
| 704 |
examples=[
|
| 705 |
"Go to google.com and search for 'Hugging Face', then click the first link.",
|
| 706 |
"Go to wikipedia.org, type 'Python' in search, and click the search button.",
|
|
|
|
| 707 |
],
|
| 708 |
inputs=task_input
|
| 709 |
)
|
| 710 |
|
|
|
|
| 711 |
with gr.Column(scale=3):
|
| 712 |
with gr.Row():
|
|
|
|
| 713 |
with gr.Column(scale=1):
|
| 714 |
chatbot_display = gr.Chatbot(
|
| 715 |
label="Agent Trace",
|
|
|
|
| 726 |
height=600
|
| 727 |
)
|
| 728 |
|
|
|
|
| 729 |
agent_ui = EnrichedGradioUI(CodeAgent(tools=[], model=Model(), name="init"))
|
| 730 |
|
| 731 |
def interrupt_agent(session_state):
|
|
|
|
| 733 |
session_state["agent"].interrupt_switch = True
|
| 734 |
return "Interrupted"
|
| 735 |
|
|
|
|
| 736 |
run_event = run_btn.click(
|
| 737 |
fn=agent_ui.interact_with_agent,
|
| 738 |
inputs=[
|
|
|
|
| 748 |
stop_btn.click(fn=interrupt_agent, inputs=[session_state], outputs=[])
|
| 749 |
|
| 750 |
if __name__ == "__main__":
|
| 751 |
+
demo.launch(share=True)
|