prithivMLmods committed on
Commit
5cfb40e
·
verified ·
1 Parent(s): 8bd0249

update app

Browse files
Files changed (1) hide show
  1. app.py +39 -55
app.py CHANGED
@@ -192,6 +192,18 @@ class FaraLocalModel(Model):
192
  # SELENIUM CHROME SANDBOX
193
  # -----------------------------------------------------------------------------
194
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  class SeleniumSandbox:
196
  def __init__(self, width=1024, height=768):
197
  self.width = width
@@ -200,28 +212,37 @@ class SeleniumSandbox:
200
 
201
  # Setup Chrome Options
202
  chrome_opts = ChromeOptions()
203
- chrome_opts.add_argument("--headless=new") # Run headless for Space compatibility
 
 
 
 
 
 
 
204
  chrome_opts.add_argument(f"--user-data-dir={self.tmp_dir}")
205
  chrome_opts.add_argument(f"--window-size={width},{height}")
206
- chrome_opts.add_argument("--no-sandbox")
207
- chrome_opts.add_argument("--disable-dev-shm-usage")
208
- chrome_opts.add_argument("--disable-extensions")
209
  chrome_opts.add_argument("--disable-gpu")
210
- chrome_opts.add_argument("--force-device-scale-factor=1")
211
- chrome_opts.add_argument("--hide-scrollbars")
212
 
213
  # Initialize Driver
214
  try:
215
- self.driver = webdriver.Chrome(
216
- service=ChromeService(ChromeDriverManager().install()),
217
- options=chrome_opts
218
- )
219
- # Ensure precise viewport size (excluding chrome UI overhead if any)
220
- self.driver.set_window_size(width, height)
 
 
221
 
222
- # Start with a blank page
 
223
  self.driver.get("about:blank")
224
- print(f"Selenium Chrome Driver started. Data dir: {self.tmp_dir}")
 
225
  except Exception as e:
226
  print(f"Failed to initialize Selenium: {e}")
227
  self.cleanup()
@@ -233,25 +254,17 @@ class SeleniumSandbox:
233
  return Image.open(BytesIO(png_data))
234
 
235
  def move_mouse_and_click(self, x, y, click_type="left"):
236
- """
237
- Simulate mouse movement and click using ActionChains.
238
- Note: Selenium tracks state, so we move relative to the 'body' tag to ensure absolute positioning simulation.
239
- """
240
  try:
241
  body = self.driver.find_element(By.TAG_NAME, "body")
242
  actions = ActionChains(self.driver)
243
-
244
- # Move to 0,0 of body, then offset
245
  actions.move_to_element_with_offset(body, 0, 0)
246
  actions.move_by_offset(x, y)
247
-
248
  if click_type == "left":
249
  actions.click()
250
  elif click_type == "right":
251
  actions.context_click()
252
  elif click_type == "double":
253
  actions.double_click()
254
-
255
  actions.perform()
256
  except Exception as e:
257
  print(f"Error in move_mouse_and_click: {e}")
@@ -260,12 +273,9 @@ class SeleniumSandbox:
260
  try:
261
  body = self.driver.find_element(By.TAG_NAME, "body")
262
  actions = ActionChains(self.driver)
263
-
264
  actions.move_to_element_with_offset(body, 0, 0)
265
  actions.move_by_offset(x1, y1)
266
  actions.click_and_hold()
267
-
268
- # Move relative from x1,y1 to x2,y2
269
  actions.move_by_offset(x2 - x1, y2 - y1)
270
  actions.release()
271
  actions.perform()
@@ -281,13 +291,11 @@ class SeleniumSandbox:
281
  try:
282
  k = getattr(Keys, key_name.upper(), None)
283
  if not k:
284
- # Handle common overrides
285
  if key_name.lower() == "enter": k = Keys.ENTER
286
  elif key_name.lower() == "space": k = Keys.SPACE
287
  elif key_name.lower() == "backspace": k = Keys.BACK_SPACE
288
  elif key_name.lower() == "esc": k = Keys.ESCAPE
289
- else: k = key_name # Fallback to literal
290
-
291
  actions = ActionChains(self.driver)
292
  actions.send_keys(k)
293
  actions.perform()
@@ -295,9 +303,8 @@ class SeleniumSandbox:
295
  print(f"Error pressing key: {e}")
296
 
297
  def scroll(self, amount, direction="down"):
298
- # Selenium doesn't have a great scroll wheel primitive, use JS
299
  try:
300
- scroll_y = amount * 100 # Arbitrary multiplier to match "notches"
301
  if direction == "up":
302
  scroll_y = -scroll_y
303
  self.driver.execute_script(f"window.scrollBy(0, {scroll_y});")
@@ -604,7 +611,6 @@ def save_final_status(folder, status: str, summary, error_message=None) -> None:
604
  # UI & APP
605
  # -----------------------------------------------------------------------------
606
 
607
- # CSS
608
  custom_css = """
609
  .modal-container { margin: var(--size-16) auto!important; }
610
  .browser-container { position: relative; width: 100%; height: 600px; border: 1px solid #444; background: #222; display: flex; align-items: center; justify-content: center; overflow: hidden; }
@@ -612,13 +618,6 @@ custom_css = """
612
  #chatbot { height: 800px!important; }
613
  """
614
 
615
- # HTML Template for the output area (Replaced IFrame with simple image holder logic handled by Gradio Image)
616
- browser_html = """
617
- <div class="browser-container">
618
- <p style="color: #888;">Browser Screenshot will appear here after steps.</p>
619
- </div>
620
- """
621
-
622
  class EnrichedGradioUI(GradioUI):
623
  def interact_with_agent(
624
  self,
@@ -632,7 +631,6 @@ class EnrichedGradioUI(GradioUI):
632
  interaction_id = generate_interaction_id(session_uuid)
633
  data_dir = os.path.join(TMP_DIR, interaction_id)
634
 
635
- # Initialize Sandbox per run (clean slate)
636
  sandbox = SeleniumSandbox(width=WIDTH, height=HEIGHT)
637
  agent = create_agent(data_dir=data_dir, sandbox=sandbox)
638
  session_state["agent"] = agent
@@ -641,19 +639,15 @@ class EnrichedGradioUI(GradioUI):
641
  stored_messages.append(gr.ChatMessage(role="user", content=task_input))
642
  yield stored_messages, None
643
 
644
- # Initial screenshot
645
  screenshot = sandbox.get_screenshot()
646
 
647
- # Run Agent
648
  for msg in stream_to_gradio(
649
  agent,
650
  task=task_input,
651
  task_images=[screenshot],
652
  reset_agent_memory=False,
653
  ):
654
- # Update Chat
655
  if hasattr(agent, "last_marked_screenshot") and msg.content == "-----":
656
- # Add image to chat
657
  stored_messages.append(
658
  gr.ChatMessage(
659
  role="assistant",
@@ -663,11 +657,10 @@ class EnrichedGradioUI(GradioUI):
663
  },
664
  )
665
  )
666
- # Yield updated chat AND the latest screenshot for the side panel
667
  yield stored_messages, agent.last_marked_screenshot.to_string()
668
  else:
669
  stored_messages.append(msg)
670
- yield stored_messages, None # Keep previous side image
671
 
672
  if consent_storage:
673
  summary = get_agent_summary_erase_images(agent)
@@ -683,10 +676,8 @@ class EnrichedGradioUI(GradioUI):
683
  )
684
  yield stored_messages, None
685
  finally:
686
- # Important: Cleanup Sandbox
687
  sandbox.cleanup()
688
 
689
- # Gradio Block Construction
690
  theme = gr.themes.Default(
691
  font=["Oxanium", "sans-serif"], primary_hue="amber", secondary_hue="blue"
692
  )
@@ -697,7 +688,6 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
697
  stored_messages = gr.State([])
698
 
699
  with gr.Row():
700
- # Left Sidebar: Inputs
701
  with gr.Column(scale=1):
702
  gr.Markdown("### Fara CUA - Chrome Agent 🌐")
703
 
@@ -708,22 +698,18 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
708
  )
709
  run_btn = gr.Button("Start Task", variant="primary")
710
  stop_btn = gr.Button("Stop", variant="secondary")
711
-
712
  consent_storage = gr.Checkbox(label="Save logs locally?", value=True)
713
 
714
  gr.Examples(
715
  examples=[
716
  "Go to google.com and search for 'Hugging Face', then click the first link.",
717
  "Go to wikipedia.org, type 'Python' in search, and click the search button.",
718
- "Open huggingface.co and find the 'Spaces' link."
719
  ],
720
  inputs=task_input
721
  )
722
 
723
- # Right Main: Chat & Live View
724
  with gr.Column(scale=3):
725
  with gr.Row():
726
- # Side-by-side: Chat and Latest Screenshot
727
  with gr.Column(scale=1):
728
  chatbot_display = gr.Chatbot(
729
  label="Agent Trace",
@@ -740,7 +726,6 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
740
  height=600
741
  )
742
 
743
- # UI Handler
744
  agent_ui = EnrichedGradioUI(CodeAgent(tools=[], model=Model(), name="init"))
745
 
746
  def interrupt_agent(session_state):
@@ -748,7 +733,6 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
748
  session_state["agent"].interrupt_switch = True
749
  return "Interrupted"
750
 
751
- # Run Logic
752
  run_event = run_btn.click(
753
  fn=agent_ui.interact_with_agent,
754
  inputs=[
@@ -764,4 +748,4 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
764
  stop_btn.click(fn=interrupt_agent, inputs=[session_state], outputs=[])
765
 
766
  if __name__ == "__main__":
767
- demo.launch()
 
192
  # SELENIUM CHROME SANDBOX
193
  # -----------------------------------------------------------------------------
194
 
195
def get_system_chrome_path():
    """Return the path of a system-installed Chrome/Chromium binary, or None.

    The fixed candidate locations below are probed in priority order and the
    first one that is a regular file with execute permission is returned.

    Returns:
        The matching path as a string, or None when no candidate qualifies.
    """
    # Common paths for Chromium/Chrome on Linux images (e.g. HF Spaces).
    candidates = (
        "/usr/bin/chromium",
        "/usr/bin/chromium-browser",
        "/usr/bin/google-chrome",
    )
    for candidate in candidates:
        # isfile + X_OK instead of a bare exists(): a directory or a
        # non-executable file at one of these paths cannot be launched as a
        # browser, so it must not be reported as a usable binary.
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            return candidate
    return None
206
+
207
  class SeleniumSandbox:
208
  def __init__(self, width=1024, height=768):
209
  self.width = width
 
212
 
213
  # Setup Chrome Options
214
  chrome_opts = ChromeOptions()
215
+
216
+ # Use system binary if available (fixes status 127 in HF Spaces)
217
+ binary_path = get_system_chrome_path()
218
+ if binary_path:
219
+ print(f"Using system Chrome binary at: {binary_path}")
220
+ chrome_opts.binary_location = binary_path
221
+
222
+ chrome_opts.add_argument("--headless=new")
223
  chrome_opts.add_argument(f"--user-data-dir={self.tmp_dir}")
224
  chrome_opts.add_argument(f"--window-size={width},{height}")
225
+ chrome_opts.add_argument("--no-sandbox") # Crucial for containers
226
+ chrome_opts.add_argument("--disable-dev-shm-usage") # Crucial for containers
 
227
  chrome_opts.add_argument("--disable-gpu")
228
+ chrome_opts.add_argument("--disable-extensions")
 
229
 
230
  # Initialize Driver
231
  try:
232
+ # Check for system driver first
233
+ system_driver_path = "/usr/bin/chromedriver"
234
+ if os.path.exists(system_driver_path):
235
+ print(f"Using system ChromeDriver at: {system_driver_path}")
236
+ service = ChromeService(executable_path=system_driver_path)
237
+ else:
238
+ print("Using webdriver_manager to install ChromeDriver...")
239
+ service = ChromeService(ChromeDriverManager().install())
240
 
241
+ self.driver = webdriver.Chrome(service=service, options=chrome_opts)
242
+ self.driver.set_window_size(width, height)
243
  self.driver.get("about:blank")
244
+ print(f"Selenium Chrome Driver started successfully.")
245
+
246
  except Exception as e:
247
  print(f"Failed to initialize Selenium: {e}")
248
  self.cleanup()
 
254
  return Image.open(BytesIO(png_data))
255
 
256
  def move_mouse_and_click(self, x, y, click_type="left"):
 
 
 
 
257
  try:
258
  body = self.driver.find_element(By.TAG_NAME, "body")
259
  actions = ActionChains(self.driver)
 
 
260
  actions.move_to_element_with_offset(body, 0, 0)
261
  actions.move_by_offset(x, y)
 
262
  if click_type == "left":
263
  actions.click()
264
  elif click_type == "right":
265
  actions.context_click()
266
  elif click_type == "double":
267
  actions.double_click()
 
268
  actions.perform()
269
  except Exception as e:
270
  print(f"Error in move_mouse_and_click: {e}")
 
273
  try:
274
  body = self.driver.find_element(By.TAG_NAME, "body")
275
  actions = ActionChains(self.driver)
 
276
  actions.move_to_element_with_offset(body, 0, 0)
277
  actions.move_by_offset(x1, y1)
278
  actions.click_and_hold()
 
 
279
  actions.move_by_offset(x2 - x1, y2 - y1)
280
  actions.release()
281
  actions.perform()
 
291
  try:
292
  k = getattr(Keys, key_name.upper(), None)
293
  if not k:
 
294
  if key_name.lower() == "enter": k = Keys.ENTER
295
  elif key_name.lower() == "space": k = Keys.SPACE
296
  elif key_name.lower() == "backspace": k = Keys.BACK_SPACE
297
  elif key_name.lower() == "esc": k = Keys.ESCAPE
298
+ else: k = key_name
 
299
  actions = ActionChains(self.driver)
300
  actions.send_keys(k)
301
  actions.perform()
 
303
  print(f"Error pressing key: {e}")
304
 
305
  def scroll(self, amount, direction="down"):
 
306
  try:
307
+ scroll_y = amount * 100
308
  if direction == "up":
309
  scroll_y = -scroll_y
310
  self.driver.execute_script(f"window.scrollBy(0, {scroll_y});")
 
611
  # UI & APP
612
  # -----------------------------------------------------------------------------
613
 
 
614
  custom_css = """
615
  .modal-container { margin: var(--size-16) auto!important; }
616
  .browser-container { position: relative; width: 100%; height: 600px; border: 1px solid #444; background: #222; display: flex; align-items: center; justify-content: center; overflow: hidden; }
 
618
  #chatbot { height: 800px!important; }
619
  """
620
 
 
 
 
 
 
 
 
621
  class EnrichedGradioUI(GradioUI):
622
  def interact_with_agent(
623
  self,
 
631
  interaction_id = generate_interaction_id(session_uuid)
632
  data_dir = os.path.join(TMP_DIR, interaction_id)
633
 
 
634
  sandbox = SeleniumSandbox(width=WIDTH, height=HEIGHT)
635
  agent = create_agent(data_dir=data_dir, sandbox=sandbox)
636
  session_state["agent"] = agent
 
639
  stored_messages.append(gr.ChatMessage(role="user", content=task_input))
640
  yield stored_messages, None
641
 
 
642
  screenshot = sandbox.get_screenshot()
643
 
 
644
  for msg in stream_to_gradio(
645
  agent,
646
  task=task_input,
647
  task_images=[screenshot],
648
  reset_agent_memory=False,
649
  ):
 
650
  if hasattr(agent, "last_marked_screenshot") and msg.content == "-----":
 
651
  stored_messages.append(
652
  gr.ChatMessage(
653
  role="assistant",
 
657
  },
658
  )
659
  )
 
660
  yield stored_messages, agent.last_marked_screenshot.to_string()
661
  else:
662
  stored_messages.append(msg)
663
+ yield stored_messages, None
664
 
665
  if consent_storage:
666
  summary = get_agent_summary_erase_images(agent)
 
676
  )
677
  yield stored_messages, None
678
  finally:
 
679
  sandbox.cleanup()
680
 
 
681
  theme = gr.themes.Default(
682
  font=["Oxanium", "sans-serif"], primary_hue="amber", secondary_hue="blue"
683
  )
 
688
  stored_messages = gr.State([])
689
 
690
  with gr.Row():
 
691
  with gr.Column(scale=1):
692
  gr.Markdown("### Fara CUA - Chrome Agent 🌐")
693
 
 
698
  )
699
  run_btn = gr.Button("Start Task", variant="primary")
700
  stop_btn = gr.Button("Stop", variant="secondary")
 
701
  consent_storage = gr.Checkbox(label="Save logs locally?", value=True)
702
 
703
  gr.Examples(
704
  examples=[
705
  "Go to google.com and search for 'Hugging Face', then click the first link.",
706
  "Go to wikipedia.org, type 'Python' in search, and click the search button.",
 
707
  ],
708
  inputs=task_input
709
  )
710
 
 
711
  with gr.Column(scale=3):
712
  with gr.Row():
 
713
  with gr.Column(scale=1):
714
  chatbot_display = gr.Chatbot(
715
  label="Agent Trace",
 
726
  height=600
727
  )
728
 
 
729
  agent_ui = EnrichedGradioUI(CodeAgent(tools=[], model=Model(), name="init"))
730
 
731
  def interrupt_agent(session_state):
 
733
  session_state["agent"].interrupt_switch = True
734
  return "Interrupted"
735
 
 
736
  run_event = run_btn.click(
737
  fn=agent_ui.interact_with_agent,
738
  inputs=[
 
748
  stop_btn.click(fn=interrupt_agent, inputs=[session_state], outputs=[])
749
 
750
  if __name__ == "__main__":
751
+ demo.launch(share=True)