Josedcape committed on
Commit
8f0298e
·
verified ·
1 Parent(s): e4b63ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -952
app.py CHANGED
@@ -1,952 +1,100 @@
1
- import pdb
2
- import logging
3
-
4
- from dotenv import load_dotenv
5
-
6
- load_dotenv()
7
- import os
8
- import glob
9
- import asyncio
10
- import argparse
11
- import os
12
-
13
- logger = logging.getLogger(__name__)
14
-
15
- import gradio as gr
16
-
17
- from browser_use.agent.service import Agent
18
- from playwright.async_api import async_playwright
19
- from browser_use.browser.browser import Browser, BrowserConfig
20
- from browser_use.browser.context import (
21
- BrowserContextConfig,
22
- BrowserContextWindowSize,
23
- )
24
- from playwright.async_api import async_playwright
25
- from src.utils.agent_state import AgentState
26
-
27
- from src.utils import utils
28
- from src.agent.custom_agent import CustomAgent
29
- from src.browser.custom_browser import CustomBrowser
30
- from src.agent.custom_prompts import CustomSystemPrompt
31
- from src.browser.custom_context import BrowserContextConfig, CustomBrowserContext
32
- from src.controller.custom_controller import CustomController
33
- from gradio.themes import Citrus, Default, Glass, Monochrome, Ocean, Origin, Soft, Base
34
- from src.utils.default_config_settings import default_config, load_config_from_file, save_config_to_file, save_current_config, update_ui_from_config
35
- from src.utils.utils import update_model_dropdown, get_latest_files, capture_screenshot
36
-
37
-
38
- # Global variables for persistence
39
- _global_browser = None
40
- _global_browser_context = None
41
-
42
- # Create the global agent state instance
43
- _global_agent_state = AgentState()
44
-
45
- async def stop_agent():
46
- """Request the agent to stop and update UI with enhanced feedback"""
47
- global _global_agent_state, _global_browser_context, _global_browser
48
-
49
- try:
50
- # Request stop
51
- _global_agent_state.request_stop()
52
-
53
- # Update UI immediately
54
- message = "Stop requested - the agent will halt at the next safe point"
55
- logger.info(f"🛑 {message}")
56
-
57
- # Return UI updates
58
- return (
59
- message, # errors_output
60
- gr.update(value="Stopping...", interactive=False), # stop_button
61
- gr.update(interactive=False), # run_button
62
- )
63
- except Exception as e:
64
- error_msg = f"Error during stop: {str(e)}"
65
- logger.error(error_msg)
66
- return (
67
- error_msg,
68
- gr.update(value="Stop", interactive=True),
69
- gr.update(interactive=True)
70
- )
71
-
72
- async def run_browser_agent(
73
- agent_type,
74
- llm_provider,
75
- llm_model_name,
76
- llm_temperature,
77
- llm_base_url,
78
- llm_api_key,
79
- use_own_browser,
80
- keep_browser_open,
81
- headless,
82
- disable_security,
83
- window_w,
84
- window_h,
85
- save_recording_path,
86
- save_agent_history_path,
87
- save_trace_path,
88
- enable_recording,
89
- task,
90
- add_infos,
91
- max_steps,
92
- use_vision,
93
- max_actions_per_step,
94
- tool_call_in_content
95
- ):
96
- global _global_agent_state
97
- _global_agent_state.clear_stop() # Clear any previous stop requests
98
-
99
- try:
100
- # Disable recording if the checkbox is unchecked
101
- if not enable_recording:
102
- save_recording_path = None
103
-
104
- # Ensure the recording directory exists if recording is enabled
105
- if save_recording_path:
106
- os.makedirs(save_recording_path, exist_ok=True)
107
-
108
- # Get the list of existing videos before the agent runs
109
- existing_videos = set()
110
- if save_recording_path:
111
- existing_videos = set(
112
- glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4"))
113
- + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]"))
114
- )
115
-
116
- # Run the agent
117
- llm = utils.get_llm_model(
118
- provider=llm_provider,
119
- model_name=llm_model_name,
120
- temperature=llm_temperature,
121
- base_url=llm_base_url,
122
- api_key=llm_api_key,
123
- )
124
- if agent_type == "org":
125
- final_result, errors, model_actions, model_thoughts, trace_file, history_file = await run_org_agent(
126
- llm=llm,
127
- use_own_browser=use_own_browser,
128
- keep_browser_open=keep_browser_open,
129
- headless=headless,
130
- disable_security=disable_security,
131
- window_w=window_w,
132
- window_h=window_h,
133
- save_recording_path=save_recording_path,
134
- save_agent_history_path=save_agent_history_path,
135
- save_trace_path=save_trace_path,
136
- task=task,
137
- max_steps=max_steps,
138
- use_vision=use_vision,
139
- max_actions_per_step=max_actions_per_step,
140
- tool_call_in_content=tool_call_in_content
141
- )
142
- elif agent_type == "custom":
143
- final_result, errors, model_actions, model_thoughts, trace_file, history_file = await run_custom_agent(
144
- llm=llm,
145
- use_own_browser=use_own_browser,
146
- keep_browser_open=keep_browser_open,
147
- headless=headless,
148
- disable_security=disable_security,
149
- window_w=window_w,
150
- window_h=window_h,
151
- save_recording_path=save_recording_path,
152
- save_agent_history_path=save_agent_history_path,
153
- save_trace_path=save_trace_path,
154
- task=task,
155
- add_infos=add_infos,
156
- max_steps=max_steps,
157
- use_vision=use_vision,
158
- max_actions_per_step=max_actions_per_step,
159
- tool_call_in_content=tool_call_in_content
160
- )
161
- else:
162
- raise ValueError(f"Invalid agent type: {agent_type}")
163
-
164
- # Get the list of videos after the agent runs (if recording is enabled)
165
- latest_video = None
166
- if save_recording_path:
167
- new_videos = set(
168
- glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4"))
169
- + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]"))
170
- )
171
- if new_videos - existing_videos:
172
- latest_video = list(new_videos - existing_videos)[0] # Get the first new video
173
-
174
- return (
175
- final_result,
176
- errors,
177
- model_actions,
178
- model_thoughts,
179
- latest_video,
180
- trace_file,
181
- history_file,
182
- gr.update(value="Stop", interactive=True), # Re-enable stop button
183
- gr.update(interactive=True) # Re-enable run button
184
- )
185
-
186
- except Exception as e:
187
- import traceback
188
- traceback.print_exc()
189
- errors = str(e) + "\n" + traceback.format_exc()
190
- return (
191
- '', # final_result
192
- errors, # errors
193
- '', # model_actions
194
- '', # model_thoughts
195
- None, # latest_video
196
- None, # history_file
197
- None, # trace_file
198
- gr.update(value="Stop", interactive=True), # Re-enable stop button
199
- gr.update(interactive=True) # Re-enable run button
200
- )
201
-
202
-
203
- async def run_org_agent(
204
- llm,
205
- use_own_browser,
206
- keep_browser_open,
207
- headless,
208
- disable_security,
209
- window_w,
210
- window_h,
211
- save_recording_path,
212
- save_agent_history_path,
213
- save_trace_path,
214
- task,
215
- max_steps,
216
- use_vision,
217
- max_actions_per_step,
218
- tool_call_in_content
219
- ):
220
- try:
221
- global _global_browser, _global_browser_context, _global_agent_state
222
-
223
- # Clear any previous stop request
224
- _global_agent_state.clear_stop()
225
-
226
- if use_own_browser:
227
- chrome_path = os.getenv("CHROME_PATH", None)
228
- if chrome_path == "":
229
- chrome_path = None
230
- else:
231
- chrome_path = None
232
-
233
- if _global_browser is None:
234
- _global_browser = Browser(
235
- config=BrowserConfig(
236
- headless=headless,
237
- disable_security=disable_security,
238
- chrome_instance_path=chrome_path,
239
- extra_chromium_args=[f"--window-size={window_w},{window_h}"],
240
- )
241
- )
242
-
243
- if _global_browser_context is None:
244
- _global_browser_context = await _global_browser.new_context(
245
- config=BrowserContextConfig(
246
- trace_path=save_trace_path if save_trace_path else None,
247
- save_recording_path=save_recording_path if save_recording_path else None,
248
- no_viewport=False,
249
- browser_window_size=BrowserContextWindowSize(
250
- width=window_w, height=window_h
251
- ),
252
- )
253
- )
254
-
255
- agent = Agent(
256
- task=task,
257
- llm=llm,
258
- use_vision=use_vision,
259
- browser=_global_browser,
260
- browser_context=_global_browser_context,
261
- max_actions_per_step=max_actions_per_step,
262
- tool_call_in_content=tool_call_in_content
263
- )
264
- history = await agent.run(max_steps=max_steps)
265
-
266
- history_file = os.path.join(save_agent_history_path, f"{agent.agent_id}.json")
267
- agent.save_history(history_file)
268
-
269
- final_result = history.final_result()
270
- errors = history.errors()
271
- model_actions = history.model_actions()
272
- model_thoughts = history.model_thoughts()
273
-
274
- trace_file = get_latest_files(save_trace_path)
275
-
276
- return final_result, errors, model_actions, model_thoughts, trace_file.get('.zip'), history_file
277
- except Exception as e:
278
- import traceback
279
- traceback.print_exc()
280
- errors = str(e) + "\n" + traceback.format_exc()
281
- return '', errors, '', '', None, None
282
- finally:
283
- # Handle cleanup based on persistence configuration
284
- if not keep_browser_open:
285
- if _global_browser_context:
286
- await _global_browser_context.close()
287
- _global_browser_context = None
288
-
289
- if _global_browser:
290
- await _global_browser.close()
291
- _global_browser = None
292
-
293
- async def run_custom_agent(
294
- llm,
295
- use_own_browser,
296
- keep_browser_open,
297
- headless,
298
- disable_security,
299
- window_w,
300
- window_h,
301
- save_recording_path,
302
- save_agent_history_path,
303
- save_trace_path,
304
- task,
305
- add_infos,
306
- max_steps,
307
- use_vision,
308
- max_actions_per_step,
309
- tool_call_in_content
310
- ):
311
- try:
312
- global _global_browser, _global_browser_context, _global_agent_state
313
-
314
- # Clear any previous stop request
315
- _global_agent_state.clear_stop()
316
-
317
- if use_own_browser:
318
- chrome_path = os.getenv("CHROME_PATH", None)
319
- if chrome_path == "":
320
- chrome_path = None
321
- else:
322
- chrome_path = None
323
-
324
- controller = CustomController()
325
-
326
- # Initialize global browser if needed
327
- if _global_browser is None:
328
- _global_browser = CustomBrowser(
329
- config=BrowserConfig(
330
- headless=headless,
331
- disable_security=disable_security,
332
- chrome_instance_path=chrome_path,
333
- extra_chromium_args=[f"--window-size={window_w},{window_h}"],
334
- )
335
- )
336
-
337
- if _global_browser_context is None:
338
- _global_browser_context = await _global_browser.new_context(
339
- config=BrowserContextConfig(
340
- trace_path=save_trace_path if save_trace_path else None,
341
- save_recording_path=save_recording_path if save_recording_path else None,
342
- no_viewport=False,
343
- browser_window_size=BrowserContextWindowSize(
344
- width=window_w, height=window_h
345
- ),
346
- )
347
- )
348
-
349
- # Create and run agent
350
- agent = CustomAgent(
351
- task=task,
352
- add_infos=add_infos,
353
- use_vision=use_vision,
354
- llm=llm,
355
- browser=_global_browser,
356
- browser_context=_global_browser_context,
357
- controller=controller,
358
- system_prompt_class=CustomSystemPrompt,
359
- max_actions_per_step=max_actions_per_step,
360
- tool_call_in_content=tool_call_in_content,
361
- agent_state=_global_agent_state
362
- )
363
- history = await agent.run(max_steps=max_steps)
364
-
365
- history_file = os.path.join(save_agent_history_path, f"{agent.agent_id}.json")
366
- agent.save_history(history_file)
367
-
368
- final_result = history.final_result()
369
- errors = history.errors()
370
- model_actions = history.model_actions()
371
- model_thoughts = history.model_thoughts()
372
-
373
- trace_file = get_latest_files(save_trace_path)
374
-
375
- return final_result, errors, model_actions, model_thoughts, trace_file.get('.zip'), history_file
376
- except Exception as e:
377
- import traceback
378
- traceback.print_exc()
379
- errors = str(e) + "\n" + traceback.format_exc()
380
- return '', errors, '', '', None, None
381
- finally:
382
- # Handle cleanup based on persistence configuration
383
- if not keep_browser_open:
384
- if _global_browser_context:
385
- await _global_browser_context.close()
386
- _global_browser_context = None
387
-
388
- if _global_browser:
389
- await _global_browser.close()
390
- _global_browser = None
391
-
392
- async def run_with_stream(
393
- agent_type,
394
- llm_provider,
395
- llm_model_name,
396
- llm_temperature,
397
- llm_base_url,
398
- llm_api_key,
399
- use_own_browser,
400
- keep_browser_open,
401
- headless,
402
- disable_security,
403
- window_w,
404
- window_h,
405
- save_recording_path,
406
- save_agent_history_path,
407
- save_trace_path,
408
- enable_recording,
409
- task,
410
- add_infos,
411
- max_steps,
412
- use_vision,
413
- max_actions_per_step,
414
- tool_call_in_content
415
- ):
416
- global _global_agent_state
417
- stream_vw = 80
418
- stream_vh = int(80 * window_h // window_w)
419
- if not headless:
420
- result = await run_browser_agent(
421
- agent_type=agent_type,
422
- llm_provider=llm_provider,
423
- llm_model_name=llm_model_name,
424
- llm_temperature=llm_temperature,
425
- llm_base_url=llm_base_url,
426
- llm_api_key=llm_api_key,
427
- use_own_browser=use_own_browser,
428
- keep_browser_open=keep_browser_open,
429
- headless=headless,
430
- disable_security=disable_security,
431
- window_w=window_w,
432
- window_h=window_h,
433
- save_recording_path=save_recording_path,
434
- save_agent_history_path=save_agent_history_path,
435
- save_trace_path=save_trace_path,
436
- enable_recording=enable_recording,
437
- task=task,
438
- add_infos=add_infos,
439
- max_steps=max_steps,
440
- use_vision=use_vision,
441
- max_actions_per_step=max_actions_per_step,
442
- tool_call_in_content=tool_call_in_content
443
- )
444
- # Add HTML content at the start of the result array
445
- html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Using browser...</h1>"
446
- yield [html_content] + list(result)
447
- else:
448
- try:
449
- _global_agent_state.clear_stop()
450
- # Run the browser agent in the background
451
- agent_task = asyncio.create_task(
452
- run_browser_agent(
453
- agent_type=agent_type,
454
- llm_provider=llm_provider,
455
- llm_model_name=llm_model_name,
456
- llm_temperature=llm_temperature,
457
- llm_base_url=llm_base_url,
458
- llm_api_key=llm_api_key,
459
- use_own_browser=use_own_browser,
460
- keep_browser_open=keep_browser_open,
461
- headless=headless,
462
- disable_security=disable_security,
463
- window_w=window_w,
464
- window_h=window_h,
465
- save_recording_path=save_recording_path,
466
- save_agent_history_path=save_agent_history_path,
467
- save_trace_path=save_trace_path,
468
- enable_recording=enable_recording,
469
- task=task,
470
- add_infos=add_infos,
471
- max_steps=max_steps,
472
- use_vision=use_vision,
473
- max_actions_per_step=max_actions_per_step,
474
- tool_call_in_content=tool_call_in_content
475
- )
476
- )
477
-
478
- # Initialize values for streaming
479
- html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Using browser...</h1>"
480
- final_result = errors = model_actions = model_thoughts = ""
481
- latest_videos = trace = history_file = None
482
-
483
-
484
- # Periodically update the stream while the agent task is running
485
- while not agent_task.done():
486
- try:
487
- encoded_screenshot = await capture_screenshot(_global_browser_context)
488
- if encoded_screenshot is not None:
489
- html_content = f'<img src="data:image/jpeg;base64,{encoded_screenshot}" style="width:{stream_vw}vw; height:{stream_vh}vh ; border:1px solid #ccc;">'
490
- else:
491
- html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>"
492
- except Exception as e:
493
- html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>"
494
-
495
- if _global_agent_state and _global_agent_state.is_stop_requested():
496
- yield [
497
- html_content,
498
- final_result,
499
- errors,
500
- model_actions,
501
- model_thoughts,
502
- latest_videos,
503
- trace,
504
- history_file,
505
- gr.update(value="Stopping...", interactive=False), # stop_button
506
- gr.update(interactive=False), # run_button
507
- ]
508
- break
509
- else:
510
- yield [
511
- html_content,
512
- final_result,
513
- errors,
514
- model_actions,
515
- model_thoughts,
516
- latest_videos,
517
- trace,
518
- history_file,
519
- gr.update(value="Stop", interactive=True), # Re-enable stop button
520
- gr.update(interactive=True) # Re-enable run button
521
- ]
522
- await asyncio.sleep(0.05)
523
-
524
- # Once the agent task completes, get the results
525
- try:
526
- result = await agent_task
527
- final_result, errors, model_actions, model_thoughts, latest_videos, trace, history_file, stop_button, run_button = result
528
- except Exception as e:
529
- errors = f"Agent error: {str(e)}"
530
-
531
- yield [
532
- html_content,
533
- final_result,
534
- errors,
535
- model_actions,
536
- model_thoughts,
537
- latest_videos,
538
- trace,
539
- history_file,
540
- stop_button,
541
- run_button
542
- ]
543
-
544
- except Exception as e:
545
- import traceback
546
- yield [
547
- f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>",
548
- "",
549
- f"Error: {str(e)}\n{traceback.format_exc()}",
550
- "",
551
- "",
552
- None,
553
- None,
554
- None,
555
- gr.update(value="Stop", interactive=True), # Re-enable stop button
556
- gr.update(interactive=True) # Re-enable run button
557
- ]
558
-
559
- # Define the theme map globally
560
- theme_map = {
561
- "Default": Default(),
562
- "Soft": Soft(),
563
- "Monochrome": Monochrome(),
564
- "Glass": Glass(),
565
- "Origin": Origin(),
566
- "Citrus": Citrus(),
567
- "Ocean": Ocean(),
568
- "Base": Base()
569
- }
570
-
571
- async def close_global_browser():
572
- global _global_browser, _global_browser_context
573
-
574
- if _global_browser_context:
575
- await _global_browser_context.close()
576
- _global_browser_context = None
577
-
578
- if _global_browser:
579
- await _global_browser.close()
580
- _global_browser = None
581
-
582
- def create_ui(config, theme_name="Ocean"):
583
- css = """
584
- .gradio-container {
585
- max-width: 1200px !important;
586
- margin: auto !important;
587
- padding-top: 20px !important;
588
- }
589
- .header-text {
590
- text-align: center;
591
- margin-bottom: 30px;
592
- }
593
- .theme-section {
594
- margin-bottom: 20px;
595
- padding: 15px;
596
- border-radius: 10px;
597
- }
598
- """
599
-
600
- js = """
601
- function refresh() {
602
- const url = new URL(window.location);
603
- if (url.searchParams.get('__theme') !== 'dark') {
604
- url.searchParams.set('__theme', 'dark');
605
- window.location.href = url.href;
606
- }
607
- }
608
- """
609
-
610
- with gr.Blocks(
611
- title="Browser Use WebUI", theme=theme_map[theme_name], css=css, js=js
612
- ) as demo:
613
- with gr.Row():
614
- gr.Markdown(
615
- """
616
- # 🌐 Browser Use WebUI
617
- ### Control your browser with AI assistance
618
- """,
619
- elem_classes=["header-text"],
620
- )
621
-
622
- with gr.Tabs() as tabs:
623
- with gr.TabItem("⚙️ Agent Settings", id=1):
624
- with gr.Group():
625
- agent_type = gr.Radio(
626
- ["org", "custom"],
627
- label="Agent Type",
628
- value=config['agent_type'],
629
- info="Select the type of agent to use",
630
- )
631
- max_steps = gr.Slider(
632
- minimum=1,
633
- maximum=200,
634
- value=config['max_steps'],
635
- step=1,
636
- label="Max Run Steps",
637
- info="Maximum number of steps the agent will take",
638
- )
639
- max_actions_per_step = gr.Slider(
640
- minimum=1,
641
- maximum=20,
642
- value=config['max_actions_per_step'],
643
- step=1,
644
- label="Max Actions per Step",
645
- info="Maximum number of actions the agent will take per step",
646
- )
647
- use_vision = gr.Checkbox(
648
- label="Use Vision",
649
- value=config['use_vision'],
650
- info="Enable visual processing capabilities",
651
- )
652
- tool_call_in_content = gr.Checkbox(
653
- label="Use Tool Calls in Content",
654
- value=config['tool_call_in_content'],
655
- info="Enable Tool Calls in content",
656
- )
657
-
658
- with gr.TabItem("🔧 LLM Configuration", id=2):
659
- with gr.Group():
660
- llm_provider = gr.Dropdown(
661
- choices=[provider for provider,model in utils.model_names.items()],
662
- label="LLM Provider",
663
- value=config['llm_provider'],
664
- info="Select your preferred language model provider"
665
- )
666
- llm_model_name = gr.Dropdown(
667
- label="Model Name",
668
- choices=utils.model_names['openai'],
669
- value=config['llm_model_name'],
670
- interactive=True,
671
- allow_custom_value=True, # Allow users to input custom model names
672
- info="Select a model from the dropdown or type a custom model name"
673
- )
674
- llm_temperature = gr.Slider(
675
- minimum=0.0,
676
- maximum=2.0,
677
- value=config['llm_temperature'],
678
- step=0.1,
679
- label="Temperature",
680
- info="Controls randomness in model outputs"
681
- )
682
- with gr.Row():
683
- llm_base_url = gr.Textbox(
684
- label="Base URL",
685
- value=config['llm_base_url'],
686
- info="API endpoint URL (if required)"
687
- )
688
- llm_api_key = gr.Textbox(
689
- label="API Key",
690
- type="password",
691
- value=config['llm_api_key'],
692
- info="Your API key (leave blank to use .env)"
693
- )
694
-
695
- with gr.TabItem("🌐 Browser Settings", id=3):
696
- with gr.Group():
697
- with gr.Row():
698
- use_own_browser = gr.Checkbox(
699
- label="Use Own Browser",
700
- value=config['use_own_browser'],
701
- info="Use your existing browser instance",
702
- )
703
- keep_browser_open = gr.Checkbox(
704
- label="Keep Browser Open",
705
- value=config['keep_browser_open'],
706
- info="Keep Browser Open between Tasks",
707
- )
708
- headless = gr.Checkbox(
709
- label="Headless Mode",
710
- value=config['headless'],
711
- info="Run browser without GUI",
712
- )
713
- disable_security = gr.Checkbox(
714
- label="Disable Security",
715
- value=config['disable_security'],
716
- info="Disable browser security features",
717
- )
718
- enable_recording = gr.Checkbox(
719
- label="Enable Recording",
720
- value=config['enable_recording'],
721
- info="Enable saving browser recordings",
722
- )
723
-
724
- with gr.Row():
725
- window_w = gr.Number(
726
- label="Window Width",
727
- value=config['window_w'],
728
- info="Browser window width",
729
- )
730
- window_h = gr.Number(
731
- label="Window Height",
732
- value=config['window_h'],
733
- info="Browser window height",
734
- )
735
-
736
- save_recording_path = gr.Textbox(
737
- label="Recording Path",
738
- placeholder="e.g. ./tmp/record_videos",
739
- value=config['save_recording_path'],
740
- info="Path to save browser recordings",
741
- interactive=True, # Allow editing only if recording is enabled
742
- )
743
-
744
- save_trace_path = gr.Textbox(
745
- label="Trace Path",
746
- placeholder="e.g. ./tmp/traces",
747
- value=config['save_trace_path'],
748
- info="Path to save Agent traces",
749
- interactive=True,
750
- )
751
-
752
- save_agent_history_path = gr.Textbox(
753
- label="Agent History Save Path",
754
- placeholder="e.g., ./tmp/agent_history",
755
- value=config['save_agent_history_path'],
756
- info="Specify the directory where agent history should be saved.",
757
- interactive=True,
758
- )
759
-
760
- with gr.TabItem("🤖 Run Agent", id=4):
761
- task = gr.Textbox(
762
- label="Task Description",
763
- lines=4,
764
- placeholder="Enter your task here...",
765
- value=config['task'],
766
- info="Describe what you want the agent to do",
767
- )
768
- add_infos = gr.Textbox(
769
- label="Additional Information",
770
- lines=3,
771
- placeholder="Add any helpful context or instructions...",
772
- info="Optional hints to help the LLM complete the task",
773
- )
774
-
775
- with gr.Row():
776
- run_button = gr.Button("▶️ Run Agent", variant="primary", scale=2)
777
- stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1)
778
-
779
- with gr.Row():
780
- browser_view = gr.HTML(
781
- value="<h1 style='width:80vw; height:50vh'>Waiting for browser session...</h1>",
782
- label="Live Browser View",
783
- )
784
-
785
- with gr.TabItem("📁 Configuration", id=5):
786
- with gr.Group():
787
- config_file_input = gr.File(
788
- label="Load Config File",
789
- file_types=[".pkl"],
790
- interactive=True
791
- )
792
-
793
- load_config_button = gr.Button("Load Existing Config From File", variant="primary")
794
- save_config_button = gr.Button("Save Current Config", variant="primary")
795
-
796
- config_status = gr.Textbox(
797
- label="Status",
798
- lines=2,
799
- interactive=False
800
- )
801
-
802
- load_config_button.click(
803
- fn=update_ui_from_config,
804
- inputs=[config_file_input],
805
- outputs=[
806
- agent_type, max_steps, max_actions_per_step, use_vision, tool_call_in_content,
807
- llm_provider, llm_model_name, llm_temperature, llm_base_url, llm_api_key,
808
- use_own_browser, keep_browser_open, headless, disable_security, enable_recording,
809
- window_w, window_h, save_recording_path, save_trace_path, save_agent_history_path,
810
- task, config_status
811
- ]
812
- )
813
-
814
- save_config_button.click(
815
- fn=save_current_config,
816
- inputs=[
817
- agent_type, max_steps, max_actions_per_step, use_vision, tool_call_in_content,
818
- llm_provider, llm_model_name, llm_temperature, llm_base_url, llm_api_key,
819
- use_own_browser, keep_browser_open, headless, disable_security,
820
- enable_recording, window_w, window_h, save_recording_path, save_trace_path,
821
- save_agent_history_path, task,
822
- ],
823
- outputs=[config_status]
824
- )
825
-
826
- with gr.TabItem("📊 Results", id=6):
827
- with gr.Group():
828
-
829
- recording_display = gr.Video(label="Latest Recording")
830
-
831
- gr.Markdown("### Results")
832
- with gr.Row():
833
- with gr.Column():
834
- final_result_output = gr.Textbox(
835
- label="Final Result", lines=3, show_label=True
836
- )
837
- with gr.Column():
838
- errors_output = gr.Textbox(
839
- label="Errors", lines=3, show_label=True
840
- )
841
- with gr.Row():
842
- with gr.Column():
843
- model_actions_output = gr.Textbox(
844
- label="Model Actions", lines=3, show_label=True
845
- )
846
- with gr.Column():
847
- model_thoughts_output = gr.Textbox(
848
- label="Model Thoughts", lines=3, show_label=True
849
- )
850
-
851
- trace_file = gr.File(label="Trace File")
852
-
853
- agent_history_file = gr.File(label="Agent History")
854
-
855
- # Bind the stop button click event after errors_output is defined
856
- stop_button.click(
857
- fn=stop_agent,
858
- inputs=[],
859
- outputs=[errors_output, stop_button, run_button],
860
- )
861
-
862
- # Run button click handler
863
- run_button.click(
864
- fn=run_with_stream,
865
- inputs=[
866
- agent_type, llm_provider, llm_model_name, llm_temperature, llm_base_url, llm_api_key,
867
- use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h,
868
- save_recording_path, save_agent_history_path, save_trace_path, # Include the new path
869
- enable_recording, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_call_in_content
870
- ],
871
- outputs=[
872
- browser_view, # Browser view
873
- final_result_output, # Final result
874
- errors_output, # Errors
875
- model_actions_output, # Model actions
876
- model_thoughts_output, # Model thoughts
877
- recording_display, # Latest recording
878
- trace_file, # Trace file
879
- agent_history_file, # Agent history file
880
- stop_button, # Stop button
881
- run_button # Run button
882
- ],
883
- )
884
-
885
- with gr.TabItem("🎥 Recordings", id=7):
886
- def list_recordings(save_recording_path):
887
- if not os.path.exists(save_recording_path):
888
- return []
889
-
890
- # Get all video files
891
- recordings = glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4")) + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]"))
892
-
893
- # Sort recordings by creation time (oldest first)
894
- recordings.sort(key=os.path.getctime)
895
-
896
- # Add numbering to the recordings
897
- numbered_recordings = []
898
- for idx, recording in enumerate(recordings, start=1):
899
- filename = os.path.basename(recording)
900
- numbered_recordings.append((recording, f"{idx}. {filename}"))
901
-
902
- return numbered_recordings
903
-
904
- recordings_gallery = gr.Gallery(
905
- label="Recordings",
906
- value=list_recordings(config['save_recording_path']),
907
- columns=3,
908
- height="auto",
909
- object_fit="contain"
910
- )
911
-
912
- refresh_button = gr.Button("🔄 Refresh Recordings", variant="secondary")
913
- refresh_button.click(
914
- fn=list_recordings,
915
- inputs=save_recording_path,
916
- outputs=recordings_gallery
917
- )
918
-
919
- # Attach the callback to the LLM provider dropdown
920
- llm_provider.change(
921
- lambda provider, api_key, base_url: update_model_dropdown(provider, api_key, base_url),
922
- inputs=[llm_provider, llm_api_key, llm_base_url],
923
- outputs=llm_model_name
924
- )
925
-
926
- # Add this after defining the components
927
- enable_recording.change(
928
- lambda enabled: gr.update(interactive=enabled),
929
- inputs=enable_recording,
930
- outputs=save_recording_path
931
- )
932
-
933
- use_own_browser.change(fn=close_global_browser)
934
- keep_browser_open.change(fn=close_global_browser)
935
-
936
- return demo
937
-
938
- def main():
939
- parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent")
940
- parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
941
- parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
942
- parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI")
943
- parser.add_argument("--dark-mode", action="store_true", help="Enable dark mode")
944
- args = parser.parse_args()
945
-
946
- config_dict = default_config()
947
-
948
- demo = create_ui(config_dict, theme_name=args.theme)
949
- demo.launch(server_name=args.ip, server_port=args.port)
950
-
951
- if __name__ == '__main__':
952
- main()
 
1
+ import os
2
+ import glob
3
+ import asyncio
4
+ import argparse
5
+ import logging
6
+ from selenium import webdriver
7
+ from selenium.webdriver.chrome.service import Service
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.chrome.options import Options
10
+ from webdriver_manager.chrome import ChromeDriverManager
11
+ import gradio as gr
12
+
13
+ # Logger setup
14
+ logger = logging.getLogger(__name__)
15
+
16
+ # Global variables for browser persistence
17
+ _global_browser = None
18
+
19
+ def setup_browser(headless=True, window_size=(1280, 720)):
20
+ """Initialize a Selenium browser instance."""
21
+ global _global_browser
22
+
23
+ chrome_options = Options()
24
+ if headless:
25
+ chrome_options.add_argument("--headless")
26
+ chrome_options.add_argument(f"--window-size={window_size[0]},{window_size[1]}")
27
+
28
+ service = Service(ChromeDriverManager().install())
29
+ _global_browser = webdriver.Chrome(service=service, options=chrome_options)
30
+
31
+ return _global_browser
32
+
33
+
34
+ def close_browser():
35
+ """Close the Selenium browser instance."""
36
+ global _global_browser
37
+ if _global_browser:
38
+ _global_browser.quit()
39
+ _global_browser = None
40
+
41
+
42
+ async def run_agent(task_description, headless=True, window_size=(1280, 720)):
43
+ """Run a Selenium-based agent."""
44
+ global _global_browser
45
+
46
+ if not _global_browser:
47
+ setup_browser(headless=headless, window_size=window_size)
48
+
49
+ try:
50
+ logger.info(f"Executing task: {task_description}")
51
+ _global_browser.get("https://example.com") # Replace with your target URL
52
+
53
+ # Simulate some task
54
+ result = f"Task executed successfully on {task_description}"
55
+ return result, None # Return result and errors
56
+ except Exception as e:
57
+ logger.error(f"Error while running agent: {e}")
58
+ return None, str(e) # Return None result and error message
59
+
60
+
61
+ def create_ui():
62
+ """Create the Gradio UI."""
63
+ with gr.Blocks() as demo:
64
+ gr.Markdown("# Selenium-based Browser Automation")
65
+
66
+ task_input = gr.Textbox(
67
+ label="Task Description",
68
+ placeholder="Describe the task you want to execute...",
69
+ )
70
+
71
+ headless_mode = gr.Checkbox(
72
+ label="Run in Headless Mode", value=True, interactive=True
73
+ )
74
+
75
+ run_button = gr.Button("Run Task")
76
+ output_result = gr.Textbox(label="Result")
77
+ output_error = gr.Textbox(label="Error")
78
+
79
+ run_button.click(
80
+ run_agent,
81
+ inputs=[task_input, headless_mode],
82
+ outputs=[output_result, output_error],
83
+ )
84
+
85
+ return demo
86
+
87
+
88
+ def main():
89
+ parser = argparse.ArgumentParser(description="Gradio UI for Selenium Automation")
90
+ parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
91
+ parser.add_argument("--port", type=int, default=7860, help="Port to listen on")
92
+ args = parser.parse_args()
93
+
94
+ demo = create_ui()
95
+ demo.launch(server_name=args.ip, server_port=args.port)
96
+
97
+
98
+ if __name__ == "__main__":
99
+ main()
100
+