Josedcape commited on
Commit
ca69a38
·
verified ·
1 Parent(s): 89ea86d

Upload 13 files

Browse files
Files changed (13) hide show
  1. .dockerignore +2 -0
  2. .env +37 -0
  3. .gitignore +189 -0
  4. Dockerfile +87 -0
  5. LICENSE +21 -0
  6. README.md +184 -14
  7. SECURITY.md +19 -0
  8. agent_history.gif +0 -0
  9. docker-compose.yml +47 -0
  10. pyvenv.cfg +5 -0
  11. requirements.txt +6 -0
  12. supervisord.conf +83 -0
  13. webui.py +952 -0
.dockerignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ data
2
+ tmp
.env ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ OPENAI_ENDPOINT=https://api.openai.com/v1
2
+ OPENAI_API_KEY=
3
+
4
+ ANTHROPIC_API_KEY=
5
+
6
+ GOOGLE_API_KEY=
7
+
8
+ AZURE_OPENAI_ENDPOINT=
9
+ AZURE_OPENAI_API_KEY=
10
+
11
+ DEEPSEEK_ENDPOINT=https://api.deepseek.com
12
+ DEEPSEEK_API_KEY=
13
+
14
+ # Set to false to disable anonymized telemetry
15
+ ANONYMIZED_TELEMETRY=true
16
+
17
+ # LogLevel: Set to debug to enable verbose logging, set to result to get results only. Available: result | debug | info
18
+ BROWSER_USE_LOGGING_LEVEL=info
19
+
20
+ # Chrome settings
21
+ CHROME_PATH=
22
+ CHROME_USER_DATA=
23
+ CHROME_DEBUGGING_PORT=9222
24
+ CHROME_DEBUGGING_HOST=localhost
25
+ # Set to true to keep browser open between AI tasks
26
+ CHROME_PERSISTENT_SESSION=false
27
+
28
+ # Display settings
29
+ # Format: WIDTHxHEIGHTxDEPTH
30
+ RESOLUTION=1920x1080x24
31
+ # Width in pixels
32
+ RESOLUTION_WIDTH=1920
33
+ # Height in pixels
34
+ RESOLUTION_HEIGHT=1080
35
+
36
+ # VNC settings
37
+ VNC_PASSWORD=youvncpassword
.gitignore ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110
+ .pdm.toml
111
+ .pdm-python
112
+ .pdm-build/
113
+
114
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115
+ __pypackages__/
116
+
117
+ # Celery stuff
118
+ celerybeat-schedule
119
+ celerybeat.pid
120
+
121
+ # SageMath parsed files
122
+ *.sage.py
123
+
124
+ # Environments
125
+ .env
126
+ .venv
127
+ env/
128
+ venv/
129
+ ENV/
130
+ env.bak/
131
+ venv.bak/
132
+ test_env/
133
+ myenv
134
+
135
+
136
+ # Spyder project settings
137
+ .spyderproject
138
+ .spyproject
139
+
140
+ # Rope project settings
141
+ .ropeproject
142
+
143
+ # mkdocs documentation
144
+ /site
145
+
146
+ # mypy
147
+ .mypy_cache/
148
+ .dmypy.json
149
+ dmypy.json
150
+
151
+ # Pyre type checker
152
+ .pyre/
153
+
154
+ # pytype static type analyzer
155
+ .pytype/
156
+
157
+ # Cython debug symbols
158
+ cython_debug/
159
+
160
+ # PyCharm
161
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
162
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
163
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
164
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
165
+ .idea/
166
+ temp
167
+ tmp
168
+
169
+
170
+ .DS_Store
171
+
172
+ private_example.py
173
+ private_example
174
+
175
+ browser_cookies.json
176
+ cookies.json
177
+ AgentHistory.json
178
+ cv_04_24.pdf
179
+ AgentHistoryList.json
180
+ *.gif
181
+
182
+ # For Sharing (.pem files)
183
+ .gradio/
184
+
185
+ # For Docker
186
+ data/
187
+
188
+ # For Config Files (Current Settings)
189
+ .config.pkl
Dockerfile ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ # Install system dependencies
4
+ RUN apt-get update && apt-get install -y \
5
+ wget \
6
+ gnupg \
7
+ curl \
8
+ unzip \
9
+ xvfb \
10
+ libgconf-2-4 \
11
+ libxss1 \
12
+ libnss3 \
13
+ libnspr4 \
14
+ libasound2 \
15
+ libatk1.0-0 \
16
+ libatk-bridge2.0-0 \
17
+ libcups2 \
18
+ libdbus-1-3 \
19
+ libdrm2 \
20
+ libgbm1 \
21
+ libgtk-3-0 \
22
+ libxcomposite1 \
23
+ libxdamage1 \
24
+ libxfixes3 \
25
+ libxrandr2 \
26
+ xdg-utils \
27
+ fonts-liberation \
28
+ dbus \
29
+ xauth \
30
+ xvfb \
31
+ x11vnc \
32
+ tigervnc-tools \
33
+ supervisor \
34
+ net-tools \
35
+ procps \
36
+ git \
37
+ python3-numpy \
38
+ fontconfig \
39
+ fonts-dejavu \
40
+ fonts-dejavu-core \
41
+ fonts-dejavu-extra \
42
+ && rm -rf /var/lib/apt/lists/*
43
+
44
+ # Install noVNC
45
+ RUN git clone https://github.com/novnc/noVNC.git /opt/novnc \
46
+ && git clone https://github.com/novnc/websockify /opt/novnc/utils/websockify \
47
+ && ln -s /opt/novnc/vnc.html /opt/novnc/index.html
48
+
49
+ # Install Chrome
50
+ RUN curl -fsSL https://dl.google.com/linux/linux_signing_key.pub | gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg \
51
+ && echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" | tee /etc/apt/sources.list.d/google-chrome.list
52
+
53
+ # Set up working directory
54
+ WORKDIR /app
55
+
56
+ # Copy requirements and install Python dependencies
57
+ COPY requirements.txt .
58
+ RUN pip install --no-cache-dir -r requirements.txt
59
+
60
+ # Install Playwright and browsers with system dependencies
61
+ ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
62
+ RUN playwright install --with-deps chromium
63
+ RUN playwright install-deps
64
+ RUN apt-get install -y google-chrome-stable
65
+
66
+ # Copy the application code
67
+ COPY . .
68
+
69
+ # Set environment variables
70
+ ENV PYTHONUNBUFFERED=1
71
+ ENV BROWSER_USE_LOGGING_LEVEL=info
72
+ ENV CHROME_PATH=/usr/bin/google-chrome
73
+ ENV ANONYMIZED_TELEMETRY=false
74
+ ENV DISPLAY=:99
75
+ ENV RESOLUTION=1920x1080x24
76
+ ENV VNC_PASSWORD=vncpassword
77
+ ENV CHROME_PERSISTENT_SESSION=true
78
+ ENV RESOLUTION_WIDTH=1920
79
+ ENV RESOLUTION_HEIGHT=1080
80
+
81
+ # Set up supervisor configuration
82
+ RUN mkdir -p /var/log/supervisor
83
+ COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
84
+
85
+ EXPOSE 7788 6080 5900
86
+
87
+ CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Browser Use Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,14 +1,184 @@
1
- ---
2
- title: Webui
3
- emoji: 💻
4
- colorFrom: purple
5
- colorTo: blue
6
- sdk: gradio
7
- sdk_version: 5.13.1
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- short_description: 'Automatizacion de paginas web '
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <img src="./assets/web-ui.png" alt="Browser Use Web UI" width="full"/>
2
+
3
+ <br/>
4
+
5
+ [![GitHub stars](https://img.shields.io/github/stars/browser-use/web-ui?style=social)](https://github.com/browser-use/web-ui/stargazers)
6
+ [![Discord](https://img.shields.io/discord/1303749220842340412?color=7289DA&label=Discord&logo=discord&logoColor=white)](https://link.browser-use.com/discord)
7
+ [![Documentation](https://img.shields.io/badge/Documentation-📕-blue)](https://docs.browser-use.com)
8
+ [![WarmShao](https://img.shields.io/twitter/follow/warmshao?style=social)](https://x.com/warmshao)
9
+
10
+ This project builds upon the foundation of the [browser-use](https://github.com/browser-use/browser-use), which is designed to make websites accessible for AI agents.
11
+
12
+ We would like to officially thank [WarmShao](https://github.com/warmshao) for his contribution to this project.
13
+
14
+ **WebUI:** is built on Gradio and supports most `browser-use` functionality. This UI is designed to be user-friendly and enables easy interaction with the browser agent.
15
+
16
+ **Expanded LLM Support:** We've integrated support for various Large Language Models (LLMs), including Gemini, OpenAI, Azure OpenAI, Anthropic, DeepSeek, Ollama, etc. We plan to add support for even more models in the future.
17
+
18
+ **Custom Browser Support:** You can use your own browser with our tool, eliminating the need to re-login to sites or deal with other authentication challenges. This feature also supports high-definition screen recording.
19
+
20
+ **Persistent Browser Sessions:** You can choose to keep the browser window open between AI tasks, allowing you to see the complete history and state of AI interactions.
21
+
22
+ <video src="https://github.com/user-attachments/assets/56bc7080-f2e3-4367-af22-6bf2245ff6cb" controls="controls">Your browser does not support playing this video!</video>
23
+
24
+ ## Installation Options
25
+
26
+ ### Option 1: Local Installation
27
+
28
+ Read the [quickstart guide](https://docs.browser-use.com/quickstart#prepare-the-environment) or follow the steps below to get started.
29
+
30
+ > Python 3.11 or higher is required.
31
+
32
+ First, we recommend using [uv](https://docs.astral.sh/uv/) to set up the Python environment.
33
+
34
+ ```bash
35
+ uv venv --python 3.11
36
+ ```
37
+
38
+ and activate it with:
39
+
40
+ ```bash
41
+ source .venv/bin/activate
42
+ ```
43
+
44
+ Install the dependencies:
45
+
46
+ ```bash
47
+ uv pip install -r requirements.txt
48
+ ```
49
+
50
+ Then install playwright:
51
+
52
+ ```bash
53
+ playwright install
54
+ ```
55
+
56
+ ### Option 2: Docker Installation
57
+
58
+ 1. **Prerequisites:**
59
+ - Docker and Docker Compose installed on your system
60
+ - Git to clone the repository
61
+
62
+ 2. **Setup:**
63
+ ```bash
64
+ # Clone the repository
65
+ git clone https://github.com/browser-use/web-ui.git
66
+ cd web-ui
67
+
68
+ # Copy and configure environment variables
69
+ cp .env.example .env
70
+ # Edit .env with your preferred text editor and add your API keys
71
+ ```
72
+
73
+ 3. **Run with Docker:**
74
+ ```bash
75
+ # Build and start the container with default settings (browser closes after AI tasks)
76
+ docker compose up --build
77
+
78
+ # Or run with persistent browser (browser stays open between AI tasks)
79
+ CHROME_PERSISTENT_SESSION=true docker compose up --build
80
+ ```
81
+
82
+ 4. **Access the Application:**
83
+ - WebUI: `http://localhost:7788`
84
+ - VNC Viewer (to see browser interactions): `http://localhost:6080/vnc.html`
85
+
86
+ Default VNC password is "vncpassword". You can change it by setting the `VNC_PASSWORD` environment variable in your `.env` file.
87
+
88
+
89
+ ## Usage
90
+
91
+ ### Local Setup
92
+ 1. Copy `.env.example` to `.env` and set your environment variables, including API keys for the LLM. `cp .env.example .env`
93
+ 2. **Run the WebUI:**
94
+ ```bash
95
+ python webui.py --ip 127.0.0.1 --port 7788
96
+ ```
97
+ 4. WebUI options:
98
+ - `--ip`: The IP address to bind the WebUI to. Default is `127.0.0.1`.
99
+ - `--port`: The port to bind the WebUI to. Default is `7788`.
100
+ - `--theme`: The theme for the user interface. Default is `Ocean`.
101
+ - **Default**: The standard theme with a balanced design.
102
+ - **Soft**: A gentle, muted color scheme for a relaxed viewing experience.
103
+ - **Monochrome**: A grayscale theme with minimal color for simplicity and focus.
104
+ - **Glass**: A sleek, semi-transparent design for a modern appearance.
105
+ - **Origin**: A classic, retro-inspired theme for a nostalgic feel.
106
+ - **Citrus**: A vibrant, citrus-inspired palette with bright and fresh colors.
107
+ - **Ocean** (default): A blue, ocean-inspired theme providing a calming effect.
108
+ - `--dark-mode`: Enables dark mode for the user interface.
109
+ 3. **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`.
110
+ 4. **Using Your Own Browser (Optional):**
111
+ - Set `CHROME_PATH` to the executable path of your browser and `CHROME_USER_DATA` to the user data directory of your browser.
112
+ - Windows
113
+ ```env
114
+ CHROME_PATH="C:\Program Files\Google\Chrome\Application\chrome.exe"
115
+ CHROME_USER_DATA="C:\Users\YourUsername\AppData\Local\Google\Chrome\User Data"
116
+ ```
117
+ > Note: Replace `YourUsername` with your actual Windows username for Windows systems.
118
+ - Mac
119
+ ```env
120
+ CHROME_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
121
+ CHROME_USER_DATA="~/Library/Application Support/Google/Chrome/Profile 1"
122
+ ```
123
+ - Close all Chrome windows
124
+ - Open the WebUI in a non-Chrome browser, such as Firefox or Edge. This is important because the persistent browser context will use the Chrome data when running the agent.
125
+ - Check the "Use Own Browser" option within the Browser Settings.
126
+ 5. **Keep Browser Open (Optional):**
127
+ - Set `CHROME_PERSISTENT_SESSION=true` in the `.env` file.
128
+
129
+ ### Docker Setup
130
+ 1. **Environment Variables:**
131
+ - All configuration is done through the `.env` file
132
+ - Available environment variables:
133
+ ```
134
+ # LLM API Keys
135
+ OPENAI_API_KEY=your_key_here
136
+ ANTHROPIC_API_KEY=your_key_here
137
+ GOOGLE_API_KEY=your_key_here
138
+
139
+ # Browser Settings
140
+ CHROME_PERSISTENT_SESSION=true # Set to true to keep browser open between AI tasks
141
+ RESOLUTION=1920x1080x24 # Custom resolution format: WIDTHxHEIGHTxDEPTH
142
+ RESOLUTION_WIDTH=1920 # Custom width in pixels
143
+ RESOLUTION_HEIGHT=1080 # Custom height in pixels
144
+
145
+ # VNC Settings
146
+ VNC_PASSWORD=your_vnc_password # Optional, defaults to "vncpassword"
147
+ ```
148
+
149
+ 2. **Browser Persistence Modes:**
150
+ - **Default Mode (CHROME_PERSISTENT_SESSION=false):**
151
+ - Browser opens and closes with each AI task
152
+ - Clean state for each interaction
153
+ - Lower resource usage
154
+
155
+ - **Persistent Mode (CHROME_PERSISTENT_SESSION=true):**
156
+ - Browser stays open between AI tasks
157
+ - Maintains history and state
158
+ - Allows viewing previous AI interactions
159
+ - Set in `.env` file or via environment variable when starting container
160
+
161
+ 3. **Viewing Browser Interactions:**
162
+ - Access the noVNC viewer at `http://localhost:6080/vnc.html`
163
+ - Enter the VNC password (default: "vncpassword" or what you set in VNC_PASSWORD)
164
+ - You can now see all browser interactions in real-time
165
+
166
+ 4. **Container Management:**
167
+ ```bash
168
+ # Start with persistent browser
169
+ CHROME_PERSISTENT_SESSION=true docker compose up -d
170
+
171
+ # Start with default mode (browser closes after tasks)
172
+ docker compose up -d
173
+
174
+ # View logs
175
+ docker compose logs -f
176
+
177
+ # Stop the container
178
+ docker compose down
179
+ ```
180
+
181
+ ## Changelog
182
+ - [x] **2025/01/26:** Thanks to @vvincent1234. Now browser-use-webui can be combined with DeepSeek-r1 to engage in deep thinking!
183
+ - [x] **2025/01/10:** Thanks to @casistack. Now we have a Docker setup option and also support keeping the browser open between tasks. [Video tutorial demo](https://github.com/browser-use/web-ui/issues/1#issuecomment-2582511750).
184
+ - [x] **2025/01/06:** Thanks to @richard-devbot. A New and Well-Designed WebUI is released. [Video tutorial demo](https://github.com/warmshao/browser-use-webui/issues/1#issuecomment-2573393113).
SECURITY.md ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Reporting Security Issues
2
+
3
+ If you believe you have found a security vulnerability in browser-use, please report it through coordinated disclosure.
4
+
5
+ **Please do not report security vulnerabilities through the repository issues, discussions, or pull requests.**
6
+
7
+ Instead, please open a new [Github security advisory](https://github.com/browser-use/web-ui/security/advisories/new).
8
+
9
+ Please include as much of the information listed below as you can to help me better understand and resolve the issue:
10
+
11
+ * The type of issue (e.g., buffer overflow, SQL injection, or cross-site scripting)
12
+ * Full paths of source file(s) related to the manifestation of the issue
13
+ * The location of the affected source code (tag/branch/commit or direct URL)
14
+ * Any special configuration required to reproduce the issue
15
+ * Step-by-step instructions to reproduce the issue
16
+ * Proof-of-concept or exploit code (if possible)
17
+ * Impact of the issue, including how an attacker might exploit the issue
18
+
19
+ This information will help me triage your report more quickly.
agent_history.gif ADDED
docker-compose.yml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ services:
2
+ browser-use-webui:
3
+ build:
4
+ context: .
5
+ dockerfile: Dockerfile
6
+ ports:
7
+ - "7788:7788" # Gradio default port
8
+ - "6080:6080" # noVNC web interface
9
+ - "5900:5900" # VNC port
10
+ - "9222:9222" # Chrome remote debugging port
11
+ environment:
12
+ - OPENAI_ENDPOINT=${OPENAI_ENDPOINT:-https://api.openai.com/v1}
13
+ - OPENAI_API_KEY=${OPENAI_API_KEY:-}
14
+ - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
15
+ - GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
16
+ - AZURE_OPENAI_ENDPOINT=${AZURE_OPENAI_ENDPOINT:-}
17
+ - AZURE_OPENAI_API_KEY=${AZURE_OPENAI_API_KEY:-}
18
+ - DEEPSEEK_ENDPOINT=${DEEPSEEK_ENDPOINT:-https://api.deepseek.com}
19
+ - DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:-}
20
+ - BROWSER_USE_LOGGING_LEVEL=${BROWSER_USE_LOGGING_LEVEL:-info}
21
+ - ANONYMIZED_TELEMETRY=false
22
+ - CHROME_PATH=/usr/bin/google-chrome
23
+ - CHROME_USER_DATA=/app/data/chrome_data
24
+ - CHROME_PERSISTENT_SESSION=${CHROME_PERSISTENT_SESSION:-false}
25
+ - DISPLAY=:99
26
+ - PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
27
+ - RESOLUTION=${RESOLUTION:-1920x1080x24}
28
+ - RESOLUTION_WIDTH=${RESOLUTION_WIDTH:-1920}
29
+ - RESOLUTION_HEIGHT=${RESOLUTION_HEIGHT:-1080}
30
+ - VNC_PASSWORD=${VNC_PASSWORD:-vncpassword}
31
+ - CHROME_DEBUGGING_PORT=9222
32
+ - CHROME_DEBUGGING_HOST=localhost
33
+ volumes:
34
+ - /tmp/.X11-unix:/tmp/.X11-unix
35
+ restart: unless-stopped
36
+ shm_size: '2gb'
37
+ cap_add:
38
+ - SYS_ADMIN
39
+ security_opt:
40
+ - seccomp=unconfined
41
+ tmpfs:
42
+ - /tmp
43
+ healthcheck:
44
+ test: ["CMD", "nc", "-z", "localhost", "5900"]
45
+ interval: 10s
46
+ timeout: 5s
47
+ retries: 3
pyvenv.cfg ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ home = C:\Users\Usuario\AppData\Local\Programs\Python\Python312
2
+ include-system-site-packages = false
3
+ version = 3.12.4
4
+ executable = C:\Users\Usuario\AppData\Local\Programs\Python\Python312\python.exe
5
+ command = C:\Users\Usuario\AppData\Local\Programs\Python\Python312\python.exe -m venv C:\Users\Usuario\web-ui
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ browser-use==0.1.19
2
+ langchain-google-genai==2.0.8
3
+ pyperclip==1.9.0
4
+ gradio==5.9.1
5
+ langchain-ollama==0.2.2
6
+ langchain-openai==0.2.14
supervisord.conf ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [supervisord]
2
+ nodaemon=true
3
+ logfile=/dev/stdout
4
+ logfile_maxbytes=0
5
+ loglevel=debug
6
+
7
+ [program:xvfb]
8
+ command=Xvfb :99 -screen 0 %(ENV_RESOLUTION)s -ac +extension GLX +render -noreset
9
+ autorestart=true
10
+ stdout_logfile=/dev/stdout
11
+ stdout_logfile_maxbytes=0
12
+ stderr_logfile=/dev/stderr
13
+ stderr_logfile_maxbytes=0
14
+ priority=100
15
+ startsecs=3
16
+
17
+ [program:vnc_setup]
18
+ command=bash -c "mkdir -p ~/.vnc && echo '%(ENV_VNC_PASSWORD)s' | vncpasswd -f > ~/.vnc/passwd && chmod 600 ~/.vnc/passwd && ls -la ~/.vnc/passwd"
19
+ autorestart=false
20
+ startsecs=0
21
+ priority=150
22
+ stdout_logfile=/dev/stdout
23
+ stdout_logfile_maxbytes=0
24
+ stderr_logfile=/dev/stderr
25
+ stderr_logfile_maxbytes=0
26
+
27
+ [program:x11vnc]
28
+ command=bash -c "sleep 3 && DISPLAY=:99 x11vnc -display :99 -forever -shared -rfbauth /root/.vnc/passwd -rfbport 5900 -bg -o /var/log/x11vnc.log"
29
+ autorestart=true
30
+ stdout_logfile=/dev/stdout
31
+ stdout_logfile_maxbytes=0
32
+ stderr_logfile=/dev/stderr
33
+ stderr_logfile_maxbytes=0
34
+ priority=200
35
+ startretries=5
36
+ startsecs=5
37
+ depends_on=vnc_setup
38
+
39
+ [program:x11vnc_log]
40
+ command=tail -f /var/log/x11vnc.log
41
+ autorestart=true
42
+ stdout_logfile=/dev/stdout
43
+ stdout_logfile_maxbytes=0
44
+ stderr_logfile=/dev/stderr
45
+ stderr_logfile_maxbytes=0
46
+ priority=250
47
+
48
+ [program:novnc]
49
+ command=bash -c "sleep 5 && cd /opt/novnc && ./utils/novnc_proxy --vnc localhost:5900 --listen 0.0.0.0:6080 --web /opt/novnc"
50
+ autorestart=true
51
+ stdout_logfile=/dev/stdout
52
+ stdout_logfile_maxbytes=0
53
+ stderr_logfile=/dev/stderr
54
+ stderr_logfile_maxbytes=0
55
+ priority=300
56
+ startretries=5
57
+ startsecs=3
58
+ depends_on=x11vnc
59
+
60
+ [program:persistent_browser]
61
+ command=bash -c 'mkdir -p /app/data/chrome_data && sleep 8 && google-chrome --user-data-dir=/app/data/chrome_data --window-position=0,0 --window-size=%(ENV_RESOLUTION_WIDTH)s,%(ENV_RESOLUTION_HEIGHT)s --start-maximized --no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-setuid-sandbox --no-first-run --no-default-browser-check --no-experiments --ignore-certificate-errors --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0 "data:text/html,<html><body style=\"background: \#f0f0f0; margin: 0; display: flex; justify-content: center; align-items: center; height: 100vh; font-family: Arial;\"><h1>Browser Ready for AI Interaction</h1></body></html>"'
62
+ autorestart=true
63
+ stdout_logfile=/dev/stdout
64
+ stdout_logfile_maxbytes=0
65
+ stderr_logfile=/dev/stderr
66
+ stderr_logfile_maxbytes=0
67
+ priority=350
68
+ startretries=3
69
+ startsecs=3
70
+ depends_on=novnc
71
+
72
+ [program:webui]
73
+ command=python webui.py --ip 0.0.0.0 --port 7788
74
+ directory=/app
75
+ autorestart=true
76
+ stdout_logfile=/dev/stdout
77
+ stdout_logfile_maxbytes=0
78
+ stderr_logfile=/dev/stderr
79
+ stderr_logfile_maxbytes=0
80
+ priority=400
81
+ startretries=3
82
+ startsecs=3
83
+ depends_on=persistent_browser
webui.py ADDED
@@ -0,0 +1,952 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pdb
2
+ import logging
3
+
4
+ from dotenv import load_dotenv
5
+
6
+ load_dotenv()
7
+ import os
8
+ import glob
9
+ import asyncio
10
+ import argparse
11
+ import os
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ import gradio as gr
16
+
17
+ from browser_use.agent.service import Agent
18
+ from playwright.async_api import async_playwright
19
+ from browser_use.browser.browser import Browser, BrowserConfig
20
+ from browser_use.browser.context import (
21
+ BrowserContextConfig,
22
+ BrowserContextWindowSize,
23
+ )
24
+ from playwright.async_api import async_playwright
25
+ from src.utils.agent_state import AgentState
26
+
27
+ from src.utils import utils
28
+ from src.agent.custom_agent import CustomAgent
29
+ from src.browser.custom_browser import CustomBrowser
30
+ from src.agent.custom_prompts import CustomSystemPrompt
31
+ from src.browser.custom_context import BrowserContextConfig, CustomBrowserContext
32
+ from src.controller.custom_controller import CustomController
33
+ from gradio.themes import Citrus, Default, Glass, Monochrome, Ocean, Origin, Soft, Base
34
+ from src.utils.default_config_settings import default_config, load_config_from_file, save_config_to_file, save_current_config, update_ui_from_config
35
+ from src.utils.utils import update_model_dropdown, get_latest_files, capture_screenshot
36
+
37
+
38
+ # Global variables for persistence
39
+ _global_browser = None
40
+ _global_browser_context = None
41
+
42
+ # Create the global agent state instance
43
+ _global_agent_state = AgentState()
44
+
45
async def stop_agent():
    """Ask the running agent to stop and return the matching UI updates.

    The stop is only *requested* here, by calling ``request_stop()`` on the
    module-level ``_global_agent_state``; this coroutine does not wait for
    the agent to actually halt.

    Returns:
        A 3-tuple whose items map, in order, to the errors output, the stop
        button and the run button:

        * on success: the status message, the stop button relabelled
          "Stopping..." and disabled, and the run button disabled;
        * if requesting the stop raises: the error text, the stop button
          restored to "Stop" and enabled, and the run button enabled.
    """
    # No ``global`` statement is needed: the shared state object is only
    # read and called here, never rebound.
    try:
        # Request stop
        _global_agent_state.request_stop()

        # Update UI immediately
        message = "Stop requested - the agent will halt at the next safe point"
        logger.info(f"🛑 {message}")

        # Return UI updates
        return (
            message,                                            # errors_output
            gr.update(value="Stopping...", interactive=False),  # stop_button
            gr.update(interactive=False),                       # run_button
        )
    except Exception as e:
        error_msg = f"Error during stop: {str(e)}"
        # logger.exception logs at ERROR level like logger.error did, but
        # also records the traceback so the failure can be diagnosed.
        logger.exception(error_msg)
        return (
            error_msg,
            gr.update(value="Stop", interactive=True),
            gr.update(interactive=True),
        )
71
+
72
def _find_recordings(directory):
    """Return the set of .mp4/.webm files (any letter case) directly inside *directory*."""
    patterns = ("*.[mM][pP]4", "*.[wW][eE][bB][mM]")
    return {
        path
        for pattern in patterns
        for path in glob.glob(os.path.join(directory, pattern))
    }


async def run_browser_agent(
    agent_type,
    llm_provider,
    llm_model_name,
    llm_temperature,
    llm_base_url,
    llm_api_key,
    use_own_browser,
    keep_browser_open,
    headless,
    disable_security,
    window_w,
    window_h,
    save_recording_path,
    save_agent_history_path,
    save_trace_path,
    enable_recording,
    task,
    add_infos,
    max_steps,
    use_vision,
    max_actions_per_step,
    tool_call_in_content
):
    """Build the LLM, run the selected agent once and collect its outputs.

    Args:
        agent_type: ``"org"`` or ``"custom"``; any other value is reported as
            an error in the returned tuple.
        enable_recording: When falsy, ``save_recording_path`` is ignored and
            no recording is looked up.
        add_infos: Only forwarded to the custom agent.
        (All remaining arguments are forwarded unchanged to
        ``utils.get_llm_model`` or to the agent runner.)

    Returns:
        A 9-tuple ``(final_result, errors, model_actions, model_thoughts,
        latest_video, trace_file, history_file, stop_button_update,
        run_button_update)``. This function does not raise for failures inside
        the ``try`` block: the error text and traceback are placed in
        ``errors`` and the other result slots are empty/``None``.
    """
    _global_agent_state.clear_stop()  # Clear any previous stop requests

    try:
        # Disable recording if the checkbox is unchecked
        if not enable_recording:
            save_recording_path = None

        # Snapshot the recordings that exist before the run so that the new
        # one can be identified afterwards.
        existing_videos = set()
        if save_recording_path:
            os.makedirs(save_recording_path, exist_ok=True)
            existing_videos = _find_recordings(save_recording_path)

        llm = utils.get_llm_model(
            provider=llm_provider,
            model_name=llm_model_name,
            temperature=llm_temperature,
            base_url=llm_base_url,
            api_key=llm_api_key,
        )

        # Arguments common to both agent runners.
        shared_kwargs = {
            "llm": llm,
            "use_own_browser": use_own_browser,
            "keep_browser_open": keep_browser_open,
            "headless": headless,
            "disable_security": disable_security,
            "window_w": window_w,
            "window_h": window_h,
            "save_recording_path": save_recording_path,
            "save_agent_history_path": save_agent_history_path,
            "save_trace_path": save_trace_path,
            "task": task,
            "max_steps": max_steps,
            "use_vision": use_vision,
            "max_actions_per_step": max_actions_per_step,
            "tool_call_in_content": tool_call_in_content,
        }
        if agent_type == "org":
            outcome = await run_org_agent(**shared_kwargs)
        elif agent_type == "custom":
            outcome = await run_custom_agent(add_infos=add_infos, **shared_kwargs)
        else:
            raise ValueError(f"Invalid agent type: {agent_type}")
        final_result, errors, model_actions, model_thoughts, trace_file, history_file = outcome

        # Pick the most recently written recording among those created by this
        # run. Indexing into the set difference would return an arbitrary file
        # when more than one new recording exists.
        latest_video = None
        if save_recording_path:
            fresh_videos = _find_recordings(save_recording_path) - existing_videos
            if fresh_videos:
                latest_video = max(fresh_videos, key=os.path.getmtime)

        return (
            final_result,
            errors,
            model_actions,
            model_thoughts,
            latest_video,
            trace_file,
            history_file,
            gr.update(value="Stop", interactive=True),  # Re-enable stop button
            gr.update(interactive=True)  # Re-enable run button
        )

    except Exception as e:
        import traceback
        traceback.print_exc()
        errors = str(e) + "\n" + traceback.format_exc()
        return (
            '',  # final_result
            errors,  # errors
            '',  # model_actions
            '',  # model_thoughts
            None,  # latest_video
            None,  # trace_file
            None,  # history_file
            gr.update(value="Stop", interactive=True),  # Re-enable stop button
            gr.update(interactive=True)  # Re-enable run button
        )
201
+
202
+
203
async def run_org_agent(
    llm,
    use_own_browser,
    keep_browser_open,
    headless,
    disable_security,
    window_w,
    window_h,
    save_recording_path,
    save_agent_history_path,
    save_trace_path,
    task,
    max_steps,
    use_vision,
    max_actions_per_step,
    tool_call_in_content
):
    """Run the stock ``Agent`` against the shared global browser.

    The module-level browser and browser context are created on first use and
    reused afterwards; they are closed at the end of the run unless
    ``keep_browser_open`` is truthy.

    Returns:
        ``(final_result, errors, model_actions, model_thoughts, trace_zip,
        history_file)``. If anything raises, the tuple is
        ``('', <error text + traceback>, '', '', None, None)``.
    """
    global _global_browser, _global_browser_context, _global_agent_state

    try:
        # Clear any previous stop request
        _global_agent_state.clear_stop()

        # Only honour CHROME_PATH when the user asked for their own browser;
        # an empty value is treated the same as an unset one.
        chrome_path = (os.getenv("CHROME_PATH", None) or None) if use_own_browser else None

        if _global_browser is None:
            browser_config = BrowserConfig(
                headless=headless,
                disable_security=disable_security,
                chrome_instance_path=chrome_path,
                extra_chromium_args=[f"--window-size={window_w},{window_h}"],
            )
            _global_browser = Browser(config=browser_config)

        if _global_browser_context is None:
            context_config = BrowserContextConfig(
                trace_path=save_trace_path or None,
                save_recording_path=save_recording_path or None,
                no_viewport=False,
                browser_window_size=BrowserContextWindowSize(
                    width=window_w, height=window_h
                ),
            )
            _global_browser_context = await _global_browser.new_context(config=context_config)

        agent = Agent(
            task=task,
            llm=llm,
            use_vision=use_vision,
            browser=_global_browser,
            browser_context=_global_browser_context,
            max_actions_per_step=max_actions_per_step,
            tool_call_in_content=tool_call_in_content
        )
        history = await agent.run(max_steps=max_steps)

        # Persist the run history under the agent's id before reading results.
        history_file = os.path.join(save_agent_history_path, f"{agent.agent_id}.json")
        agent.save_history(history_file)

        return (
            history.final_result(),
            history.errors(),
            history.model_actions(),
            history.model_thoughts(),
            get_latest_files(save_trace_path).get('.zip'),
            history_file,
        )
    except Exception as e:
        import traceback
        traceback.print_exc()
        return '', str(e) + "\n" + traceback.format_exc(), '', '', None, None
    finally:
        # Handle cleanup based on persistence configuration
        if not keep_browser_open:
            await close_global_browser()
292
+
293
async def run_custom_agent(
    llm,
    use_own_browser,
    keep_browser_open,
    headless,
    disable_security,
    window_w,
    window_h,
    save_recording_path,
    save_agent_history_path,
    save_trace_path,
    task,
    add_infos,
    max_steps,
    use_vision,
    max_actions_per_step,
    tool_call_in_content
):
    """Run ``CustomAgent`` against the shared global browser.

    The agent receives a fresh ``CustomController``, ``CustomSystemPrompt`` as
    its system prompt class and the module-level agent state. The global
    browser and context are created on first use and closed after the run
    unless ``keep_browser_open`` is truthy.

    Returns:
        ``(final_result, errors, model_actions, model_thoughts, trace_zip,
        history_file)``. If anything raises, the tuple is
        ``('', <error text + traceback>, '', '', None, None)``.
    """
    global _global_browser, _global_browser_context, _global_agent_state

    try:
        # Clear any previous stop request
        _global_agent_state.clear_stop()

        # Only honour CHROME_PATH when the user asked for their own browser;
        # an empty value is treated the same as an unset one.
        chrome_path = (os.getenv("CHROME_PATH", None) or None) if use_own_browser else None

        controller = CustomController()

        # Initialize global browser if needed
        if _global_browser is None:
            browser_config = BrowserConfig(
                headless=headless,
                disable_security=disable_security,
                chrome_instance_path=chrome_path,
                extra_chromium_args=[f"--window-size={window_w},{window_h}"],
            )
            _global_browser = CustomBrowser(config=browser_config)

        if _global_browser_context is None:
            context_config = BrowserContextConfig(
                trace_path=save_trace_path or None,
                save_recording_path=save_recording_path or None,
                no_viewport=False,
                browser_window_size=BrowserContextWindowSize(
                    width=window_w, height=window_h
                ),
            )
            _global_browser_context = await _global_browser.new_context(config=context_config)

        # Create and run agent
        agent = CustomAgent(
            task=task,
            add_infos=add_infos,
            use_vision=use_vision,
            llm=llm,
            browser=_global_browser,
            browser_context=_global_browser_context,
            controller=controller,
            system_prompt_class=CustomSystemPrompt,
            max_actions_per_step=max_actions_per_step,
            tool_call_in_content=tool_call_in_content,
            agent_state=_global_agent_state
        )
        history = await agent.run(max_steps=max_steps)

        # Persist the run history under the agent's id before reading results.
        history_file = os.path.join(save_agent_history_path, f"{agent.agent_id}.json")
        agent.save_history(history_file)

        return (
            history.final_result(),
            history.errors(),
            history.model_actions(),
            history.model_thoughts(),
            get_latest_files(save_trace_path).get('.zip'),
            history_file,
        )
    except Exception as e:
        import traceback
        traceback.print_exc()
        return '', str(e) + "\n" + traceback.format_exc(), '', '', None, None
    finally:
        # Handle cleanup based on persistence configuration
        if not keep_browser_open:
            await close_global_browser()
391
+
392
async def run_with_stream(
    agent_type,
    llm_provider,
    llm_model_name,
    llm_temperature,
    llm_base_url,
    llm_api_key,
    use_own_browser,
    keep_browser_open,
    headless,
    disable_security,
    window_w,
    window_h,
    save_recording_path,
    save_agent_history_path,
    save_trace_path,
    enable_recording,
    task,
    add_infos,
    max_steps,
    use_vision,
    max_actions_per_step,
    tool_call_in_content
):
    """Run the agent and stream UI updates while it works.

    This is an async generator. Every yielded value is a 10-element list:
    ``[browser_view_html, final_result, errors, model_actions, model_thoughts,
    latest_video, trace, history_file, stop_button_update, run_button_update]``.

    * When ``headless`` is falsy the agent is awaited to completion and a
      single list is yielded.
    * When ``headless`` is truthy the agent runs as a background task; while it
      is running, a screenshot of the global browser context (or a placeholder
      heading) is yielded roughly every 50 ms, followed by one final list with
      the agent's results.

    All arguments are forwarded unchanged to ``run_browser_agent``;
    ``window_w``/``window_h`` are additionally used to size the preview.
    """
    stream_vw = 80
    stream_vh = int(80 * window_h // window_w)

    def placeholder(text):
        # Heading shown in place of a screenshot, sized like the live view.
        return f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>{text}</h1>"

    agent_kwargs = {
        "agent_type": agent_type,
        "llm_provider": llm_provider,
        "llm_model_name": llm_model_name,
        "llm_temperature": llm_temperature,
        "llm_base_url": llm_base_url,
        "llm_api_key": llm_api_key,
        "use_own_browser": use_own_browser,
        "keep_browser_open": keep_browser_open,
        "headless": headless,
        "disable_security": disable_security,
        "window_w": window_w,
        "window_h": window_h,
        "save_recording_path": save_recording_path,
        "save_agent_history_path": save_agent_history_path,
        "save_trace_path": save_trace_path,
        "enable_recording": enable_recording,
        "task": task,
        "add_infos": add_infos,
        "max_steps": max_steps,
        "use_vision": use_vision,
        "max_actions_per_step": max_actions_per_step,
        "tool_call_in_content": tool_call_in_content,
    }

    if not headless:
        result = await run_browser_agent(**agent_kwargs)
        # Add HTML content at the start of the result array
        yield [placeholder("Using browser...")] + list(result)
        return

    try:
        _global_agent_state.clear_stop()
        # Run the browser agent in the background
        agent_task = asyncio.create_task(run_browser_agent(**agent_kwargs))

        # Initialize values for streaming
        html_content = placeholder("Using browser...")
        final_result = errors = model_actions = model_thoughts = ""
        latest_videos = trace = history_file = None
        # Defaults for the final yield. Without them, a failure while awaiting
        # the task would leave these names unbound and the real agent error
        # would be replaced by an UnboundLocalError.
        stop_button = gr.update(value="Stop", interactive=True)
        run_button = gr.update(interactive=True)

        # Periodically update the stream while the agent task is running
        while not agent_task.done():
            try:
                encoded_screenshot = await capture_screenshot(_global_browser_context)
            except Exception:
                # Best effort: the browser context may not be available yet.
                encoded_screenshot = None

            if encoded_screenshot is not None:
                html_content = f'<img src="data:image/jpeg;base64,{encoded_screenshot}" style="width:{stream_vw}vw; height:{stream_vh}vh ; border:1px solid #ccc;">'
            else:
                html_content = placeholder("Waiting for browser session...")

            if _global_agent_state and _global_agent_state.is_stop_requested():
                yield [
                    html_content,
                    final_result,
                    errors,
                    model_actions,
                    model_thoughts,
                    latest_videos,
                    trace,
                    history_file,
                    gr.update(value="Stopping...", interactive=False),  # stop_button
                    gr.update(interactive=False),  # run_button
                ]
                # Stop streaming; the final results are still awaited below.
                break

            yield [
                html_content,
                final_result,
                errors,
                model_actions,
                model_thoughts,
                latest_videos,
                trace,
                history_file,
                gr.update(value="Stop", interactive=True),  # Re-enable stop button
                gr.update(interactive=True)  # Re-enable run button
            ]
            await asyncio.sleep(0.05)

        # Once the agent task completes, get the results
        try:
            result = await agent_task
            (
                final_result,
                errors,
                model_actions,
                model_thoughts,
                latest_videos,
                trace,
                history_file,
                stop_button,
                run_button,
            ) = result
        except Exception as e:
            errors = f"Agent error: {str(e)}"

        yield [
            html_content,
            final_result,
            errors,
            model_actions,
            model_thoughts,
            latest_videos,
            trace,
            history_file,
            stop_button,
            run_button
        ]

    except Exception as e:
        import traceback
        yield [
            placeholder("Waiting for browser session..."),
            "",
            f"Error: {str(e)}\n{traceback.format_exc()}",
            "",
            "",
            None,
            None,
            None,
            gr.update(value="Stop", interactive=True),  # Re-enable stop button
            gr.update(interactive=True)  # Re-enable run button
        ]
558
+
559
# Module-level lookup from theme name to a theme instance. Each theme is
# instantiated once, in the order listed.
theme_map = {
    label: theme_factory()
    for label, theme_factory in (
        ("Default", Default),
        ("Soft", Soft),
        ("Monochrome", Monochrome),
        ("Glass", Glass),
        ("Origin", Origin),
        ("Citrus", Citrus),
        ("Ocean", Ocean),
        ("Base", Base),
    )
}
570
+
571
async def close_global_browser():
    """Close the shared browser context and browser, if any, and reset both globals.

    The context is closed before the browser. Each global is set to ``None``
    only after its ``close()`` call has returned.
    """
    global _global_browser, _global_browser_context

    if _global_browser_context:
        open_context = _global_browser_context
        await open_context.close()
        _global_browser_context = None

    if _global_browser:
        open_browser = _global_browser
        await open_browser.close()
        _global_browser = None
581
+
582
def create_ui(config, theme_name="Ocean"):
    """Build the Gradio ``Blocks`` application for the browser agent.

    Args:
        config: Mapping read with string keys (``'agent_type'``,
            ``'max_steps'``, ``'llm_provider'``, ``'save_recording_path'``,
            ``'task'``, ...) to supply the initial value of each input
            component.
        theme_name: Key into the module-level ``theme_map``.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    css = """
    .gradio-container {
        max-width: 1200px !important;
        margin: auto !important;
        padding-top: 20px !important;
    }
    .header-text {
        text-align: center;
        margin-bottom: 30px;
    }
    .theme-section {
        margin-bottom: 20px;
        padding: 15px;
        border-radius: 10px;
    }
    """

    # Redirects to the same URL with `__theme=dark` unless that query
    # parameter is already set.
    # NOTE(review): presumably invoked by Gradio on page load via `js=` — confirm.
    js = """
    function refresh() {
        const url = new URL(window.location);
        if (url.searchParams.get('__theme') !== 'dark') {
            url.searchParams.set('__theme', 'dark');
            window.location.href = url.href;
        }
    }
    """

    with gr.Blocks(
        title="Browser Use WebUI", theme=theme_map[theme_name], css=css, js=js
    ) as demo:
        with gr.Row():
            gr.Markdown(
                """
                # 🌐 Browser Use WebUI
                ### Control your browser with AI assistance
                """,
                elem_classes=["header-text"],
            )

        with gr.Tabs() as tabs:
            # Tab 1: which agent to run and its step/action limits.
            with gr.TabItem("⚙️ Agent Settings", id=1):
                with gr.Group():
                    agent_type = gr.Radio(
                        ["org", "custom"],
                        label="Agent Type",
                        value=config['agent_type'],
                        info="Select the type of agent to use",
                    )
                    max_steps = gr.Slider(
                        minimum=1,
                        maximum=200,
                        value=config['max_steps'],
                        step=1,
                        label="Max Run Steps",
                        info="Maximum number of steps the agent will take",
                    )
                    max_actions_per_step = gr.Slider(
                        minimum=1,
                        maximum=20,
                        value=config['max_actions_per_step'],
                        step=1,
                        label="Max Actions per Step",
                        info="Maximum number of actions the agent will take per step",
                    )
                    use_vision = gr.Checkbox(
                        label="Use Vision",
                        value=config['use_vision'],
                        info="Enable visual processing capabilities",
                    )
                    tool_call_in_content = gr.Checkbox(
                        label="Use Tool Calls in Content",
                        value=config['tool_call_in_content'],
                        info="Enable Tool Calls in content",
                    )

            # Tab 2: LLM provider, model and credentials.
            with gr.TabItem("🔧 LLM Configuration", id=2):
                with gr.Group():
                    llm_provider = gr.Dropdown(
                        choices=[provider for provider,model in utils.model_names.items()],
                        label="LLM Provider",
                        value=config['llm_provider'],
                        info="Select your preferred language model provider"
                    )
                    # The initial choices are always the 'openai' list; the
                    # llm_provider.change handler near the end of this function
                    # updates this dropdown when the provider changes.
                    llm_model_name = gr.Dropdown(
                        label="Model Name",
                        choices=utils.model_names['openai'],
                        value=config['llm_model_name'],
                        interactive=True,
                        allow_custom_value=True,  # Allow users to input custom model names
                        info="Select a model from the dropdown or type a custom model name"
                    )
                    llm_temperature = gr.Slider(
                        minimum=0.0,
                        maximum=2.0,
                        value=config['llm_temperature'],
                        step=0.1,
                        label="Temperature",
                        info="Controls randomness in model outputs"
                    )
                    with gr.Row():
                        llm_base_url = gr.Textbox(
                            label="Base URL",
                            value=config['llm_base_url'],
                            info="API endpoint URL (if required)"
                        )
                        llm_api_key = gr.Textbox(
                            label="API Key",
                            type="password",
                            value=config['llm_api_key'],
                            info="Your API key (leave blank to use .env)"
                        )

            # Tab 3: browser behaviour, window size and output directories.
            with gr.TabItem("🌐 Browser Settings", id=3):
                with gr.Group():
                    with gr.Row():
                        use_own_browser = gr.Checkbox(
                            label="Use Own Browser",
                            value=config['use_own_browser'],
                            info="Use your existing browser instance",
                        )
                        keep_browser_open = gr.Checkbox(
                            label="Keep Browser Open",
                            value=config['keep_browser_open'],
                            info="Keep Browser Open between Tasks",
                        )
                        headless = gr.Checkbox(
                            label="Headless Mode",
                            value=config['headless'],
                            info="Run browser without GUI",
                        )
                        disable_security = gr.Checkbox(
                            label="Disable Security",
                            value=config['disable_security'],
                            info="Disable browser security features",
                        )
                        enable_recording = gr.Checkbox(
                            label="Enable Recording",
                            value=config['enable_recording'],
                            info="Enable saving browser recordings",
                        )

                    with gr.Row():
                        window_w = gr.Number(
                            label="Window Width",
                            value=config['window_w'],
                            info="Browser window width",
                        )
                        window_h = gr.Number(
                            label="Window Height",
                            value=config['window_h'],
                            info="Browser window height",
                        )

                    save_recording_path = gr.Textbox(
                        label="Recording Path",
                        placeholder="e.g. ./tmp/record_videos",
                        value=config['save_recording_path'],
                        info="Path to save browser recordings",
                        interactive=True,  # Starts editable; the enable_recording.change handler below toggles this
                    )

                    save_trace_path = gr.Textbox(
                        label="Trace Path",
                        placeholder="e.g. ./tmp/traces",
                        value=config['save_trace_path'],
                        info="Path to save Agent traces",
                        interactive=True,
                    )

                    save_agent_history_path = gr.Textbox(
                        label="Agent History Save Path",
                        placeholder="e.g., ./tmp/agent_history",
                        value=config['save_agent_history_path'],
                        info="Specify the directory where agent history should be saved.",
                        interactive=True,
                    )

            # Tab 4: task input, run/stop buttons and the live browser view.
            with gr.TabItem("🤖 Run Agent", id=4):
                task = gr.Textbox(
                    label="Task Description",
                    lines=4,
                    placeholder="Enter your task here...",
                    value=config['task'],
                    info="Describe what you want the agent to do",
                )
                # Unlike the other inputs, this one has no initial value from config.
                add_infos = gr.Textbox(
                    label="Additional Information",
                    lines=3,
                    placeholder="Add any helpful context or instructions...",
                    info="Optional hints to help the LLM complete the task",
                )

                with gr.Row():
                    run_button = gr.Button("▶️ Run Agent", variant="primary", scale=2)
                    stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1)

                with gr.Row():
                    browser_view = gr.HTML(
                        value="<h1 style='width:80vw; height:50vh'>Waiting for browser session...</h1>",
                        label="Live Browser View",
                    )

            # Tab 5: load/save the settings above from/to a config file.
            with gr.TabItem("📁 Configuration", id=5):
                with gr.Group():
                    config_file_input = gr.File(
                        label="Load Config File",
                        file_types=[".pkl"],
                        interactive=True
                    )

                    load_config_button = gr.Button("Load Existing Config From File", variant="primary")
                    save_config_button = gr.Button("Save Current Config", variant="primary")

                    config_status = gr.Textbox(
                        label="Status",
                        lines=2,
                        interactive=False
                    )

                # The outputs list is positional: it has to line up with what
                # update_ui_from_config returns (defined elsewhere in this file).
                load_config_button.click(
                    fn=update_ui_from_config,
                    inputs=[config_file_input],
                    outputs=[
                        agent_type, max_steps, max_actions_per_step, use_vision, tool_call_in_content,
                        llm_provider, llm_model_name, llm_temperature, llm_base_url, llm_api_key,
                        use_own_browser, keep_browser_open, headless, disable_security, enable_recording,
                        window_w, window_h, save_recording_path, save_trace_path, save_agent_history_path,
                        task, config_status
                    ]
                )

                # The inputs list is positional: it has to line up with the
                # parameters of save_current_config (defined elsewhere in this file).
                save_config_button.click(
                    fn=save_current_config,
                    inputs=[
                        agent_type, max_steps, max_actions_per_step, use_vision, tool_call_in_content,
                        llm_provider, llm_model_name, llm_temperature, llm_base_url, llm_api_key,
                        use_own_browser, keep_browser_open, headless, disable_security,
                        enable_recording, window_w, window_h, save_recording_path, save_trace_path,
                        save_agent_history_path, task,
                    ],
                    outputs=[config_status]
                )

            # Tab 6: outputs of the most recent run.
            with gr.TabItem("📊 Results", id=6):
                with gr.Group():

                    recording_display = gr.Video(label="Latest Recording")

                    gr.Markdown("### Results")
                    with gr.Row():
                        with gr.Column():
                            final_result_output = gr.Textbox(
                                label="Final Result", lines=3, show_label=True
                            )
                        with gr.Column():
                            errors_output = gr.Textbox(
                                label="Errors", lines=3, show_label=True
                            )
                    with gr.Row():
                        with gr.Column():
                            model_actions_output = gr.Textbox(
                                label="Model Actions", lines=3, show_label=True
                            )
                        with gr.Column():
                            model_thoughts_output = gr.Textbox(
                                label="Model Thoughts", lines=3, show_label=True
                            )

                    trace_file = gr.File(label="Trace File")

                    agent_history_file = gr.File(label="Agent History")

                # Bind the stop button click event after errors_output is defined
                stop_button.click(
                    fn=stop_agent,
                    inputs=[],
                    outputs=[errors_output, stop_button, run_button],
                )

                # Run button click handler. The inputs follow the parameter
                # order of run_with_stream, and the outputs follow the order of
                # the 10-element lists it yields.
                run_button.click(
                    fn=run_with_stream,
                    inputs=[
                        agent_type, llm_provider, llm_model_name, llm_temperature, llm_base_url, llm_api_key,
                        use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h,
                        save_recording_path, save_agent_history_path, save_trace_path,  # The three output directories
                        enable_recording, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_call_in_content
                    ],
                    outputs=[
                        browser_view,  # Browser view
                        final_result_output,  # Final result
                        errors_output,  # Errors
                        model_actions_output,  # Model actions
                        model_thoughts_output,  # Model thoughts
                        recording_display,  # Latest recording
                        trace_file,  # Trace file
                        agent_history_file,  # Agent history file
                        stop_button,  # Stop button
                        run_button  # Run button
                    ],
                )

            # Tab 7: gallery of all recordings found on disk.
            with gr.TabItem("🎥 Recordings", id=7):
                def list_recordings(save_recording_path):
                    """Return ``(path, "N. filename")`` pairs for the videos in a directory.

                    Matches ``.mp4`` and ``.webm`` files in any letter case,
                    sorted by ``os.path.getctime`` ascending and numbered from 1.
                    Returns an empty list when the path does not exist.
                    """
                    if not os.path.exists(save_recording_path):
                        return []

                    # Get all video files
                    recordings = glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4")) + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]"))

                    # Sort recordings by creation time (oldest first)
                    recordings.sort(key=os.path.getctime)

                    # Add numbering to the recordings
                    numbered_recordings = []
                    for idx, recording in enumerate(recordings, start=1):
                        filename = os.path.basename(recording)
                        numbered_recordings.append((recording, f"{idx}. {filename}"))

                    return numbered_recordings

                # Populated once at build time from the configured path; the
                # refresh button re-reads the path currently in the textbox.
                recordings_gallery = gr.Gallery(
                    label="Recordings",
                    value=list_recordings(config['save_recording_path']),
                    columns=3,
                    height="auto",
                    object_fit="contain"
                )

                refresh_button = gr.Button("🔄 Refresh Recordings", variant="secondary")
                refresh_button.click(
                    fn=list_recordings,
                    inputs=save_recording_path,
                    outputs=recordings_gallery
                )

        # Attach the callback to the LLM provider dropdown
        llm_provider.change(
            lambda provider, api_key, base_url: update_model_dropdown(provider, api_key, base_url),
            inputs=[llm_provider, llm_api_key, llm_base_url],
            outputs=llm_model_name
        )

        # Make the recording path editable only while recording is enabled
        enable_recording.change(
            lambda enabled: gr.update(interactive=enabled),
            inputs=enable_recording,
            outputs=save_recording_path
        )

        # Changing either browser option closes the shared browser and context.
        use_own_browser.change(fn=close_global_browser)
        keep_browser_open.change(fn=close_global_browser)

    return demo
937
+
938
def main():
    """Parse the command-line options, build the UI and launch the Gradio server."""
    cli = argparse.ArgumentParser(description="Gradio UI for Browser Agent")
    cli.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
    cli.add_argument("--port", type=int, default=7788, help="Port to listen on")
    cli.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI")
    # NOTE(review): this flag is accepted but its value is not read anywhere in this function.
    cli.add_argument("--dark-mode", action="store_true", help="Enable dark mode")
    options = cli.parse_args()

    # Start from the default configuration; only the theme comes from the CLI.
    app = create_ui(default_config(), theme_name=options.theme)
    app.launch(server_name=options.ip, server_port=options.port)
950
+
951
# Start the web UI only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()