Spaces:

Josedcape
/

Webui

Sleeping

App Files Files Community

Josedcape commited on Jan 28, 2025

Commit

ca69a38

verified ·

1 Parent(s): 89ea86d

Upload 13 files

Browse files

Files changed (13) hide show

.dockerignore +2 -0
.env +37 -0
.gitignore +189 -0
Dockerfile +87 -0
LICENSE +21 -0
README.md +184 -14
SECURITY.md +19 -0
agent_history.gif +0 -0
docker-compose.yml +47 -0
pyvenv.cfg +5 -0
requirements.txt +6 -0
supervisord.conf +83 -0
webui.py +952 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ data
2	+ tmp

.env ADDED Viewed

	@@ -0,0 +1,37 @@

+OPENAI_ENDPOINT=https://api.openai.com/v1
+OPENAI_API_KEY=
+ANTHROPIC_API_KEY=
+GOOGLE_API_KEY=
+AZURE_OPENAI_ENDPOINT=
+AZURE_OPENAI_API_KEY=
+DEEPSEEK_ENDPOINT=https://api.deepseek.com
+DEEPSEEK_API_KEY=
+# Set to false to disable anonymized telemetry
+ANONYMIZED_TELEMETRY=true
+# LogLevel: Set to debug to enable verbose logging, set to result to get results only. Available: result | debug | info
+BROWSER_USE_LOGGING_LEVEL=info
+# Chrome settings
+CHROME_PATH=
+CHROME_USER_DATA=
+CHROME_DEBUGGING_PORT=9222
+CHROME_DEBUGGING_HOST=localhost
+# Set to true to keep browser open between AI tasks
+CHROME_PERSISTENT_SESSION=false
+# Display settings
+# Format: WIDTHxHEIGHTxDEPTH
+RESOLUTION=1920x1080x24
+# Width in pixels
+RESOLUTION_WIDTH=1920
+# Height in pixels
+RESOLUTION_HEIGHT=1080
+# VNC settings
+VNC_PASSWORD=youvncpassword

.gitignore ADDED Viewed

	@@ -0,0 +1,189 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+test_env/
+myenv
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+temp
+tmp
+.DS_Store
+private_example.py
+private_example
+browser_cookies.json
+cookies.json
+AgentHistory.json
+cv_04_24.pdf
+AgentHistoryList.json
+*.gif
+# For Sharing (.pem files)
+.gradio/
+# For Docker
+data/
+# For Config Files (Current Settings)
+.config.pkl

Dockerfile ADDED Viewed

	@@ -0,0 +1,87 @@

+FROM python:3.11-slim
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    wget \
+    gnupg \
+    curl \
+    unzip \
+    xvfb \
+    libgconf-2-4 \
+    libxss1 \
+    libnss3 \
+    libnspr4 \
+    libasound2 \
+    libatk1.0-0 \
+    libatk-bridge2.0-0 \
+    libcups2 \
+    libdbus-1-3 \
+    libdrm2 \
+    libgbm1 \
+    libgtk-3-0 \
+    libxcomposite1 \
+    libxdamage1 \
+    libxfixes3 \
+    libxrandr2 \
+    xdg-utils \
+    fonts-liberation \
+    dbus \
+    xauth \
+    xvfb \
+    x11vnc \
+    tigervnc-tools \
+    supervisor \
+    net-tools \
+    procps \
+    git \
+    python3-numpy \
+    fontconfig \
+    fonts-dejavu \
+    fonts-dejavu-core \
+    fonts-dejavu-extra \
+    && rm -rf /var/lib/apt/lists/*
+# Install noVNC
+RUN git clone https://github.com/novnc/noVNC.git /opt/novnc \
+    && git clone https://github.com/novnc/websockify /opt/novnc/utils/websockify \
+    && ln -s /opt/novnc/vnc.html /opt/novnc/index.html
+# Install Chrome
+RUN curl -fsSL https://dl.google.com/linux/linux_signing_key.pub | gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg \
+    && echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" | tee /etc/apt/sources.list.d/google-chrome.list
+# Set up working directory
+WORKDIR /app
+# Copy requirements and install Python dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Install Playwright and browsers with system dependencies
+ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
+RUN playwright install --with-deps chromium
+RUN playwright install-deps
+RUN apt-get install -y google-chrome-stable
+# Copy the application code
+COPY . .
+# Set environment variables
+ENV PYTHONUNBUFFERED=1
+ENV BROWSER_USE_LOGGING_LEVEL=info
+ENV CHROME_PATH=/usr/bin/google-chrome
+ENV ANONYMIZED_TELEMETRY=false
+ENV DISPLAY=:99
+ENV RESOLUTION=1920x1080x24
+ENV VNC_PASSWORD=vncpassword
+ENV CHROME_PERSISTENT_SESSION=true
+ENV RESOLUTION_WIDTH=1920
+ENV RESOLUTION_HEIGHT=1080
+# Set up supervisor configuration
+RUN mkdir -p /var/log/supervisor
+COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
+EXPOSE 7788 6080 5900
+CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2024 Browser Use Inc.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md CHANGED Viewed

@@ -1,14 +1,184 @@
----
-title: Webui
-emoji: 💻
-colorFrom: purple
-colorTo: blue
-sdk: gradio
-sdk_version: 5.13.1
-app_file: app.py
-pinned: false
-license: mit
-short_description: 'Automatizacion de paginas web '
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+<img src="./assets/web-ui.png" alt="Browser Use Web UI" width="full"/>
+<br/>
+[![GitHub stars](https://img.shields.io/github/stars/browser-use/web-ui?style=social)](https://github.com/browser-use/web-ui/stargazers)
+[![Discord](https://img.shields.io/discord/1303749220842340412?color=7289DA&label=Discord&logo=discord&logoColor=white)](https://link.browser-use.com/discord)
+[![Documentation](https://img.shields.io/badge/Documentation-📕-blue)](https://docs.browser-use.com)
+[![WarmShao](https://img.shields.io/twitter/follow/warmshao?style=social)](https://x.com/warmshao)
+This project builds upon the foundation of the [browser-use](https://github.com/browser-use/browser-use), which is designed to make websites accessible for AI agents.
+We would like to officially thank [WarmShao](https://github.com/warmshao) for his contribution to this project.
+**WebUI:** is built on Gradio and supports most of the `browser-use` functionality. This UI is designed to be user-friendly and enables easy interaction with the browser agent.
+**Expanded LLM Support:** We've integrated support for various Large Language Models (LLMs), including: Gemini, OpenAI, Azure OpenAI, Anthropic, DeepSeek, Ollama etc. And we plan to add support for even more models in the future.
+**Custom Browser Support:** You can use your own browser with our tool, eliminating the need to re-login to sites or deal with other authentication challenges. This feature also supports high-definition screen recording.
+**Persistent Browser Sessions:** You can choose to keep the browser window open between AI tasks, allowing you to see the complete history and state of AI interactions.
+<video src="https://github.com/user-attachments/assets/56bc7080-f2e3-4367-af22-6bf2245ff6cb" controls="controls">Your browser does not support playing this video!</video>
+## Installation Options
+### Option 1: Local Installation
+Read the [quickstart guide](https://docs.browser-use.com/quickstart#prepare-the-environment) or follow the steps below to get started.
+> Python 3.11 or higher is required.
+First, we recommend using [uv](https://docs.astral.sh/uv/) to setup the Python environment.
+```bash
+uv venv --python 3.11
+```
+and activate it with:
+```bash
+source .venv/bin/activate
+```
+Install the dependencies:
+```bash
+uv pip install -r requirements.txt
+```
+Then install playwright:
+```bash
+playwright install
+```
+### Option 2: Docker Installation
+1. **Prerequisites:**
+   - Docker and Docker Compose installed on your system
+   - Git to clone the repository
+2. **Setup:**
+   ```bash
+   # Clone the repository
+   git clone https://github.com/browser-use/web-ui.git
+   cd web-ui
+   # Copy and configure environment variables
+   cp .env.example .env
+   # Edit .env with your preferred text editor and add your API keys
+   ```
+3. **Run with Docker:**
+   ```bash
+   # Build and start the container with default settings (browser closes after AI tasks)
+   docker compose up --build
+   # Or run with persistent browser (browser stays open between AI tasks)
+   CHROME_PERSISTENT_SESSION=true docker compose up --build
+   ```
+4. **Access the Application:**
+   - WebUI: `http://localhost:7788`
+   - VNC Viewer (to see browser interactions): `http://localhost:6080/vnc.html`
+   Default VNC password is "vncpassword". You can change it by setting the `VNC_PASSWORD` environment variable in your `.env` file.
+## Usage
+### Local Setup
+1.  Copy `.env.example` to `.env` and set your environment variables, including API keys for the LLM. `cp .env.example .env`
+2.  **Run the WebUI:**
+    ```bash
+    python webui.py --ip 127.0.0.1 --port 7788
+    ```
+    WebUI options:
+   - `--ip`: The IP address to bind the WebUI to. Default is `127.0.0.1`.
+   - `--port`: The port to bind the WebUI to. Default is `7788`.
+   - `--theme`: The theme for the user interface. Default is `Ocean`.
+     - **Default**: The standard theme with a balanced design.
+     - **Soft**: A gentle, muted color scheme for a relaxed viewing experience.
+     - **Monochrome**: A grayscale theme with minimal color for simplicity and focus.
+     - **Glass**: A sleek, semi-transparent design for a modern appearance.
+     - **Origin**: A classic, retro-inspired theme for a nostalgic feel.
+     - **Citrus**: A vibrant, citrus-inspired palette with bright and fresh colors.
+     - **Ocean** (default): A blue, ocean-inspired theme providing a calming effect.
+   - `--dark-mode`: Enables dark mode for the user interface.
+3.  **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`.
+4.  **Using Your Own Browser(Optional):**
+    - Set `CHROME_PATH` to the executable path of your browser and `CHROME_USER_DATA` to the user data directory of your browser.
+      - Windows
+        ```env
+         CHROME_PATH="C:\Program Files\Google\Chrome\Application\chrome.exe"
+         CHROME_USER_DATA="C:\Users\YourUsername\AppData\Local\Google\Chrome\User Data"
+        ```
+        > Note: Replace `YourUsername` with your actual Windows username for Windows systems.
+      - Mac
+        ```env
+         CHROME_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
+         CHROME_USER_DATA="~/Library/Application Support/Google/Chrome/Profile 1"
+        ```
+    - Close all Chrome windows
+    - Open the WebUI in a non-Chrome browser, such as Firefox or Edge. This is important because the persistent browser context will use the Chrome data when running the agent.
+    - Check the "Use Own Browser" option within the Browser Settings.
+5. **Keep Browser Open(Optional):**
+    - Set `CHROME_PERSISTENT_SESSION=true` in the `.env` file.
+### Docker Setup
+1. **Environment Variables:**
+   - All configuration is done through the `.env` file
+   - Available environment variables:
+     ```
+     # LLM API Keys
+     OPENAI_API_KEY=your_key_here
+     ANTHROPIC_API_KEY=your_key_here
+     GOOGLE_API_KEY=your_key_here
+     # Browser Settings
+     CHROME_PERSISTENT_SESSION=true   # Set to true to keep browser open between AI tasks
+     RESOLUTION=1920x1080x24         # Custom resolution format: WIDTHxHEIGHTxDEPTH
+     RESOLUTION_WIDTH=1920           # Custom width in pixels
+     RESOLUTION_HEIGHT=1080          # Custom height in pixels
+     # VNC Settings
+     VNC_PASSWORD=your_vnc_password  # Optional, defaults to "vncpassword"
+     ```
+2. **Browser Persistence Modes:**
+   - **Default Mode (CHROME_PERSISTENT_SESSION=false):**
+     - Browser opens and closes with each AI task
+     - Clean state for each interaction
+     - Lower resource usage
+   - **Persistent Mode (CHROME_PERSISTENT_SESSION=true):**
+     - Browser stays open between AI tasks
+     - Maintains history and state
+     - Allows viewing previous AI interactions
+     - Set in `.env` file or via environment variable when starting container
+3. **Viewing Browser Interactions:**
+   - Access the noVNC viewer at `http://localhost:6080/vnc.html`
+   - Enter the VNC password (default: "vncpassword" or what you set in VNC_PASSWORD)
+   - You can now see all browser interactions in real-time
+4. **Container Management:**
+   ```bash
+   # Start with persistent browser
+   CHROME_PERSISTENT_SESSION=true docker compose up -d
+   # Start with default mode (browser closes after tasks)
+   docker compose up -d
+   # View logs
+   docker compose logs -f
+   # Stop the container
+   docker compose down
+   ```
+## Changelog
+- [x] **2025/01/26:** Thanks to @vvincent1234. Now browser-use-webui can combine with DeepSeek-r1 to engage in deep thinking!
+- [x] **2025/01/10:** Thanks to @casistack. Now we have a Docker setup option and support for keeping the browser open between tasks. [Video tutorial demo](https://github.com/browser-use/web-ui/issues/1#issuecomment-2582511750).
+- [x] **2025/01/06:** Thanks to @richard-devbot. A New and Well-Designed WebUI is released. [Video tutorial demo](https://github.com/warmshao/browser-use-webui/issues/1#issuecomment-2573393113).

SECURITY.md ADDED Viewed

	@@ -0,0 +1,19 @@

+## Reporting Security Issues
+If you believe you have found a security vulnerability in browser-use, please report it through coordinated disclosure.
+**Please do not report security vulnerabilities through the repository issues, discussions, or pull requests.**
+Instead, please open a new [GitHub security advisory](https://github.com/browser-use/web-ui/security/advisories/new).
+Please include as much of the information listed below as you can to help me better understand and resolve the issue:
+* The type of issue (e.g., buffer overflow, SQL injection, or cross-site scripting)
+* Full paths of source file(s) related to the manifestation of the issue
+* The location of the affected source code (tag/branch/commit or direct URL)
+* Any special configuration required to reproduce the issue
+* Step-by-step instructions to reproduce the issue
+* Proof-of-concept or exploit code (if possible)
+* Impact of the issue, including how an attacker might exploit the issue
+This information will help me triage your report more quickly.

agent_history.gif ADDED Viewed

docker-compose.yml ADDED Viewed

	@@ -0,0 +1,47 @@

+services:
+  browser-use-webui:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    ports:
+      - "7788:7788"  # Gradio default port
+      - "6080:6080"  # noVNC web interface
+      - "5900:5900"  # VNC port
+      - "9222:9222"  # Chrome remote debugging port
+    environment:
+      - OPENAI_ENDPOINT=${OPENAI_ENDPOINT:-https://api.openai.com/v1}
+      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
+      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
+      - GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
+      - AZURE_OPENAI_ENDPOINT=${AZURE_OPENAI_ENDPOINT:-}
+      - AZURE_OPENAI_API_KEY=${AZURE_OPENAI_API_KEY:-}
+      - DEEPSEEK_ENDPOINT=${DEEPSEEK_ENDPOINT:-https://api.deepseek.com}
+      - DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:-}
+      - BROWSER_USE_LOGGING_LEVEL=${BROWSER_USE_LOGGING_LEVEL:-info}
+      - ANONYMIZED_TELEMETRY=false
+      - CHROME_PATH=/usr/bin/google-chrome
+      - CHROME_USER_DATA=/app/data/chrome_data
+      - CHROME_PERSISTENT_SESSION=${CHROME_PERSISTENT_SESSION:-false}
+      - DISPLAY=:99
+      - PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
+      - RESOLUTION=${RESOLUTION:-1920x1080x24}
+      - RESOLUTION_WIDTH=${RESOLUTION_WIDTH:-1920}
+      - RESOLUTION_HEIGHT=${RESOLUTION_HEIGHT:-1080}
+      - VNC_PASSWORD=${VNC_PASSWORD:-vncpassword}
+      - CHROME_DEBUGGING_PORT=9222
+      - CHROME_DEBUGGING_HOST=localhost
+    volumes:
+      - /tmp/.X11-unix:/tmp/.X11-unix
+    restart: unless-stopped
+    shm_size: '2gb'
+    cap_add:
+      - SYS_ADMIN
+    security_opt:
+      - seccomp=unconfined
+    tmpfs:
+      - /tmp
+    healthcheck:
+      test: ["CMD", "nc", "-z", "localhost", "5900"]
+      interval: 10s
+      timeout: 5s
+      retries: 3

pyvenv.cfg ADDED Viewed

	@@ -0,0 +1,5 @@

+home = C:\Users\Usuario\AppData\Local\Programs\Python\Python312
+include-system-site-packages = false
+version = 3.12.4
+executable = C:\Users\Usuario\AppData\Local\Programs\Python\Python312\python.exe
+command = C:\Users\Usuario\AppData\Local\Programs\Python\Python312\python.exe -m venv C:\Users\Usuario\web-ui

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+browser-use==0.1.19
+langchain-google-genai==2.0.8
+pyperclip==1.9.0
+gradio==5.9.1
+langchain-ollama==0.2.2
+langchain-openai==0.2.14

supervisord.conf ADDED Viewed

	@@ -0,0 +1,83 @@

+[supervisord]
+nodaemon=true
+logfile=/dev/stdout
+logfile_maxbytes=0
+loglevel=debug
+[program:xvfb]
+command=Xvfb :99 -screen 0 %(ENV_RESOLUTION)s -ac +extension GLX +render -noreset
+autorestart=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+priority=100
+startsecs=3
+[program:vnc_setup]
+command=bash -c "mkdir -p ~/.vnc && echo '%(ENV_VNC_PASSWORD)s' | vncpasswd -f > ~/.vnc/passwd && chmod 600 ~/.vnc/passwd && ls -la ~/.vnc/passwd"
+autorestart=false
+startsecs=0
+priority=150
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+[program:x11vnc]
+command=bash -c "sleep 3 && DISPLAY=:99 x11vnc -display :99 -forever -shared -rfbauth /root/.vnc/passwd -rfbport 5900 -bg -o /var/log/x11vnc.log"
+autorestart=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+priority=200
+startretries=5
+startsecs=5
+depends_on=vnc_setup
+[program:x11vnc_log]
+command=tail -f /var/log/x11vnc.log
+autorestart=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+priority=250
+[program:novnc]
+command=bash -c "sleep 5 && cd /opt/novnc && ./utils/novnc_proxy --vnc localhost:5900 --listen 0.0.0.0:6080 --web /opt/novnc"
+autorestart=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+priority=300
+startretries=5
+startsecs=3
+depends_on=x11vnc
+[program:persistent_browser]
+command=bash -c 'mkdir -p /app/data/chrome_data && sleep 8 && google-chrome --user-data-dir=/app/data/chrome_data --window-position=0,0 --window-size=%(ENV_RESOLUTION_WIDTH)s,%(ENV_RESOLUTION_HEIGHT)s --start-maximized --no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-setuid-sandbox --no-first-run --no-default-browser-check --no-experiments --ignore-certificate-errors --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0 "data:text/html,<html><body style=\"background: \#f0f0f0; margin: 0; display: flex; justify-content: center; align-items: center; height: 100vh; font-family: Arial;\"><h1>Browser Ready for AI Interaction</h1></body></html>"'
+autorestart=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+priority=350
+startretries=3
+startsecs=3
+depends_on=novnc
+[program:webui]
+command=python webui.py --ip 0.0.0.0 --port 7788
+directory=/app
+autorestart=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+priority=400
+startretries=3
+startsecs=3
+depends_on=persistent_browser

webui.py ADDED Viewed

	@@ -0,0 +1,952 @@

+import pdb
+import logging
+from dotenv import load_dotenv
+load_dotenv()
+import os
+import glob
+import asyncio
+import argparse
+import os
+logger = logging.getLogger(__name__)
+import gradio as gr
+from browser_use.agent.service import Agent
+from playwright.async_api import async_playwright
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import (
+    BrowserContextConfig,
+    BrowserContextWindowSize,
+)
+from playwright.async_api import async_playwright
+from src.utils.agent_state import AgentState
+from src.utils import utils
+from src.agent.custom_agent import CustomAgent
+from src.browser.custom_browser import CustomBrowser
+from src.agent.custom_prompts import CustomSystemPrompt
+from src.browser.custom_context import BrowserContextConfig, CustomBrowserContext
+from src.controller.custom_controller import CustomController
+from gradio.themes import Citrus, Default, Glass, Monochrome, Ocean, Origin, Soft, Base
+from src.utils.default_config_settings import default_config, load_config_from_file, save_config_to_file, save_current_config, update_ui_from_config
+from src.utils.utils import update_model_dropdown, get_latest_files, capture_screenshot
+# Browser and context shared across runs; run_org_agent closes and resets them after a run unless keep_browser_open is set
+_global_browser = None
+_global_browser_context = None
+# Shared agent state instance; the functions below use it to request and clear stop signals
+_global_agent_state = AgentState()
+async def stop_agent():
+    """Request the agent to stop and update UI with enhanced feedback"""
+    global _global_agent_state, _global_browser_context, _global_browser
+    try:
+        # Request stop
+        _global_agent_state.request_stop()
+        # Update UI immediately
+        message = "Stop requested - the agent will halt at the next safe point"
+        logger.info(f"🛑 {message}")
+        # Return UI updates
+        return (
+            message,                                        # errors_output
+            gr.update(value="Stopping...", interactive=False),  # stop_button
+            gr.update(interactive=False),                      # run_button
+        )
+    except Exception as e:
+        error_msg = f"Error during stop: {str(e)}"
+        logger.error(error_msg)
+        return (
+            error_msg,
+            gr.update(value="Stop", interactive=True),
+            gr.update(interactive=True)
+        )
+async def run_browser_agent(
+        agent_type,
+        llm_provider,
+        llm_model_name,
+        llm_temperature,
+        llm_base_url,
+        llm_api_key,
+        use_own_browser,
+        keep_browser_open,
+        headless,
+        disable_security,
+        window_w,
+        window_h,
+        save_recording_path,
+        save_agent_history_path,
+        save_trace_path,
+        enable_recording,
+        task,
+        add_infos,
+        max_steps,
+        use_vision,
+        max_actions_per_step,
+        tool_call_in_content
+):
+    global _global_agent_state
+    _global_agent_state.clear_stop()  # Clear any previous stop requests
+    try:
+        # Disable recording if the checkbox is unchecked
+        if not enable_recording:
+            save_recording_path = None
+        # Ensure the recording directory exists if recording is enabled
+        if save_recording_path:
+            os.makedirs(save_recording_path, exist_ok=True)
+        # Get the list of existing videos before the agent runs
+        existing_videos = set()
+        if save_recording_path:
+            existing_videos = set(
+                glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4"))
+                + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]"))
+            )
+        # Run the agent
+        llm = utils.get_llm_model(
+            provider=llm_provider,
+            model_name=llm_model_name,
+            temperature=llm_temperature,
+            base_url=llm_base_url,
+            api_key=llm_api_key,
+        )
+        if agent_type == "org":
+            final_result, errors, model_actions, model_thoughts, trace_file, history_file = await run_org_agent(
+                llm=llm,
+                use_own_browser=use_own_browser,
+                keep_browser_open=keep_browser_open,
+                headless=headless,
+                disable_security=disable_security,
+                window_w=window_w,
+                window_h=window_h,
+                save_recording_path=save_recording_path,
+                save_agent_history_path=save_agent_history_path,
+                save_trace_path=save_trace_path,
+                task=task,
+                max_steps=max_steps,
+                use_vision=use_vision,
+                max_actions_per_step=max_actions_per_step,
+                tool_call_in_content=tool_call_in_content
+            )
+        elif agent_type == "custom":
+            final_result, errors, model_actions, model_thoughts, trace_file, history_file = await run_custom_agent(
+                llm=llm,
+                use_own_browser=use_own_browser,
+                keep_browser_open=keep_browser_open,
+                headless=headless,
+                disable_security=disable_security,
+                window_w=window_w,
+                window_h=window_h,
+                save_recording_path=save_recording_path,
+                save_agent_history_path=save_agent_history_path,
+                save_trace_path=save_trace_path,
+                task=task,
+                add_infos=add_infos,
+                max_steps=max_steps,
+                use_vision=use_vision,
+                max_actions_per_step=max_actions_per_step,
+                tool_call_in_content=tool_call_in_content
+            )
+        else:
+            raise ValueError(f"Invalid agent type: {agent_type}")
+        # Get the list of videos after the agent runs (if recording is enabled)
+        latest_video = None
+        if save_recording_path:
+            new_videos = set(
+                glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4"))
+                + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]"))
+            )
+            if new_videos - existing_videos:
+                latest_video = list(new_videos - existing_videos)[0]  # Get the first new video
+        return (
+            final_result,
+            errors,
+            model_actions,
+            model_thoughts,
+            latest_video,
+            trace_file,
+            history_file,
+            gr.update(value="Stop", interactive=True),  # Re-enable stop button
+            gr.update(interactive=True)    # Re-enable run button
+        )
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        errors = str(e) + "\n" + traceback.format_exc()
+        return (
+            '',                                         # final_result
+            errors,                                     # errors
+            '',                                         # model_actions
+            '',                                         # model_thoughts
+            None,                                       # latest_video
+            None,                                       # history_file
+            None,                                       # trace_file
+            gr.update(value="Stop", interactive=True),  # Re-enable stop button
+            gr.update(interactive=True)    # Re-enable run button
+        )
+async def run_org_agent(
+        llm,
+        use_own_browser,
+        keep_browser_open,
+        headless,
+        disable_security,
+        window_w,
+        window_h,
+        save_recording_path,
+        save_agent_history_path,
+        save_trace_path,
+        task,
+        max_steps,
+        use_vision,
+        max_actions_per_step,
+        tool_call_in_content
+):
+    try:
+        global _global_browser, _global_browser_context, _global_agent_state
+        # Clear any previous stop request
+        _global_agent_state.clear_stop()
+        if use_own_browser:
+            chrome_path = os.getenv("CHROME_PATH", None)
+            if chrome_path == "":
+                chrome_path = None
+        else:
+            chrome_path = None
+        if _global_browser is None:
+            _global_browser = Browser(
+                config=BrowserConfig(
+                    headless=headless,
+                    disable_security=disable_security,
+                    chrome_instance_path=chrome_path,
+                    extra_chromium_args=[f"--window-size={window_w},{window_h}"],
+                )
+            )
+        if _global_browser_context is None:
+            _global_browser_context = await _global_browser.new_context(
+                config=BrowserContextConfig(
+                    trace_path=save_trace_path if save_trace_path else None,
+                    save_recording_path=save_recording_path if save_recording_path else None,
+                    no_viewport=False,
+                    browser_window_size=BrowserContextWindowSize(
+                        width=window_w, height=window_h
+                    ),
+                )
+            )
+        agent = Agent(
+            task=task,
+            llm=llm,
+            use_vision=use_vision,
+            browser=_global_browser,
+            browser_context=_global_browser_context,
+            max_actions_per_step=max_actions_per_step,
+            tool_call_in_content=tool_call_in_content
+        )
+        history = await agent.run(max_steps=max_steps)
+        history_file = os.path.join(save_agent_history_path, f"{agent.agent_id}.json")
+        agent.save_history(history_file)
+        final_result = history.final_result()
+        errors = history.errors()
+        model_actions = history.model_actions()
+        model_thoughts = history.model_thoughts()
+        trace_file = get_latest_files(save_trace_path)
+        return final_result, errors, model_actions, model_thoughts, trace_file.get('.zip'), history_file
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        errors = str(e) + "\n" + traceback.format_exc()
+        return '', errors, '', '', None, None
+    finally:
+        # Handle cleanup based on persistence configuration
+        if not keep_browser_open:
+            if _global_browser_context:
+                await _global_browser_context.close()
+                _global_browser_context = None
+            if _global_browser:
+                await _global_browser.close()
+                _global_browser = None
+async def run_custom_agent(
+        llm,
+        use_own_browser,
+        keep_browser_open,
+        headless,
+        disable_security,
+        window_w,
+        window_h,
+        save_recording_path,
+        save_agent_history_path,
+        save_trace_path,
+        task,
+        add_infos,
+        max_steps,
+        use_vision,
+        max_actions_per_step,
+        tool_call_in_content
+):
+    try:
+        global _global_browser, _global_browser_context, _global_agent_state
+        # Clear any previous stop request
+        _global_agent_state.clear_stop()
+        if use_own_browser:
+            chrome_path = os.getenv("CHROME_PATH", None)
+            if chrome_path == "":
+                chrome_path = None
+        else:
+            chrome_path = None
+        controller = CustomController()
+        # Initialize global browser if needed
+        if _global_browser is None:
+            _global_browser = CustomBrowser(
+                config=BrowserConfig(
+                    headless=headless,
+                    disable_security=disable_security,
+                    chrome_instance_path=chrome_path,
+                    extra_chromium_args=[f"--window-size={window_w},{window_h}"],
+                )
+            )
+        if _global_browser_context is None:
+            _global_browser_context = await _global_browser.new_context(
+                config=BrowserContextConfig(
+                    trace_path=save_trace_path if save_trace_path else None,
+                    save_recording_path=save_recording_path if save_recording_path else None,
+                    no_viewport=False,
+                    browser_window_size=BrowserContextWindowSize(
+                        width=window_w, height=window_h
+                    ),
+                )
+            )
+        # Create and run agent
+        agent = CustomAgent(
+            task=task,
+            add_infos=add_infos,
+            use_vision=use_vision,
+            llm=llm,
+            browser=_global_browser,
+            browser_context=_global_browser_context,
+            controller=controller,
+            system_prompt_class=CustomSystemPrompt,
+            max_actions_per_step=max_actions_per_step,
+            tool_call_in_content=tool_call_in_content,
+            agent_state=_global_agent_state
+        )
+        history = await agent.run(max_steps=max_steps)
+        history_file = os.path.join(save_agent_history_path, f"{agent.agent_id}.json")
+        agent.save_history(history_file)
+        final_result = history.final_result()
+        errors = history.errors()
+        model_actions = history.model_actions()
+        model_thoughts = history.model_thoughts()
+        trace_file = get_latest_files(save_trace_path)
+        return final_result, errors, model_actions, model_thoughts, trace_file.get('.zip'), history_file
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        errors = str(e) + "\n" + traceback.format_exc()
+        return '', errors, '', '', None, None
+    finally:
+        # Handle cleanup based on persistence configuration
+        if not keep_browser_open:
+            if _global_browser_context:
+                await _global_browser_context.close()
+                _global_browser_context = None
+            if _global_browser:
+                await _global_browser.close()
+                _global_browser = None
+async def run_with_stream(
+    agent_type,
+    llm_provider,
+    llm_model_name,
+    llm_temperature,
+    llm_base_url,
+    llm_api_key,
+    use_own_browser,
+    keep_browser_open,
+    headless,
+    disable_security,
+    window_w,
+    window_h,
+    save_recording_path,
+    save_agent_history_path,
+    save_trace_path,
+    enable_recording,
+    task,
+    add_infos,
+    max_steps,
+    use_vision,
+    max_actions_per_step,
+    tool_call_in_content
+):
+    global _global_agent_state
+    stream_vw = 80
+    stream_vh = int(80 * window_h // window_w)
+    if not headless:
+        result = await run_browser_agent(
+            agent_type=agent_type,
+            llm_provider=llm_provider,
+            llm_model_name=llm_model_name,
+            llm_temperature=llm_temperature,
+            llm_base_url=llm_base_url,
+            llm_api_key=llm_api_key,
+            use_own_browser=use_own_browser,
+            keep_browser_open=keep_browser_open,
+            headless=headless,
+            disable_security=disable_security,
+            window_w=window_w,
+            window_h=window_h,
+            save_recording_path=save_recording_path,
+            save_agent_history_path=save_agent_history_path,
+            save_trace_path=save_trace_path,
+            enable_recording=enable_recording,
+            task=task,
+            add_infos=add_infos,
+            max_steps=max_steps,
+            use_vision=use_vision,
+            max_actions_per_step=max_actions_per_step,
+            tool_call_in_content=tool_call_in_content
+        )
+        # Add HTML content at the start of the result array
+        html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Using browser...</h1>"
+        yield [html_content] + list(result)
+    else:
+        try:
+            _global_agent_state.clear_stop()
+            # Run the browser agent in the background
+            agent_task = asyncio.create_task(
+                run_browser_agent(
+                    agent_type=agent_type,
+                    llm_provider=llm_provider,
+                    llm_model_name=llm_model_name,
+                    llm_temperature=llm_temperature,
+                    llm_base_url=llm_base_url,
+                    llm_api_key=llm_api_key,
+                    use_own_browser=use_own_browser,
+                    keep_browser_open=keep_browser_open,
+                    headless=headless,
+                    disable_security=disable_security,
+                    window_w=window_w,
+                    window_h=window_h,
+                    save_recording_path=save_recording_path,
+                    save_agent_history_path=save_agent_history_path,
+                    save_trace_path=save_trace_path,
+                    enable_recording=enable_recording,
+                    task=task,
+                    add_infos=add_infos,
+                    max_steps=max_steps,
+                    use_vision=use_vision,
+                    max_actions_per_step=max_actions_per_step,
+                    tool_call_in_content=tool_call_in_content
+                )
+            )
+            # Initialize values for streaming
+            html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Using browser...</h1>"
+            final_result = errors = model_actions = model_thoughts = ""
+            latest_videos = trace = history_file = None
+            # Periodically update the stream while the agent task is running
+            while not agent_task.done():
+                try:
+                    encoded_screenshot = await capture_screenshot(_global_browser_context)
+                    if encoded_screenshot is not None:
+                        html_content = f'<img src="data:image/jpeg;base64,{encoded_screenshot}" style="width:{stream_vw}vw; height:{stream_vh}vh ; border:1px solid #ccc;">'
+                    else:
+                        html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>"
+                except Exception as e:
+                    html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>"
+                if _global_agent_state and _global_agent_state.is_stop_requested():
+                    yield [
+                        html_content,
+                        final_result,
+                        errors,
+                        model_actions,
+                        model_thoughts,
+                        latest_videos,
+                        trace,
+                        history_file,
+                        gr.update(value="Stopping...", interactive=False),  # stop_button
+                        gr.update(interactive=False),  # run_button
+                    ]
+                    break
+                else:
+                    yield [
+                        html_content,
+                        final_result,
+                        errors,
+                        model_actions,
+                        model_thoughts,
+                        latest_videos,
+                        trace,
+                        history_file,
+                        gr.update(value="Stop", interactive=True),  # Re-enable stop button
+                        gr.update(interactive=True)  # Re-enable run button
+                    ]
+                await asyncio.sleep(0.05)
+            # Once the agent task completes, get the results
+            try:
+                result = await agent_task
+                final_result, errors, model_actions, model_thoughts, latest_videos, trace, history_file, stop_button, run_button = result
+            except Exception as e:
+                errors = f"Agent error: {str(e)}"
+            yield [
+                html_content,
+                final_result,
+                errors,
+                model_actions,
+                model_thoughts,
+                latest_videos,
+                trace,
+                history_file,
+                stop_button,
+                run_button
+            ]
+        except Exception as e:
+            import traceback
+            yield [
+                f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>",
+                "",
+                f"Error: {str(e)}\n{traceback.format_exc()}",
+                "",
+                "",
+                None,
+                None,
+                None,
+                gr.update(value="Stop", interactive=True),  # Re-enable stop button
+                gr.update(interactive=True)    # Re-enable run button
+            ]
+# Define the theme map globally
+theme_map = {
+    "Default": Default(),
+    "Soft": Soft(),
+    "Monochrome": Monochrome(),
+    "Glass": Glass(),
+    "Origin": Origin(),
+    "Citrus": Citrus(),
+    "Ocean": Ocean(),
+    "Base": Base()
+}
+async def close_global_browser():
+    global _global_browser, _global_browser_context
+    if _global_browser_context:
+        await _global_browser_context.close()
+        _global_browser_context = None
+    if _global_browser:
+        await _global_browser.close()
+        _global_browser = None
+def create_ui(config, theme_name="Ocean"):
+    css = """
+    .gradio-container {
+        max-width: 1200px !important;
+        margin: auto !important;
+        padding-top: 20px !important;
+    }
+    .header-text {
+        text-align: center;
+        margin-bottom: 30px;
+    }
+    .theme-section {
+        margin-bottom: 20px;
+        padding: 15px;
+        border-radius: 10px;
+    }
+    """
+    js = """
+    function refresh() {
+        const url = new URL(window.location);
+        if (url.searchParams.get('__theme') !== 'dark') {
+            url.searchParams.set('__theme', 'dark');
+            window.location.href = url.href;
+        }
+    }
+    """
+    with gr.Blocks(
+            title="Browser Use WebUI", theme=theme_map[theme_name], css=css, js=js
+    ) as demo:
+        with gr.Row():
+            gr.Markdown(
+                """
+                # 🌐 Browser Use WebUI
+                ### Control your browser with AI assistance
+                """,
+                elem_classes=["header-text"],
+            )
+        with gr.Tabs() as tabs:
+            with gr.TabItem("⚙️ Agent Settings", id=1):
+                with gr.Group():
+                    agent_type = gr.Radio(
+                        ["org", "custom"],
+                        label="Agent Type",
+                        value=config['agent_type'],
+                        info="Select the type of agent to use",
+                    )
+                    max_steps = gr.Slider(
+                        minimum=1,
+                        maximum=200,
+                        value=config['max_steps'],
+                        step=1,
+                        label="Max Run Steps",
+                        info="Maximum number of steps the agent will take",
+                    )
+                    max_actions_per_step = gr.Slider(
+                        minimum=1,
+                        maximum=20,
+                        value=config['max_actions_per_step'],
+                        step=1,
+                        label="Max Actions per Step",
+                        info="Maximum number of actions the agent will take per step",
+                    )
+                    use_vision = gr.Checkbox(
+                        label="Use Vision",
+                        value=config['use_vision'],
+                        info="Enable visual processing capabilities",
+                    )
+                    tool_call_in_content = gr.Checkbox(
+                        label="Use Tool Calls in Content",
+                        value=config['tool_call_in_content'],
+                        info="Enable Tool Calls in content",
+                    )
+            with gr.TabItem("🔧 LLM Configuration", id=2):
+                with gr.Group():
+                    llm_provider = gr.Dropdown(
+                        choices=[provider for provider,model in utils.model_names.items()],
+                        label="LLM Provider",
+                        value=config['llm_provider'],
+                        info="Select your preferred language model provider"
+                    )
+                    llm_model_name = gr.Dropdown(
+                        label="Model Name",
+                        choices=utils.model_names['openai'],
+                        value=config['llm_model_name'],
+                        interactive=True,
+                        allow_custom_value=True,  # Allow users to input custom model names
+                        info="Select a model from the dropdown or type a custom model name"
+                    )
+                    llm_temperature = gr.Slider(
+                        minimum=0.0,
+                        maximum=2.0,
+                        value=config['llm_temperature'],
+                        step=0.1,
+                        label="Temperature",
+                        info="Controls randomness in model outputs"
+                    )
+                    with gr.Row():
+                        llm_base_url = gr.Textbox(
+                            label="Base URL",
+                            value=config['llm_base_url'],
+                            info="API endpoint URL (if required)"
+                        )
+                        llm_api_key = gr.Textbox(
+                            label="API Key",
+                            type="password",
+                            value=config['llm_api_key'],
+                            info="Your API key (leave blank to use .env)"
+                        )
+            with gr.TabItem("🌐 Browser Settings", id=3):
+                with gr.Group():
+                    with gr.Row():
+                        use_own_browser = gr.Checkbox(
+                            label="Use Own Browser",
+                            value=config['use_own_browser'],
+                            info="Use your existing browser instance",
+                        )
+                        keep_browser_open = gr.Checkbox(
+                            label="Keep Browser Open",
+                            value=config['keep_browser_open'],
+                            info="Keep Browser Open between Tasks",
+                        )
+                        headless = gr.Checkbox(
+                            label="Headless Mode",
+                            value=config['headless'],
+                            info="Run browser without GUI",
+                        )
+                        disable_security = gr.Checkbox(
+                            label="Disable Security",
+                            value=config['disable_security'],
+                            info="Disable browser security features",
+                        )
+                        enable_recording = gr.Checkbox(
+                            label="Enable Recording",
+                            value=config['enable_recording'],
+                            info="Enable saving browser recordings",
+                        )
+                    with gr.Row():
+                        window_w = gr.Number(
+                            label="Window Width",
+                            value=config['window_w'],
+                            info="Browser window width",
+                        )
+                        window_h = gr.Number(
+                            label="Window Height",
+                            value=config['window_h'],
+                            info="Browser window height",
+                        )
+                    save_recording_path = gr.Textbox(
+                        label="Recording Path",
+                        placeholder="e.g. ./tmp/record_videos",
+                        value=config['save_recording_path'],
+                        info="Path to save browser recordings",
+                        interactive=True,  # Allow editing only if recording is enabled
+                    )
+                    save_trace_path = gr.Textbox(
+                        label="Trace Path",
+                        placeholder="e.g. ./tmp/traces",
+                        value=config['save_trace_path'],
+                        info="Path to save Agent traces",
+                        interactive=True,
+                    )
+                    save_agent_history_path = gr.Textbox(
+                        label="Agent History Save Path",
+                        placeholder="e.g., ./tmp/agent_history",
+                        value=config['save_agent_history_path'],
+                        info="Specify the directory where agent history should be saved.",
+                        interactive=True,
+                    )
+            with gr.TabItem("🤖 Run Agent", id=4):
+                task = gr.Textbox(
+                    label="Task Description",
+                    lines=4,
+                    placeholder="Enter your task here...",
+                    value=config['task'],
+                    info="Describe what you want the agent to do",
+                )
+                add_infos = gr.Textbox(
+                    label="Additional Information",
+                    lines=3,
+                    placeholder="Add any helpful context or instructions...",
+                    info="Optional hints to help the LLM complete the task",
+                )
+                with gr.Row():
+                    run_button = gr.Button("▶️ Run Agent", variant="primary", scale=2)
+                    stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1)
+                with gr.Row():
+                    browser_view = gr.HTML(
+                        value="<h1 style='width:80vw; height:50vh'>Waiting for browser session...</h1>",
+                        label="Live Browser View",
+                )
+            with gr.TabItem("📁 Configuration", id=5):
+                with gr.Group():
+                    config_file_input = gr.File(
+                        label="Load Config File",
+                        file_types=[".pkl"],
+                        interactive=True
+                    )
+                    load_config_button = gr.Button("Load Existing Config From File", variant="primary")
+                    save_config_button = gr.Button("Save Current Config", variant="primary")
+                    config_status = gr.Textbox(
+                        label="Status",
+                        lines=2,
+                        interactive=False
+                    )
+                load_config_button.click(
+                    fn=update_ui_from_config,
+                    inputs=[config_file_input],
+                    outputs=[
+                        agent_type, max_steps, max_actions_per_step, use_vision, tool_call_in_content,
+                        llm_provider, llm_model_name, llm_temperature, llm_base_url, llm_api_key,
+                        use_own_browser, keep_browser_open, headless, disable_security, enable_recording,
+                        window_w, window_h, save_recording_path, save_trace_path, save_agent_history_path,
+                        task, config_status
+                    ]
+                )
+                save_config_button.click(
+                    fn=save_current_config,
+                    inputs=[
+                        agent_type, max_steps, max_actions_per_step, use_vision, tool_call_in_content,
+                        llm_provider, llm_model_name, llm_temperature, llm_base_url, llm_api_key,
+                        use_own_browser, keep_browser_open, headless, disable_security,
+                        enable_recording, window_w, window_h, save_recording_path, save_trace_path,
+                        save_agent_history_path, task,
+                    ],
+                    outputs=[config_status]
+                )
+            with gr.TabItem("📊 Results", id=6):
+                with gr.Group():
+                    recording_display = gr.Video(label="Latest Recording")
+                    gr.Markdown("### Results")
+                    with gr.Row():
+                        with gr.Column():
+                            final_result_output = gr.Textbox(
+                                label="Final Result", lines=3, show_label=True
+                            )
+                        with gr.Column():
+                            errors_output = gr.Textbox(
+                                label="Errors", lines=3, show_label=True
+                            )
+                    with gr.Row():
+                        with gr.Column():
+                            model_actions_output = gr.Textbox(
+                                label="Model Actions", lines=3, show_label=True
+                            )
+                        with gr.Column():
+                            model_thoughts_output = gr.Textbox(
+                                label="Model Thoughts", lines=3, show_label=True
+                            )
+                    trace_file = gr.File(label="Trace File")
+                    agent_history_file = gr.File(label="Agent History")
+                # Bind the stop button click event after errors_output is defined
+                stop_button.click(
+                    fn=stop_agent,
+                    inputs=[],
+                    outputs=[errors_output, stop_button, run_button],
+                )
+                # Run button click handler
+                run_button.click(
+                    fn=run_with_stream,
+                        inputs=[
+                            agent_type, llm_provider, llm_model_name, llm_temperature, llm_base_url, llm_api_key,
+                            use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h,
+                            save_recording_path, save_agent_history_path, save_trace_path,  # Include the new path
+                            enable_recording, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_call_in_content
+                        ],
+                    outputs=[
+                        browser_view,           # Browser view
+                        final_result_output,    # Final result
+                        errors_output,          # Errors
+                        model_actions_output,   # Model actions
+                        model_thoughts_output,  # Model thoughts
+                        recording_display,      # Latest recording
+                        trace_file,             # Trace file
+                        agent_history_file,     # Agent history file
+                        stop_button,            # Stop button
+                        run_button              # Run button
+                    ],
+                )
+            with gr.TabItem("🎥 Recordings", id=7):
+                def list_recordings(save_recording_path):
+                    if not os.path.exists(save_recording_path):
+                        return []
+                    # Get all video files
+                    recordings = glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4")) + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]"))
+                    # Sort recordings by creation time (oldest first)
+                    recordings.sort(key=os.path.getctime)
+                    # Add numbering to the recordings
+                    numbered_recordings = []
+                    for idx, recording in enumerate(recordings, start=1):
+                        filename = os.path.basename(recording)
+                        numbered_recordings.append((recording, f"{idx}. {filename}"))
+                    return numbered_recordings
+                recordings_gallery = gr.Gallery(
+                    label="Recordings",
+                    value=list_recordings(config['save_recording_path']),
+                    columns=3,
+                    height="auto",
+                    object_fit="contain"
+                )
+                refresh_button = gr.Button("🔄 Refresh Recordings", variant="secondary")
+                refresh_button.click(
+                    fn=list_recordings,
+                    inputs=save_recording_path,
+                    outputs=recordings_gallery
+                )
+        # Attach the callback to the LLM provider dropdown
+        llm_provider.change(
+            lambda provider, api_key, base_url: update_model_dropdown(provider, api_key, base_url),
+            inputs=[llm_provider, llm_api_key, llm_base_url],
+            outputs=llm_model_name
+        )
+        # Add this after defining the components
+        enable_recording.change(
+            lambda enabled: gr.update(interactive=enabled),
+            inputs=enable_recording,
+            outputs=save_recording_path
+        )
+        use_own_browser.change(fn=close_global_browser)
+        keep_browser_open.change(fn=close_global_browser)
+    return demo
+def main():
+    parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent")
+    parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
+    parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
+    parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI")
+    parser.add_argument("--dark-mode", action="store_true", help="Enable dark mode")
+    args = parser.parse_args()
+    config_dict = default_config()
+    demo = create_ui(config_dict, theme_name=args.theme)
+    demo.launch(server_name=args.ip, server_port=args.port)
+if __name__ == '__main__':
+    main()