import gradio as gr import asyncio from typing import Dict, List, Tuple import os from datetime import datetime from loguru import logger import sys import json # Configure logging logger.remove() logger.add(sys.stdout, level="INFO", format="{time:HH:mm:ss} | {level: <8} | {message}") # Try importing agent components try: from agent.autonomous_agent import AutonomousBrowserAgent from agent.planner_agent import PlannerAgent from mayini_integration.policy_network import MayiniPolicyNetwork AGENT_AVAILABLE = True logger.info("â Agent components loaded successfully") except ImportError as e: AGENT_AVAILABLE = False logger.error(f"â Could not load agent: {str(e)}") class BrowserAgentInterface: """Gradio interface for the autonomous browser agent.""" def __init__(self): """Initialize the interface.""" self.agent = None self.task_history: List[Dict] = [] self.max_history = 10 logger.info("đ Browser Agent Interface initialized") def execute_task_sync( self, task: str, url: str, headless: bool, max_steps: int ) -> Tuple[str, str, str]: """ Synchronous wrapper for Gradio compatibility. Args: task: Task description url: Starting URL headless: Run headless max_steps: Maximum steps Returns: Tuple of (status, results_json, history_text) """ return asyncio.run(self.execute_task_async(task, url, headless, max_steps)) async def execute_task_async( self, task: str, url: str, headless: bool, max_steps: int ) -> Tuple[str, str, str]: """ Execute task asynchronously. Args: task: Task description url: Starting URL headless: Run in headless mode max_steps: Maximum steps Returns: Tuple of (status_text, results_json, history_text) """ if not AGENT_AVAILABLE: return ( "â Demo Mode: Agent not available. This is a demo interface.", json.dumps({"error": "Agent components not loaded", "demo": True}, indent=2), "No tasks executed yet (demo mode)" ) if not task.strip(): return ( "â ī¸ Error: Task description cannot be empty", json.dumps({"error": "Empty task"}, indent=2), "Please enter a task description" ) if not url.strip(): return ( "â ī¸ Error: URL cannot be empty", json.dumps({"error": "Empty URL"}, indent=2), "Please enter a starting URL" ) try: logger.info(f"đ Executing task: {task}") logger.info(f"đ URL: {url}") logger.info(f"âī¸ Headless: {headless}, Max Steps: {max_steps}") # Initialize agent self.agent = AutonomousBrowserAgent( headless=headless, browser_type="chromium", embedding_dim=512, hidden_dim=256, num_actions=50 ) # Execute task results = await self.agent.execute_task( task=task, url=url, max_steps=max_steps, mode="autonomous" ) # Save to history history_entry = { "timestamp": datetime.now().isoformat(), "task": task, "url": url, "success": results.get("success", False), "steps_completed": len(results.get("steps", [])) } self.task_history.append(history_entry) # Keep only recent history if len(self.task_history) > self.max_history: self.task_history = self.task_history[-self.max_history:] # Format results status = "â Success!" if results.get("success") else "â ī¸ Partial Success" steps_completed = len(results.get("steps", [])) sub_tasks_completed = sum( 1 for step in results.get("steps", []) if step.get("success", False) ) status_text = f""" {status} đ **Task:** {task} đ **URL:** {url} đ **Steps Completed:** {steps_completed}/{max_steps} â **Successful Steps:** {sub_tasks_completed} âąī¸ **Timestamp:** {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} **Sub-tasks:** {len(results.get("sub_tasks", []))} {chr(10).join(f'âĸ {st}' for st in results.get("sub_tasks", [])[:5])} """ # Format results as JSON results_json = json.dumps(results, indent=2, default=str) # Format history history_text = self._format_history() # Close agent await self.agent.close() logger.info(f"â Task completed successfully") return status_text, results_json, history_text except Exception as e: logger.error(f"â Task execution failed: {str(e)}") if self.agent: try: await self.agent.close() except: pass return ( f"â Error: {str(e)}", json.dumps({"error": str(e), "type": type(e).__name__}, indent=2), self._format_history() ) def decompose_task(self, task: str) -> str: """ Show task decomposition. Args: task: Task description Returns: Formatted sub-tasks """ if not AGENT_AVAILABLE: return "Agent not available (demo mode)" if not task.strip(): return "Please enter a task description" try: planner = PlannerAgent() sub_tasks = planner.decompose_task(task) result = "đ **Task Decomposition**\n\n" result += f"**Original Task:** {task}\n\n" result += f"**Sub-tasks:** ({len(sub_tasks)} steps)\n\n" for i, sub_task in enumerate(sub_tasks, 1): result += f"{i}. {sub_task}\n" return result except Exception as e: logger.error(f"Decomposition failed: {str(e)}") return f"Error: {str(e)}" def _format_history(self) -> str: """Format task history for display.""" if not self.task_history: return "đ No tasks executed yet" history_text = "đ **Recent Tasks**\n\n" for i, task in enumerate(reversed(self.task_history), 1): status = "â " if task["success"] else "â ī¸" history_text += f"{i}. {status} {task['task']}\n" history_text += f" URL: {task['url']}\n" history_text += f" Steps: {task['steps_completed']}\n" history_text += f" Time: {task['timestamp']}\n\n" return history_text def create_interface(): """Create Gradio interface with theme and styling.""" interface = BrowserAgentInterface() with gr.Blocks( title="đ¤ Autonomous Browser Agent", theme=gr.themes.Soft() ) as demo: gr.Markdown(""" # đ¤ Autonomous Browser Agent with MAYINI Framework ### Intelligent Web Automation Powered by Deep Learning This agent combines: - **đ§ MAYINI Framework** - Custom deep learning for decision-making - **đī¸ Vision Transformers** - Visual page understanding - **đ Playwright** - Cross-browser automation - **đ Reinforcement Learning** - Continuous improvement --- """) with gr.Tab("đ Execute Task"): gr.Markdown("### Execute a web automation task") with gr.Row(): with gr.Column(scale=3): task_input = gr.Textbox( label="đ Task Description", placeholder="Example: Search for flights from NYC to London on Dec 20", lines=3, info="Describe what you want the agent to do" ) url_input = gr.Textbox( label="đ Starting URL", placeholder="https://www.google.com/flights", value="https://www.google.com", info="URL where the agent will start" ) with gr.Row(): headless_checkbox = gr.Checkbox( label="đ Run Headless", value=True, info="Run browser in background (no visible window)" ) max_steps_slider = gr.Slider( minimum=5, maximum=100, value=30, step=5, label="âąī¸ Max Steps", info="Maximum number of actions to attempt" ) execute_btn = gr.Button( "âļī¸ Execute Task", variant="primary", size="lg" ) with gr.Column(scale=1): status_output = gr.Textbox( label="đ Status", lines=12, interactive=False, show_label=True ) with gr.Row(): results_output = gr.Textbox( label="đ Detailed Results (JSON)", lines=15, interactive=False, max_lines=20 ) history_output = gr.Textbox( label="đ Task History", lines=15, interactive=False ) execute_btn.click( fn=interface.execute_task_sync, inputs=[task_input, url_input, headless_checkbox, max_steps_slider], outputs=[status_output, results_output, history_output] ) with gr.Tab("đ Task Planner"): gr.Markdown("### Visualize how your task will be decomposed") with gr.Row(): planner_task_input = gr.Textbox( label="đ Task", placeholder="Example: Buy a laptop on Amazon", lines=2 ) decompose_btn = gr.Button("đ¨ Decompose", variant="secondary") decomposition_output = gr.Textbox( label="đ Sub-Tasks", lines=12, interactive=False ) decompose_btn.click( fn=interface.decompose_task, inputs=[planner_task_input], outputs=[decomposition_output] ) with gr.Tab("âšī¸ About"): gr.Markdown(""" ## About This Project ### đī¸ Architecture This autonomous browser agent combines cutting-edge technologies: 1. **MAYINI Framework**: Custom deep learning library with neural networks 2. **Vision Transformers**: Visual page understanding without HTML dependency 3. **Playwright**: Cross-browser automation with auto-waiting 4. **Reinforcement Learning**: Policy gradient methods for improvement ### đ¯ Key Features - **Hierarchical Planning**: Breaks complex tasks into sub-goals - **Visual Understanding**: Screenshot-based page comprehension - **Memory-Augmented**: LSTM networks remember past interactions - **Multi-Task Learning**: Trained on diverse web tasks - **Exploration**: Curiosity-driven discovery of new actions ### đ Use Cases - Form filling and submission - Web scraping and data extraction - E-commerce automation - Navigation and search - Testing and QA ### đ Links - [GitHub](https://github.com/yourusername/autonomous-browser-agent) - [MAYINI Framework](https://pypi.org/project/mayini-framework/) - [Playwright](https://playwright.dev/) - [Documentation](https://docs.example.com) ### đ License MIT License - Free to use and modify! """) gr.Markdown(""" ---
Built with â¤ī¸ using MAYINI, Playwright, and Vision Transformers
Š 2024 | Autonomous Browser Agent Project