Spaces:
Running
Running
| import gradio as gr | |
| import asyncio | |
| from typing import Dict, List, Tuple | |
| import os | |
| from datetime import datetime | |
| from loguru import logger | |
| import sys | |
| import json | |
# Configure logging: drop the existing loguru handlers, then log INFO and
# above to stdout in a compact "time | level | message" layout.
logger.remove()
logger.add(
    sys.stdout,
    level="INFO",
    format="{time:HH:mm:ss} | {level: <8} | {message}",
)

# Try importing agent components. AGENT_AVAILABLE records whether all three
# imports succeeded; the rest of the module checks it before using the agent
# classes, which are undefined when the import fails.
try:
    from agent.autonomous_agent import AutonomousBrowserAgent
    from agent.planner_agent import PlannerAgent
    from mayini_integration.policy_network import MayiniPolicyNetwork
except ImportError as import_error:
    AGENT_AVAILABLE = False
    logger.error(f"β Could not load agent: {str(import_error)}")
else:
    AGENT_AVAILABLE = True
    logger.info("β Agent components loaded successfully")
class BrowserAgentInterface:
    """Gradio interface for the autonomous browser agent.

    Keeps the agent used by the run in progress (``self.agent``) and a bounded
    list of per-task summaries (``self.task_history``), and provides the
    callbacks that the UI buttons are bound to.
    """

    def __init__(self):
        """Initialize the interface with no agent and an empty history."""
        self.agent = None
        self.task_history: List[Dict] = []
        self.max_history = 10
        logger.info("π Browser Agent Interface initialized")

    def execute_task_sync(
        self,
        task: str,
        url: str,
        headless: bool,
        max_steps: int
    ) -> Tuple[str, str, str]:
        """
        Synchronous wrapper for Gradio compatibility.

        Runs ``execute_task_async`` to completion with ``asyncio.run``.

        Args:
            task: Task description
            url: Starting URL
            headless: Run headless
            max_steps: Maximum steps

        Returns:
            Tuple of (status, results_json, history_text)
        """
        return asyncio.run(self.execute_task_async(task, url, headless, max_steps))

    async def execute_task_async(
        self,
        task: str,
        url: str,
        headless: bool,
        max_steps: int
    ) -> Tuple[str, str, str]:
        """
        Execute task asynchronously.

        Validation failures and execution errors are reported through the
        returned tuple rather than raised.

        Args:
            task: Task description
            url: Starting URL
            headless: Run in headless mode
            max_steps: Maximum steps

        Returns:
            Tuple of (status_text, results_json, history_text)
        """
        if not AGENT_AVAILABLE:
            return (
                "β Demo Mode: Agent not available. This is a demo interface.",
                json.dumps({"error": "Agent components not loaded", "demo": True}, indent=2),
                "No tasks executed yet (demo mode)"
            )

        # `or ""` keeps a None input from crashing on .strip().
        if not (task or "").strip():
            return (
                "β οΈ Error: Task description cannot be empty",
                json.dumps({"error": "Empty task"}, indent=2),
                "Please enter a task description"
            )
        if not (url or "").strip():
            return (
                "β οΈ Error: URL cannot be empty",
                json.dumps({"error": "Empty URL"}, indent=2),
                "Please enter a starting URL"
            )

        try:
            # The slider may hand over a float; the agent and the status text
            # both want a whole number of steps.
            max_steps = int(max_steps)

            logger.info(f"π Executing task: {task}")
            logger.info(f"π URL: {url}")
            logger.info(f"βοΈ Headless: {headless}, Max Steps: {max_steps}")

            # Initialize agent
            self.agent = AutonomousBrowserAgent(
                headless=headless,
                browser_type="chromium",
                embedding_dim=512,
                hidden_dim=256,
                num_actions=50
            )

            # Execute task
            results = await self.agent.execute_task(
                task=task,
                url=url,
                max_steps=max_steps,
                mode="autonomous"
            )

            self._record_history(task, url, results)
            status_text = self._build_status_text(task, url, max_steps, results)
            results_json = json.dumps(results, indent=2, default=str)
            history_text = self._format_history()

            logger.info("β Task completed successfully")
            return status_text, results_json, history_text
        except Exception as e:
            # UI boundary: report the failure in the outputs instead of raising.
            logger.error(f"β Task execution failed: {str(e)}")
            return (
                f"β Error: {str(e)}",
                json.dumps({"error": str(e), "type": type(e).__name__}, indent=2),
                self._format_history()
            )
        finally:
            # Runs on both the success and the error path, so the agent is
            # closed exactly once and never left referenced after the run.
            await self._close_agent()

    async def _close_agent(self) -> None:
        """Close the current agent, if any, and clear ``self.agent``."""
        agent, self.agent = self.agent, None
        if agent is None:
            return
        try:
            await agent.close()
        except Exception as close_error:
            # Cleanup is best-effort, but a failure should be visible in logs.
            logger.warning(f"Agent cleanup failed: {close_error}")

    def _record_history(self, task: str, url: str, results: Dict) -> None:
        """Append a summary of one run and trim history to ``max_history``."""
        steps = results.get("steps") or []
        self.task_history.append({
            "timestamp": datetime.now().isoformat(),
            "task": task,
            "url": url,
            "success": results.get("success", False),
            "steps_completed": len(steps),
        })
        # Keep only recent history
        if len(self.task_history) > self.max_history:
            self.task_history = self.task_history[-self.max_history:]

    @staticmethod
    def _build_status_text(task: str, url: str, max_steps: int, results: Dict) -> str:
        """Render the status summary for one run from its results dict."""
        steps = results.get("steps") or []
        sub_tasks = results.get("sub_tasks") or []
        status = "β Success!" if results.get("success") else "β οΈ Partial Success"
        successful_steps = sum(1 for step in steps if step.get("success", False))
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # Only the first five sub-tasks are listed; the count line shows the total.
        sub_task_lines = "\n".join(f"β’ {st}" for st in sub_tasks[:5])
        # Empty first/last items give the text a leading and trailing newline.
        return "\n".join([
            "",
            status,
            f"π **Task:** {task}",
            f"π **URL:** {url}",
            f"π **Steps Completed:** {len(steps)}/{max_steps}",
            f"β **Successful Steps:** {successful_steps}",
            f"β±οΈ **Timestamp:** {timestamp}",
            f"**Sub-tasks:** {len(sub_tasks)}",
            sub_task_lines,
            "",
        ])

    def decompose_task(self, task: str) -> str:
        """
        Show task decomposition.

        Args:
            task: Task description

        Returns:
            Formatted sub-tasks, or a message describing why none are shown
        """
        if not AGENT_AVAILABLE:
            return "Agent not available (demo mode)"
        if not (task or "").strip():
            return "Please enter a task description"

        try:
            planner = PlannerAgent()
            sub_tasks = planner.decompose_task(task)
            parts = [
                "π **Task Decomposition**\n\n",
                f"**Original Task:** {task}\n\n",
                f"**Sub-tasks:** ({len(sub_tasks)} steps)\n\n",
            ]
            parts.extend(
                f"{i}. {sub_task}\n" for i, sub_task in enumerate(sub_tasks, 1)
            )
            return "".join(parts)
        except Exception as e:
            # UI boundary: show the error text in the output box.
            logger.error(f"Decomposition failed: {str(e)}")
            return f"Error: {str(e)}"

    def _format_history(self) -> str:
        """Format task history for display, most recent task first."""
        if not self.task_history:
            return "π No tasks executed yet"

        parts = ["π **Recent Tasks**\n\n"]
        for i, entry in enumerate(reversed(self.task_history), 1):
            status = "β " if entry["success"] else "β οΈ"
            parts.append(
                f"{i}. {status} {entry['task']}\n"
                f" URL: {entry['url']}\n"
                f" Steps: {entry['steps_completed']}\n"
                f" Time: {entry['timestamp']}\n\n"
            )
        return "".join(parts)
def create_interface():
    """Build the Gradio Blocks app and return it.

    The page is a header, three tabs (task execution, task planner, about)
    and a footer. Button callbacks are bound to one BrowserAgentInterface
    instance created here.
    """
    header_md = """
# π€ Autonomous Browser Agent with MAYINI Framework
### Intelligent Web Automation Powered by Deep Learning
This agent combines:
- **π§ MAYINI Framework** - Custom deep learning for decision-making
- **ποΈ Vision Transformers** - Visual page understanding
- **π Playwright** - Cross-browser automation
- **π Reinforcement Learning** - Continuous improvement
---
"""
    about_md = """
## About This Project
### ποΈ Architecture
This autonomous browser agent combines cutting-edge technologies:
1. **MAYINI Framework**: Custom deep learning library with neural networks
2. **Vision Transformers**: Visual page understanding without HTML dependency
3. **Playwright**: Cross-browser automation with auto-waiting
4. **Reinforcement Learning**: Policy gradient methods for improvement
### π― Key Features
- **Hierarchical Planning**: Breaks complex tasks into sub-goals
- **Visual Understanding**: Screenshot-based page comprehension
- **Memory-Augmented**: LSTM networks remember past interactions
- **Multi-Task Learning**: Trained on diverse web tasks
- **Exploration**: Curiosity-driven discovery of new actions
### π Use Cases
- Form filling and submission
- Web scraping and data extraction
- E-commerce automation
- Navigation and search
- Testing and QA
### π Links
- [GitHub](https://github.com/yourusername/autonomous-browser-agent)
- [MAYINI Framework](https://pypi.org/project/mayini-framework/)
- [Playwright](https://playwright.dev/)
- [Documentation](https://docs.example.com)
### π License
MIT License - Free to use and modify!
"""
    footer_md = """
---
<div style="text-align: center;">
<p>Built with β€οΈ using MAYINI, Playwright, and Vision Transformers</p>
<p>Β© 2024 | Autonomous Browser Agent Project</p>
</div>
"""

    backend = BrowserAgentInterface()

    with gr.Blocks(
        title="π€ Autonomous Browser Agent",
        theme=gr.themes.Soft(),
    ) as app:
        gr.Markdown(header_md)

        # --- Tab 1: run a task and show status, raw results and history ---
        with gr.Tab("π Execute Task"):
            gr.Markdown("### Execute a web automation task")
            with gr.Row():
                # Left column: task inputs and the run button.
                with gr.Column(scale=3):
                    task_box = gr.Textbox(
                        label="π Task Description",
                        placeholder="Example: Search for flights from NYC to London on Dec 20",
                        lines=3,
                        info="Describe what you want the agent to do",
                    )
                    url_box = gr.Textbox(
                        label="π Starting URL",
                        placeholder="https://www.google.com/flights",
                        value="https://www.google.com",
                        info="URL where the agent will start",
                    )
                    with gr.Row():
                        headless_toggle = gr.Checkbox(
                            label="π Run Headless",
                            value=True,
                            info="Run browser in background (no visible window)",
                        )
                        steps_slider = gr.Slider(
                            minimum=5,
                            maximum=100,
                            value=30,
                            step=5,
                            label="β±οΈ Max Steps",
                            info="Maximum number of actions to attempt",
                        )
                    run_button = gr.Button(
                        "βΆοΈ Execute Task",
                        variant="primary",
                        size="lg",
                    )
                # Right column: read-only status summary.
                with gr.Column(scale=1):
                    status_box = gr.Textbox(
                        label="π Status",
                        lines=12,
                        interactive=False,
                        show_label=True,
                    )
            with gr.Row():
                results_box = gr.Textbox(
                    label="π Detailed Results (JSON)",
                    lines=15,
                    interactive=False,
                    max_lines=20,
                )
                history_box = gr.Textbox(
                    label="π Task History",
                    lines=15,
                    interactive=False,
                )
            # The callback's 3-tuple maps onto the three output boxes in order.
            run_button.click(
                fn=backend.execute_task_sync,
                inputs=[task_box, url_box, headless_toggle, steps_slider],
                outputs=[status_box, results_box, history_box],
            )

        # --- Tab 2: show how the planner splits a task ---
        with gr.Tab("π Task Planner"):
            gr.Markdown("### Visualize how your task will be decomposed")
            with gr.Row():
                planner_box = gr.Textbox(
                    label="π Task",
                    placeholder="Example: Buy a laptop on Amazon",
                    lines=2,
                )
                plan_button = gr.Button("π¨ Decompose", variant="secondary")
            plan_output = gr.Textbox(
                label="π Sub-Tasks",
                lines=12,
                interactive=False,
            )
            plan_button.click(
                fn=backend.decompose_task,
                inputs=[planner_box],
                outputs=[plan_output],
            )

        # --- Tab 3: static project description ---
        with gr.Tab("βΉοΈ About"):
            gr.Markdown(about_md)

        gr.Markdown(footer_md)

    return app
# Main entry point: build the UI and start serving it when run as a script.
if __name__ == "__main__":
    logger.info("π Starting Autonomous Browser Agent Web Interface...")
    logger.info(f"π§ Agent Available: {AGENT_AVAILABLE}")

    # Launch settings for the Hugging Face Spaces configuration.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }

    demo = create_interface()
    demo.launch(**launch_options)