1qwsd committed on
Commit
dbf5107
·
verified ·
1 Parent(s): b213e0f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +407 -0
app.py ADDED
@@ -0,0 +1,407 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import asyncio
4
+ from typing import Dict, List, Tuple
5
+ import os
6
+ from datetime import datetime
7
+ from loguru import logger
8
+ import sys
9
+ import json
10
+
11
# Configure logging: drop loguru's default sink and log INFO+ to stdout
# in a compact "time | level | message" format.
logger.remove()
logger.add(sys.stdout, level="INFO", format="{time:HH:mm:ss} | {level: <8} | {message}")

# Try importing agent components. If any of them is missing the app keeps
# running in demo mode, signalled through AGENT_AVAILABLE.
try:
    from agent.autonomous_agent import AutonomousBrowserAgent
    from agent.planner_agent import PlannerAgent
    # NOTE(review): MayiniPolicyNetwork is not referenced in the visible code;
    # presumably it is imported only to verify the package is installed.
    from mayini_integration.policy_network import MayiniPolicyNetwork
    AGENT_AVAILABLE = True
    logger.info("✅ Agent components loaded successfully")
except ImportError as e:
    AGENT_AVAILABLE = False
    logger.error(f"❌ Could not load agent: {e}")
25
+
26
+
27
class BrowserAgentInterface:
    """Gradio interface for the autonomous browser agent.

    Keeps the agent of the run in progress (``self.agent``) and a bounded
    list of summaries of the most recent runs (``self.task_history``).
    """

    def __init__(self):
        """Initialize the interface with no agent and an empty history."""
        # Agent of the run currently in progress; None between runs.
        self.agent = None
        self.task_history: List[Dict] = []
        # Number of history entries retained; older entries are dropped.
        self.max_history = 10
        logger.info("🚀 Browser Agent Interface initialized")

    def execute_task_sync(
        self,
        task: str,
        url: str,
        headless: bool,
        max_steps: int
    ) -> Tuple[str, str, str]:
        """
        Synchronous wrapper for Gradio compatibility.

        Runs ``execute_task_async`` to completion on a fresh event loop via
        ``asyncio.run``, so it must not be called from a thread that already
        has a running event loop.

        Args:
            task: Task description
            url: Starting URL
            headless: Run headless
            max_steps: Maximum steps

        Returns:
            Tuple of (status, results_json, history_text)
        """
        return asyncio.run(self.execute_task_async(task, url, headless, max_steps))

    async def execute_task_async(
        self,
        task: str,
        url: str,
        headless: bool,
        max_steps: int
    ) -> Tuple[str, str, str]:
        """
        Execute task asynchronously.

        Errors are never raised to the caller: validation problems and
        exceptions from the agent are reported through the returned tuple.
        The agent created for the run is always closed, including when the
        run fails or is cancelled.

        Args:
            task: Task description
            url: Starting URL
            headless: Run in headless mode
            max_steps: Maximum steps

        Returns:
            Tuple of (status_text, results_json, history_text)
        """
        if not AGENT_AVAILABLE:
            return (
                "❌ Demo Mode: Agent not available. This is a demo interface.",
                json.dumps({"error": "Agent components not loaded", "demo": True}, indent=2),
                "No tasks executed yet (demo mode)"
            )

        # `not task` also covers None, which would crash on .strip().
        if not task or not task.strip():
            return (
                "⚠️ Error: Task description cannot be empty",
                json.dumps({"error": "Empty task"}, indent=2),
                "Please enter a task description"
            )

        if not url or not url.strip():
            return (
                "⚠️ Error: URL cannot be empty",
                json.dumps({"error": "Empty URL"}, indent=2),
                "Please enter a starting URL"
            )

        # Local handle: cleanup must target the agent created by *this* call,
        # not whatever self.agent points at (a stale or concurrent run's agent).
        agent = None
        try:
            # NOTE(review): the Gradio slider may hand the value over as a
            # float - confirm; a whole number is wanted for the agent and the
            # "completed/max" status line.
            max_steps = int(max_steps)

            logger.info(f"📝 Executing task: {task}")
            logger.info(f"🌐 URL: {url}")
            logger.info(f"⚙️ Headless: {headless}, Max Steps: {max_steps}")

            # Initialize agent
            agent = AutonomousBrowserAgent(
                headless=headless,
                browser_type="chromium",
                embedding_dim=512,
                hidden_dim=256,
                num_actions=50
            )
            self.agent = agent

            # Execute task
            results = await agent.execute_task(
                task=task,
                url=url,
                max_steps=max_steps,
                mode="autonomous"
            )

            # One timestamp for both the history entry and the status text.
            finished_at = datetime.now()
            self._record_history(task, url, results, finished_at)

            status_text = self._build_status_text(task, url, max_steps, results, finished_at)
            # default=str keeps non-JSON-native values from aborting the dump.
            results_json = json.dumps(results, indent=2, default=str)
            history_text = self._format_history()

            logger.info("✅ Task completed successfully")

            return status_text, results_json, history_text

        except Exception as e:
            logger.error(f"❌ Task execution failed: {str(e)}")
            return (
                f"❌ Error: {str(e)}",
                json.dumps({"error": str(e), "type": type(e).__name__}, indent=2),
                self._format_history()
            )
        finally:
            # Runs on success, failure and cancellation alike.
            if agent is not None:
                await self._close_agent(agent)
                if self.agent is agent:
                    self.agent = None

    async def _close_agent(self, agent) -> None:
        """Close ``agent`` on a best-effort basis, logging any failure."""
        try:
            await agent.close()
        except Exception as close_error:
            logger.warning(f"⚠️ Failed to close agent cleanly: {close_error}")

    def _record_history(self, task: str, url: str, results: Dict, finished_at: datetime) -> None:
        """Append a summary of one run and trim the history to ``max_history``."""
        self.task_history.append({
            "timestamp": finished_at.isoformat(),
            "task": task,
            "url": url,
            "success": results.get("success", False),
            "steps_completed": len(results.get("steps", []))
        })

        # Keep only recent history
        if len(self.task_history) > self.max_history:
            self.task_history = self.task_history[-self.max_history:]

    @staticmethod
    def _build_status_text(
        task: str,
        url: str,
        max_steps: int,
        results: Dict,
        finished_at: datetime
    ) -> str:
        """Render the human-readable status summary for one finished run."""
        steps = results.get("steps", [])
        sub_tasks = results.get("sub_tasks", [])
        status = "✅ Success!" if results.get("success") else "⚠️ Partial Success"
        successful_steps = sum(1 for step in steps if step.get("success", False))
        # Only the first five sub-tasks are listed to keep the summary short.
        sub_task_lines = "\n".join(f"• {sub_task}" for sub_task in sub_tasks[:5])

        return "\n".join([
            "",
            status,
            "",
            f"📋 **Task:** {task}",
            f"🌐 **URL:** {url}",
            f"📊 **Steps Completed:** {len(steps)}/{max_steps}",
            f"✅ **Successful Steps:** {successful_steps}",
            f"⏱️ **Timestamp:** {finished_at.strftime('%Y-%m-%d %H:%M:%S')}",
            "",
            f"**Sub-tasks:** {len(sub_tasks)}",
            sub_task_lines,
            "",
        ])

    def decompose_task(self, task: str) -> str:
        """
        Show task decomposition.

        Args:
            task: Task description

        Returns:
            Formatted sub-tasks, or a short message when the agent is
            unavailable, the task is empty, or the planner fails
        """
        if not AGENT_AVAILABLE:
            return "Agent not available (demo mode)"

        # `not task` also covers None, which would crash on .strip().
        if not task or not task.strip():
            return "Please enter a task description"

        try:
            sub_tasks = PlannerAgent().decompose_task(task)

            lines = [
                "📝 **Task Decomposition**",
                "",
                f"**Original Task:** {task}",
                "",
                f"**Sub-tasks:** ({len(sub_tasks)} steps)",
                "",
            ]
            lines.extend(
                f"{number}. {sub_task}"
                for number, sub_task in enumerate(sub_tasks, 1)
            )
            return "\n".join(lines) + "\n"
        except Exception as e:
            logger.error(f"Decomposition failed: {str(e)}")
            return f"Error: {str(e)}"

    def _format_history(self) -> str:
        """Format task history for display, most recent run first."""
        if not self.task_history:
            return "📜 No tasks executed yet"

        parts = ["📜 **Recent Tasks**\n\n"]
        for number, entry in enumerate(reversed(self.task_history), 1):
            marker = "✅" if entry["success"] else "⚠️"
            parts.append(f"{number}. {marker} {entry['task']}\n")
            parts.append(f" URL: {entry['url']}\n")
            parts.append(f" Steps: {entry['steps_completed']}\n")
            parts.append(f" Time: {entry['timestamp']}\n\n")

        return "".join(parts)
228
+
229
+
230
def _build_execute_tab(interface):
    """Build the "Execute Task" tab and wire its button to the agent."""
    with gr.Tab("🚀 Execute Task"):
        gr.Markdown("### Execute a web automation task")

        with gr.Row():
            with gr.Column(scale=3):
                task_input = gr.Textbox(
                    label="📝 Task Description",
                    placeholder="Example: Search for flights from NYC to London on Dec 20",
                    lines=3,
                    info="Describe what you want the agent to do"
                )

                url_input = gr.Textbox(
                    label="🌐 Starting URL",
                    placeholder="https://www.google.com/flights",
                    value="https://www.google.com",
                    info="URL where the agent will start"
                )

                with gr.Row():
                    headless_checkbox = gr.Checkbox(
                        label="🎭 Run Headless",
                        value=True,
                        info="Run browser in background (no visible window)"
                    )
                    max_steps_slider = gr.Slider(
                        minimum=5,
                        maximum=100,
                        value=30,
                        step=5,
                        label="⏱️ Max Steps",
                        info="Maximum number of actions to attempt"
                    )

                execute_btn = gr.Button(
                    "▶️ Execute Task",
                    variant="primary",
                    size="lg"
                )

            with gr.Column(scale=1):
                status_output = gr.Textbox(
                    label="📊 Status",
                    lines=12,
                    interactive=False,
                    show_label=True
                )

        with gr.Row():
            results_output = gr.Textbox(
                label="📄 Detailed Results (JSON)",
                lines=15,
                interactive=False,
                max_lines=20
            )
            history_output = gr.Textbox(
                label="📜 Task History",
                lines=15,
                interactive=False
            )

        # The handler returns (status, results_json, history) in this order.
        execute_btn.click(
            fn=interface.execute_task_sync,
            inputs=[task_input, url_input, headless_checkbox, max_steps_slider],
            outputs=[status_output, results_output, history_output]
        )


def _build_planner_tab(interface):
    """Build the "Task Planner" tab that previews the task decomposition."""
    with gr.Tab("🔍 Task Planner"):
        gr.Markdown("### Visualize how your task will be decomposed")

        with gr.Row():
            planner_task_input = gr.Textbox(
                label="📝 Task",
                placeholder="Example: Buy a laptop on Amazon",
                lines=2
            )
            decompose_btn = gr.Button("🔨 Decompose", variant="secondary")

        decomposition_output = gr.Textbox(
            label="📋 Sub-Tasks",
            lines=12,
            interactive=False
        )

        decompose_btn.click(
            fn=interface.decompose_task,
            inputs=[planner_task_input],
            outputs=[decomposition_output]
        )


def _build_about_tab():
    """Build the static "About" tab."""
    with gr.Tab("ℹ️ About"):
        gr.Markdown("""
        ## About This Project

        ### 🏗️ Architecture

        This autonomous browser agent combines cutting-edge technologies:

        1. **MAYINI Framework**: Custom deep learning library with neural networks
        2. **Vision Transformers**: Visual page understanding without HTML dependency
        3. **Playwright**: Cross-browser automation with auto-waiting
        4. **Reinforcement Learning**: Policy gradient methods for improvement

        ### 🎯 Key Features

        - **Hierarchical Planning**: Breaks complex tasks into sub-goals
        - **Visual Understanding**: Screenshot-based page comprehension
        - **Memory-Augmented**: LSTM networks remember past interactions
        - **Multi-Task Learning**: Trained on diverse web tasks
        - **Exploration**: Curiosity-driven discovery of new actions

        ### 📚 Use Cases

        - Form filling and submission
        - Web scraping and data extraction
        - E-commerce automation
        - Navigation and search
        - Testing and QA

        ### 🔗 Links

        - [GitHub](https://github.com/yourusername/autonomous-browser-agent)
        - [MAYINI Framework](https://pypi.org/project/mayini-framework/)
        - [Playwright](https://playwright.dev/)
        - [Documentation](https://docs.example.com)

        ### 📄 License

        MIT License - Free to use and modify!
        """)


def create_interface():
    """Create Gradio interface with theme and styling.

    Builds a ``gr.Blocks`` app with three tabs (execute, planner, about)
    backed by a single ``BrowserAgentInterface`` instance.

    Returns:
        The assembled ``gr.Blocks`` app, not yet launched.
    """
    interface = BrowserAgentInterface()

    with gr.Blocks(
        title="🤖 Autonomous Browser Agent",
        theme=gr.themes.Soft()
    ) as demo:
        gr.Markdown("""
        # 🤖 Autonomous Browser Agent with MAYINI Framework

        ### Intelligent Web Automation Powered by Deep Learning

        This agent combines:
        - **🧠 MAYINI Framework** - Custom deep learning for decision-making
        - **👁️ Vision Transformers** - Visual page understanding
        - **🎭 Playwright** - Cross-browser automation
        - **🔄 Reinforcement Learning** - Continuous improvement

        ---
        """)

        _build_execute_tab(interface)
        _build_planner_tab(interface)
        _build_about_tab()

        gr.Markdown("""
        ---
        <div style="text-align: center;">
        <p>Built with ❤️ using MAYINI, Playwright, and Vision Transformers</p>
        <p>© 2024 | Autonomous Browser Agent Project</p>
        </div>
        """)

    return demo
392
+
393
+
394
# Main entry point
if __name__ == "__main__":
    logger.info("🚀 Starting Autonomous Browser Agent Web Interface...")
    logger.info(f"🧠 Agent Available: {AGENT_AVAILABLE}")

    demo = create_interface()

    # Launch with Hugging Face Spaces configuration: listen on all interfaces
    # on port 7860, without a public share link, surfacing handler errors in
    # the UI.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )