Leonardo committed on
Commit
06ac681
·
verified ·
1 Parent(s): 7ab7a72

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -741
app.py CHANGED
@@ -1,21 +1,13 @@
1
  """
2
- OpenDeepResearch Web Interface Application
3
 
4
- This module provides a Gradio-based web interface for interacting with AI agents
5
- using the smolagents framework. It integrates document processing tools,
6
- web searching, and image generation capabilities.
7
  """
8
 
9
- import mimetypes
10
  import os
11
- import re
12
- import shutil
13
- import datetime
14
- from typing import Optional, List, Dict, Any
15
-
16
  from dotenv import load_dotenv
17
  from huggingface_hub import login
18
- import gradio as gr
19
 
20
  from scripts.text_inspector_tool import TextInspectorTool
21
  from scripts.text_web_browser import (
@@ -33,70 +25,63 @@ from scripts.text_cleaner_tool import TextCleanerTool
33
 
34
  from smolagents import (
35
  CodeAgent,
36
- HfApiModel,
37
  LiteLLMModel,
38
- OpenAIServerModel,
39
- TransformersModel,
40
  GoogleSearchTool,
41
  Tool,
42
  FinalAnswerTool,
43
  )
44
- from smolagents.gradio_ui import pull_messages_from_step, stream_to_gradio
45
 
46
- # ------------------------ Configuration and Setup ------------------------
47
- # Constants and configurations
48
  AUTHORIZED_IMPORTS = [
49
- "requests", # Web requests (fetching data from the internet)
50
- "zipfile", # Working with ZIP archives
51
- "pandas", # Data manipulation and analysis (DataFrames)
52
- "numpy", # Numerical computing (arrays, linear algebra)
53
- "sympy", # Symbolic mathematics (algebra, calculus)
54
- "json", # JSON data serialization/deserialization
55
- "bs4", # Beautiful Soup for HTML/XML parsing
56
- "pubchempy", # Accessing PubChem chemical database
57
  "yaml",
58
- "xml", # XML processing
59
- "yahoo_finance", # Fetching stock data
60
- "Bio", # Bioinformatics tools (e.g., sequence analysis)
61
- "sklearn", # Scikit-learn for machine learning
62
- "scipy", # Scientific computing (stats, optimization)
63
- "pydub", # Audio manipulation
64
- "PIL", # Pillow for image processing
65
- "chess", # Chess-related functionality
66
- "PyPDF2", # PDF manipulation
67
- "pptx", # PowerPoint file manipulation
68
- "torch", # PyTorch for neural networks
69
- "datetime", # Date and time handling
70
- "fractions", # Rational number arithmetic
71
- "csv", # CSV file reading/writing
72
- "cleantext", # Text cleaning and normalization
73
- "os", # Operating system interaction (file system, etc.) VERY IMPORTANT
74
- "re", # Regular expressions for text processing
75
- "collections", # Useful data structures (e.g., defaultdict, Counter)
76
- "math", # Basic mathematical functions
77
- "random", # Random number generation
78
- "io", # Input/output streams
79
- "urllib.parse", # URL parsing and manipulation (safe URL handling)
80
- "typing", # Support for type hints (improve code clarity)
81
- "concurrent.futures", # For parallel execution
82
- "time", # Measuring time
83
- "tempfile", # Creating temporary files and directories
84
- # Data Visualization (if needed) - Consider security implications carefully
85
- "matplotlib", # Plotting library (basic charts)
86
- "seaborn", # Statistical data visualization (more advanced)
87
- # Web Scraping (more specific/controlled) - Consider ethical implications
88
- "lxml", # Faster XML/HTML processing (alternative to bs4)
89
- "selenium", # Automated browser control (for dynamic websites)
90
- # Database interaction (if needed) - Handle credentials securely!
91
- "sqlite3", # SQLite database access
92
- # Task scheduling
93
- "schedule", # Allow the agent to schedule tasks
94
  ]
95
 
96
  USER_AGENT = (
97
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
98
  "(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
99
  )
 
100
  BROWSER_CONFIG = {
101
  "viewport_size": 1024 * 5,
102
  "downloads_folder": "downloads_folder",
@@ -110,708 +95,114 @@ BROWSER_CONFIG = {
110
  CUSTOM_ROLE_CONVERSIONS = {"tool-call": "assistant", "tool-response": "user"}
111
 
112
 
113
- ALLOWED_FILE_TYPES = [
114
- "application/pdf",
115
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
116
- "text/plain",
117
- "text/markdown",
118
- "application/json",
119
- "image/png",
120
- "image/webp",
121
- "image/jpeg",
122
- "image/gif",
123
- "video/mp4",
124
- "audio/mpeg",
125
- "audio/wav",
126
- "audio/ogg",
127
- ]
128
-
129
-
130
  def setup_environment():
131
  """Initialize environment variables and authentication."""
132
  load_dotenv(override=True)
133
- if os.getenv("HF_TOKEN"): # Check if token is actually set
134
  login(os.getenv("HF_TOKEN"))
135
- print("HF_TOKEN (last 10 characters):", os.getenv("HF_TOKEN")[-10:])
136
  else:
137
- print("HF_TOKEN not found in environment variables.")
138
-
139
-
140
- # ------------------------ Model and Tool Management ------------------------
141
- class ModelManager:
142
- """Manages model loading and initialization."""
143
-
144
- @staticmethod
145
- def load_model(chosen_inference: str, model_id: str, key_manager=None):
146
- """Load the specified model with appropriate configuration."""
147
- try:
148
- if chosen_inference == "hf_api":
149
- return HfApiModel(model_id=model_id)
150
 
151
- if chosen_inference == "hf_api_provider":
152
- return HfApiModel(provider="together")
153
-
154
- if chosen_inference == "litellm":
155
- return LiteLLMModel(model_id=model_id)
156
-
157
- if chosen_inference == "openai":
158
- if not key_manager:
159
- raise ValueError("Key manager required for OpenAI model")
160
-
161
- return OpenAIServerModel(
162
- model_id=model_id, api_key=key_manager.get_key("openai_api_key")
163
- )
164
-
165
- if chosen_inference == "transformers":
166
- return TransformersModel(
167
- model_id="HuggingFaceTB/SmolLM2-1.7B-Instruct",
168
- device_map="auto",
169
- max_new_tokens=1000,
170
- )
171
 
172
- raise ValueError(f"Invalid inference type: {chosen_inference}")
 
173
 
174
- except Exception as e:
175
- print(f"✗ Couldn't load model: {e}")
176
- raise
177
 
 
 
 
178
 
179
- class ToolRegistry:
180
- """Manages tool initialization and organization."""
181
 
182
- @staticmethod
183
- def load_web_tools(model, browser, text_limit=20000):
184
- """Initialize and return web-related tools."""
185
- return [
186
- GoogleSearchTool(provider="serper"),
187
- VisitTool(browser),
188
- PageUpTool(browser),
189
- PageDownTool(browser),
190
- FinderTool(browser),
191
- FindNextTool(browser),
192
- ArchiveSearchTool(browser),
193
- TextInspectorTool(model, text_limit),
194
- ]
195
 
196
- @staticmethod
197
- def load_document_tools():
198
- """
199
- Initialize and return document processing, i.e. sanitisation and indexing, tools.
 
200
 
201
- Returns:
202
- List of document tools
203
- """
204
- return [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  FrontmatterGeneratorTool(),
206
  TextCleanerTool(),
207
  ]
 
 
 
208
 
209
- @staticmethod
210
- def load_image_generation_tools():
211
- """Initialize and return image generation tools."""
212
- try:
213
- return Tool.from_space(
214
- space_id="xkerser/FLUX.1-dev",
215
- name="image_generator",
216
- description=(
217
- "Generates high-quality AgentImage using the FLUX.1-dev model based on text prompts."
218
- ),
219
- )
220
- except Exception as e:
221
- print(f"✗ Couldn't initialize image generation tool: {e}")
222
- raise
223
-
224
-
225
- # ------------------------ Agent Creation and Execution ------------------------
226
- def create_agent():
227
- """
228
- Creates a fresh agent instance with properly configured tools.
229
-
230
- Returns:
231
- CodeAgent: Configured agent ready for use
232
-
233
- Raises:
234
- ValueError: If tool validation fails
235
- RuntimeError: If agent creation fails
236
- """
237
  try:
238
- # Initialize model
239
- model = LiteLLMModel(
240
- custom_role_conversions=CUSTOM_ROLE_CONVERSIONS,
241
- model_id="openrouter/google/gemini-2.0-flash-001",
242
- )
243
-
244
- # Initialize tools
245
- text_limit = 30000
246
- browser = SimpleTextBrowser(**BROWSER_CONFIG)
247
-
248
- # Create tool instances with proper error handling
249
- web_tools = ToolRegistry.load_web_tools(model, browser, text_limit)
250
-
251
- # Add FinalAnswerTool explicitly to ensure it's available
252
- final_answer_tool = FinalAnswerTool()
253
-
254
- try:
255
- doc_tools = ToolRegistry.load_document_tools()
256
- except AssertionError as e:
257
- print(f"Warning: Error loading document tools: {str(e)}")
258
- print("Attempting to continue with available tools...")
259
- doc_tools = []
260
-
261
- try:
262
- image_generator = ToolRegistry.load_image_generation_tools()
263
- except Exception as e:
264
- print(f"Warning: Image generation tools unavailable: {str(e)}")
265
- image_generator = None
266
-
267
- # Combine available tools (filter out None values)
268
- all_tools = [
269
- tool
270
- for tool in (
271
- [visualizer]
272
- + [final_answer_tool] # Added explicitly
273
- + web_tools
274
- + doc_tools
275
- + ([image_generator] if image_generator else [])
276
- )
277
- if tool is not None
278
- ]
279
-
280
- # Log available tools
281
- print(f"Loaded {len(all_tools)} tools successfully")
282
- for tool in all_tools:
283
- print(f"- {tool.name}: {tool.description[:50]}...")
284
-
285
- return CodeAgent(
286
- model=model,
287
- tools=all_tools,
288
- max_steps=15, # Increased from 12 to give more room for complex tasks
289
- verbosity_level=2,
290
- additional_authorized_imports=AUTHORIZED_IMPORTS,
291
- planning_interval=4,
292
- prompt_templates={
293
- "system_prompt": """You are a helpful AI assistant with access to various tools.
294
- Always think step by step, carefully planning your approach to the task.
295
- When using Python code:
296
- - Keep your code simple and readable
297
- - Use the final_answer tool to provide your final response
298
- Example of how to provide a final answer:
299
- ```python
300
- final_answer("This is my final answer based on my analysis.")
301
- ```
302
- """
303
- },
304
  )
305
  except Exception as e:
306
- print(f"Failed to create agent: {e}")
307
- raise RuntimeError(f"Agent creation failed: {e}") from e
308
-
309
-
310
- def detect_agent_loop(agent):
311
- """Check if agent is stuck in a loop of similar errors"""
312
- if not hasattr(agent, "memory") or not hasattr(agent.memory, "steps"):
313
- return False
314
-
315
- if len(agent.memory.steps) < 4:
316
- return False
317
-
318
- recent_steps = agent.memory.steps[-4:]
319
- error_count = sum(
320
- 1 for step in recent_steps if hasattr(step, "error") and step.error is not None
321
- )
322
-
323
- if error_count >= 3:
324
- return True
325
- return False
326
-
327
-
328
- # ------------------------ Gradio UI Components ------------------------
329
- class GradioUI:
330
- """A one-line interface to launch your agent in Gradio."""
331
-
332
- def __init__(self, file_upload_folder: str | None = None):
333
- """Initialize the Gradio UI with optional file upload functionality."""
334
- self.file_upload_folder = file_upload_folder
335
-
336
- if self.file_upload_folder is not None:
337
- if not os.path.exists(file_upload_folder):
338
- os.mkdir(file_upload_folder)
339
-
340
- def interact_with_agent(self, prompt, messages, session_state):
341
- """Main interaction handler with the agent."""
342
-
343
- # Get or create session-specific agent with cache persistence
344
- if "agent" not in session_state:
345
- try:
346
- session_state["agent"] = create_agent()
347
- session_state["creation_time"] = datetime.datetime.now()
348
- session_state["request_count"] = 0
349
- except Exception as e:
350
- messages.append(
351
- gr.ChatMessage(
352
- role="assistant",
353
- content=f"**Error initializing agent**: {str(e)}\n\nPlease refresh the page and try again.",
354
- )
355
- )
356
- yield messages
357
- return
358
-
359
- session_state["request_count"] += 1
360
-
361
- # Add user message
362
- messages.append(gr.ChatMessage(role="user", content=prompt))
363
- yield messages
364
-
365
- try:
366
- # Check if agent should be reset (e.g., if too many requests)
367
- reset_needed = session_state["request_count"] > 15 or detect_agent_loop(
368
- session_state["agent"]
369
- )
370
-
371
- # If agent is in a loop, provide a hint
372
- if detect_agent_loop(session_state["agent"]):
373
- messages.append(
374
- gr.ChatMessage(
375
- role="assistant",
376
- content="I notice I'm having trouble executing some commands. Let me try a different approach...",
377
- )
378
- )
379
- yield messages
380
-
381
- for msg in stream_to_gradio(
382
- session_state["agent"], task=prompt, reset_agent_memory=reset_needed
383
- ):
384
- messages.append(msg)
385
- yield messages
386
-
387
- # If we reset the agent memory, update the request count
388
- if reset_needed:
389
- session_state["request_count"] = 1
390
-
391
- except Exception as e:
392
- messages.append(
393
- gr.ChatMessage(
394
- role="assistant",
395
- content=f"**Error processing your request**: {str(e)}\n\nPlease try again with a different query.",
396
- )
397
- )
398
- yield messages
399
-
400
- def upload_file(self, file, file_uploads_log):
401
- """Handle file uploads with validation, security, and clear feedback."""
402
- if file is None:
403
- return gr.Textbox("No file uploaded", visible=True), file_uploads_log
404
-
405
- try:
406
- # Get file size and check limit before processing
407
- file_size_mb = os.path.getsize(file.name) / (1024 * 1024) # Size in MB
408
- max_file_size_mb = 50 # Define the limit
409
-
410
- if file_size_mb > max_file_size_mb:
411
- return (
412
- gr.Textbox(
413
- f"❌ File size ({file_size_mb:.1f} MB) exceeds {max_file_size_mb} MB limit.",
414
- visible=True,
415
- ),
416
- file_uploads_log,
417
- )
418
-
419
- # Check MIME type
420
- mime_type, _ = mimetypes.guess_type(file.name)
421
-
422
- # Ensure Markdown files are recognized properly
423
- if file.name.lower().endswith(".md"):
424
- mime_type = "text/markdown"
425
-
426
- if mime_type not in ALLOWED_FILE_TYPES:
427
- allowed_extensions = [
428
- t.rsplit("/", maxsplit=1)[-1] for t in ALLOWED_FILE_TYPES
429
- ]
430
- return (
431
- gr.Textbox(
432
- f"❌ File type '{mime_type or 'unknown'}' is not allowed. Supported types: {', '.join(allowed_extensions)}",
433
- visible=True,
434
- ),
435
- file_uploads_log,
436
- )
437
-
438
- # Sanitize file name with better pattern
439
- original_name = os.path.basename(file.name)
440
- sanitized_name = re.sub(r"[^\w\-.]", "_", original_name)
441
-
442
- # Save the uploaded file
443
- file_path = os.path.join(self.file_upload_folder, sanitized_name)
444
- shutil.copy(file.name, file_path)
445
-
446
- return gr.Textbox(
447
- f"✓ File uploaded successfully: {os.path.basename(file_path)} ({file_size_mb:.1f} MB)",
448
- visible=True,
449
- ), file_uploads_log + [file_path]
450
-
451
- except Exception as e:
452
- return (
453
- gr.Textbox(f"❌ Upload error: {str(e)}", visible=True),
454
- file_uploads_log,
455
- )
456
-
457
- def log_user_message(self, text_input, file_uploads_log):
458
- """Process user message and handle file references with proper agent types."""
459
- message = text_input
460
-
461
- if len(file_uploads_log) > 0:
462
- # Group files by type for better agent processing
463
- file_info = {}
464
- for file_path in file_uploads_log:
465
- ext = os.path.splitext(file_path)[1].lower()
466
- if ext in [".jpg", ".jpeg", ".png", ".gif", ".webp"]:
467
- category = "images"
468
- elif ext in [".mp3", ".wav", ".ogg"]:
469
- category = "audio"
470
- elif ext in [".md"]:
471
- category = "markdown"
472
- elif ext in [".pdf"]:
473
- category = "pdf"
474
- else:
475
- category = "documents"
476
-
477
- if category not in file_info:
478
- file_info[category] = []
479
- file_info[category].append(
480
- file_path
481
- ) # Store full path for easier access
482
-
483
- # Format file information for the agent
484
- file_message = "\nYou have been provided with these files:\n"
485
- for category, files in file_info.items():
486
- # Convert to filename-only for display
487
- file_names = [os.path.basename(f) for f in files]
488
- file_message += f"- {category.capitalize()}: {', '.join(file_names)}\n"
489
- # Add full paths after names
490
- file_message += f" Paths: {', '.join(files)}\n"
491
-
492
- message += file_message
493
- message += (
494
- "\nUse inspect_file_as_text for documents/markdown/pdf, "
495
- "visualizer for images, and the appropriate tools for audio files. "
496
- "Remember to use the full file path when accessing the files."
497
- )
498
-
499
- return (
500
- message,
501
- gr.Textbox(value="", interactive=False, placeholder="Processing..."),
502
- gr.Button(interactive=False),
503
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
504
 
505
- def detect_device(self, request: gr.Request):
506
- """Detect whether the user is on mobile or desktop device."""
507
- if not request:
508
- return "Unknown device" # Handle case where request is none.
509
-
510
- # Method 1: Check sec-ch-ua-mobile header
511
- is_mobile_header = request.headers.get("sec-ch-ua-mobile")
512
- if is_mobile_header:
513
- return "Mobile" if "?1" in is_mobile_header else "Desktop"
514
-
515
- # Method 2: Check user-agent string
516
- user_agent = request.headers.get("user-agent", "").lower()
517
- mobile_keywords = ["android", "iphone", "ipad", "mobile", "phone"]
518
-
519
- if any(keyword in user_agent for keyword in mobile_keywords):
520
- return "Mobile"
521
-
522
- # Method 3: Check platform
523
- platform = request.headers.get("sec-ch-ua-platform", "").lower()
524
- if platform:
525
- if platform in ['"android"', '"ios"']:
526
- return "Mobile"
527
- if platform in ['"windows"', '"macos"', '"linux"']:
528
- return "Desktop"
529
-
530
- # Default case if no clear indicators
531
- return "Desktop"
532
-
533
- def launch(self, **kwargs):
534
- """Launch the Gradio UI with responsive layout."""
535
- with gr.Blocks(theme="ocean", fill_height=True) as demo:
536
- # Different layouts for mobile and computer devices
537
- @gr.render()
538
- def layout(request: gr.Request):
539
- device = self.detect_device(request)
540
- print(f"device - {device}")
541
- # Render layout with sidebar
542
- if device == "Desktop":
543
- return self._create_desktop_layout()
544
- return self._create_mobile_layout()
545
-
546
- demo.queue(max_size=20).launch(
547
- debug=True, **kwargs
548
- ) # Add queue with reasonable size
549
-
550
- def _create_desktop_layout(self):
551
- """Create the desktop layout with sidebar and enhanced styling."""
552
- with gr.Blocks(fill_height=True) as sidebar_demo:
553
- with gr.Sidebar():
554
- gr.Markdown(
555
- """# 🔍 OpenDeepResearch
556
- ### Smolagents + Document Tools
557
- """
558
- )
559
- with gr.Group():
560
- gr.Markdown("**What can I help you with today?**", container=True)
561
- text_input = gr.Textbox(
562
- lines=4,
563
- label="Your request",
564
- container=False,
565
- placeholder="Enter your question or task here...",
566
- show_label=False,
567
- )
568
-
569
- with gr.Row():
570
- clear_btn = gr.Button("Clear", variant="secondary")
571
- launch_research_btn = gr.Button("Run", variant="primary")
572
-
573
- # File upload section with better labeling
574
- if self.file_upload_folder is not None:
575
- with gr.Group():
576
- gr.Markdown("**📎 Upload Documents**")
577
- upload_file = gr.File(
578
- label="Upload files for analysis",
579
- file_types=[
580
- "pdf",
581
- "docx",
582
- "txt",
583
- "md",
584
- "csv",
585
- "xlsx",
586
- "jpg",
587
- "png",
588
- ],
589
- file_count="multiple",
590
- )
591
- upload_status = gr.Textbox(
592
- label="Upload Status", interactive=False, visible=False
593
- )
594
- file_uploads_log = gr.State([])
595
-
596
- # Show uploaded files list
597
- uploaded_files_display = gr.Markdown("No files uploaded yet")
598
-
599
- upload_file.change(
600
- self.upload_file,
601
- [upload_file, file_uploads_log],
602
- [upload_status, file_uploads_log],
603
- ).then(
604
- lambda files: (
605
- "**Uploaded Files:**\n"
606
- + "\n".join([f"- {os.path.basename(f)}" for f in files])
607
- if files
608
- else "No files uploaded yet"
609
- ),
610
- [file_uploads_log],
611
- [uploaded_files_display],
612
- )
613
-
614
- # Add helpful tool usage examples
615
- with gr.Accordion("Tool Usage Examples", open=False):
616
- gr.Markdown(
617
- """
618
- ### Document Tools
619
- - "Extract metadata from this document" - Uses frontmatter generator
620
- - "Clean and format this text" - Uses text cleaner
621
-
622
- ### File Analysis
623
- - "Analyze this PDF and summarize the key points" - Uses inspect_file_as_text
624
- - "What's in this image?" - Uses visualizer
625
-
626
- ### Web Search
627
- - "Find information about XYZ" - Uses search tools
628
- """
629
- )
630
-
631
- gr.HTML("<br><hr><h4><center>Powered by:</center></h4>")
632
- with gr.Row():
633
- gr.HTML(
634
- """
635
- <div style="display: flex; align-items: center; justify-content: center; gap: 8px; font-family: system-ui, -apple-system, sans-serif;">
636
- <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png"
637
- style="width: 32px; height: 32px; object-fit: contain;" alt="logo">
638
- <a target="_blank" href="https://github.com/huggingface/smolagents">
639
- <b>huggingface/smolagents</b>
640
- </a>
641
- </div>
642
- """
643
- )
644
-
645
- # Main chat area with improved styling
646
- session_state = gr.State({})
647
- stored_messages = gr.State([])
648
- if "file_uploads_log" not in locals():
649
- file_uploads_log = gr.State([])
650
-
651
- chatbot = gr.Chatbot(
652
- label="OpenDeepResearch Assistant",
653
- type="messages",
654
- avatar_images=(
655
- None,
656
- "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png",
657
- ),
658
- resizeable=True,
659
- show_copy_button=True,
660
- scale=1,
661
- elem_id="my-chatbot",
662
- height=700,
663
- )
664
-
665
- # Connect clear button
666
- clear_btn.click(
667
- lambda: ([], [], {"agent": session_state.get("agent")}),
668
- None,
669
- [chatbot, stored_messages, session_state],
670
- )
671
-
672
- # Connect event handlers
673
- self._connect_event_handlers(
674
- text_input,
675
- launch_research_btn,
676
- file_uploads_log,
677
- stored_messages,
678
- chatbot,
679
- session_state,
680
- )
681
-
682
- return sidebar_demo
683
-
684
- def _create_mobile_layout(self):
685
- """Create the mobile layout (simpler without sidebar)."""
686
- with gr.Blocks(fill_height=True) as simple_demo:
687
- gr.Markdown("""# 🔍 OpenDeepResearch""")
688
- # Add session state to store session-specific data
689
- session_state = gr.State({})
690
- stored_messages = gr.State([])
691
- file_uploads_log = gr.State([])
692
-
693
- chatbot = gr.Chatbot(
694
- label="OpenDeepResearch Assistant",
695
- type="messages",
696
- avatar_images=(
697
- None,
698
- "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png",
699
- ),
700
- resizeable=True,
701
- scale=1,
702
- )
703
-
704
- # If an upload folder is provided, enable the upload feature
705
- if self.file_upload_folder is not None:
706
- upload_file = gr.File(
707
- label="Upload a file",
708
- file_types=["pdf", "docx", "txt", "md", "jpg", "png"],
709
- )
710
- upload_status = gr.Textbox(
711
- label="Upload Status", interactive=False, visible=False
712
- )
713
- upload_file.change(
714
- self.upload_file,
715
- [upload_file, file_uploads_log],
716
- [upload_status, file_uploads_log],
717
- )
718
-
719
- text_input = gr.Textbox(
720
- lines=2,
721
- label="Your question",
722
- placeholder="Enter your question here",
723
- )
724
-
725
- with gr.Row():
726
- clear_btn = gr.Button("Clear", variant="secondary")
727
- launch_research_btn = gr.Button("Run", variant="primary")
728
-
729
- # Connect clear button
730
- clear_btn.click(
731
- lambda: ([], [], {"agent": session_state.get("agent")}),
732
- None,
733
- [chatbot, stored_messages, session_state],
734
- )
735
-
736
- self._connect_event_handlers(
737
- text_input,
738
- launch_research_btn,
739
- file_uploads_log,
740
- stored_messages,
741
- chatbot,
742
- session_state,
743
- )
744
-
745
- return simple_demo
746
-
747
- def _connect_event_handlers(
748
- self,
749
- text_input,
750
- launch_research_btn,
751
- file_uploads_log,
752
- stored_messages,
753
- chatbot,
754
- session_state,
755
- ):
756
- """Connect the event handlers for input elements."""
757
- # Connect text input submit event
758
- text_input.submit(
759
- self.log_user_message,
760
- [text_input, file_uploads_log],
761
- [stored_messages, text_input, launch_research_btn],
762
- ).then(
763
- self.interact_with_agent,
764
- [stored_messages, chatbot, session_state],
765
- [chatbot],
766
- ).then(
767
- lambda: (
768
- gr.Textbox(
769
- interactive=True,
770
- placeholder="Enter your prompt here and press the button",
771
- ),
772
- gr.Button(interactive=True),
773
- ),
774
- None,
775
- [text_input, launch_research_btn],
776
- )
777
-
778
- # Connect button click event
779
- launch_research_btn.click(
780
- self.log_user_message,
781
- [text_input, file_uploads_log],
782
- [stored_messages, text_input, launch_research_btn],
783
- ).then(
784
- self.interact_with_agent,
785
- [stored_messages, chatbot, session_state],
786
- [chatbot],
787
- ).then(
788
- lambda: (
789
- gr.Textbox(
790
- interactive=True,
791
- placeholder="Enter your prompt here and press the button",
792
- ),
793
- gr.Button(interactive=True),
794
- ),
795
- None,
796
- [text_input, launch_research_btn],
797
- )
798
-
799
-
800
- # ------------------------ Execution ------------------------
801
- def main():
802
- """Main entry point for the application."""
803
- # Initialize environment
804
- setup_environment()
805
-
806
- # Ensure downloads folder exists
807
- os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True)
808
 
809
- # Ensure uploads folder exists
810
- os.makedirs("uploaded_files", exist_ok=True)
811
-
812
- # Launch UI
813
- GradioUI(file_upload_folder="uploaded_files").launch()
814
 
 
 
815
 
816
- if __name__ == "__main__":
817
- main()
 
1
  """
2
+ OpenDeepResearch Agent Creation Module
3
 
4
+ This module provides functions to create various agent configurations
5
+ for different use cases.
 
6
  """
7
 
 
8
  import os
 
 
 
 
 
9
  from dotenv import load_dotenv
10
  from huggingface_hub import login
 
11
 
12
  from scripts.text_inspector_tool import TextInspectorTool
13
  from scripts.text_web_browser import (
 
25
 
26
  from smolagents import (
27
  CodeAgent,
28
+ GradioUI,
29
  LiteLLMModel,
 
 
30
  GoogleSearchTool,
31
  Tool,
32
  FinalAnswerTool,
33
  )
 
34
 
35
+ # Constants
 
36
  AUTHORIZED_IMPORTS = [
37
+ "requests",
38
+ "zipfile",
39
+ "pandas",
40
+ "numpy",
41
+ "sympy",
42
+ "json",
43
+ "bs4",
44
+ "pubchempy",
45
  "yaml",
46
+ "xml",
47
+ "yahoo_finance",
48
+ "Bio",
49
+ "sklearn",
50
+ "scipy",
51
+ "pydub",
52
+ "PIL",
53
+ "chess",
54
+ "PyPDF2",
55
+ "pptx",
56
+ "torch",
57
+ "datetime",
58
+ "fractions",
59
+ "csv",
60
+ "cleantext",
61
+ "os",
62
+ "re",
63
+ "collections",
64
+ "math",
65
+ "random",
66
+ "io",
67
+ "urllib.parse",
68
+ "typing",
69
+ "concurrent.futures",
70
+ "time",
71
+ "tempfile",
72
+ "matplotlib",
73
+ "seaborn",
74
+ "lxml",
75
+ "selenium",
76
+ "sqlite3",
77
+ "schedule",
 
 
 
 
78
  ]
79
 
80
  USER_AGENT = (
81
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
82
  "(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
83
  )
84
+
85
  BROWSER_CONFIG = {
86
  "viewport_size": 1024 * 5,
87
  "downloads_folder": "downloads_folder",
 
95
  CUSTOM_ROLE_CONVERSIONS = {"tool-call": "assistant", "tool-response": "user"}
96
 
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  def setup_environment():
99
  """Initialize environment variables and authentication."""
100
  load_dotenv(override=True)
101
+ if os.getenv("HF_TOKEN"):
102
  login(os.getenv("HF_TOKEN"))
103
+ print("HF_TOKEN authenticated successfully")
104
  else:
105
+ print("HF_TOKEN not found in environment variables")
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
+ # Ensure download folder exists
108
+ os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
+ # Ensure upload folder exists
111
+ os.makedirs("uploaded_files", exist_ok=True)
112
 
 
 
 
113
 
114
+ def create_agent(model_id="openrouter/google/gemini-2.0-flash-001"):
115
+ """
116
+ Creates an agent with all necessary tools for research and document processing.
117
 
118
+ Args:
119
+ model_id: Model ID to use for the agent
120
 
121
+ Returns:
122
+ CodeAgent: Fully configured agent
123
+ """
124
+ # Setup environment first
125
+ setup_environment()
 
 
 
 
 
 
 
 
126
 
127
+ # Initialize model
128
+ model = LiteLLMModel(
129
+ custom_role_conversions=CUSTOM_ROLE_CONVERSIONS,
130
+ model_id=model_id,
131
+ )
132
 
133
+ # Initialize tools
134
+ text_limit = 30000
135
+ browser = SimpleTextBrowser(**BROWSER_CONFIG)
136
+
137
+ # Create tool instances
138
+ web_tools = [
139
+ GoogleSearchTool(provider="serper"),
140
+ VisitTool(browser),
141
+ PageUpTool(browser),
142
+ PageDownTool(browser),
143
+ FinderTool(browser),
144
+ FindNextTool(browser),
145
+ ArchiveSearchTool(browser),
146
+ TextInspectorTool(model, text_limit),
147
+ ]
148
+
149
+ # Add FinalAnswerTool explicitly
150
+ final_answer_tool = FinalAnswerTool()
151
+
152
+ # Load document tools
153
+ try:
154
+ doc_tools = [
155
  FrontmatterGeneratorTool(),
156
  TextCleanerTool(),
157
  ]
158
+ except AssertionError as e:
159
+ print(f"Warning: Error loading document tools: {str(e)}")
160
+ doc_tools = []
161
 
162
+ # Load image generation tool
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  try:
164
+ image_generator = Tool.from_space(
165
+ space_id="xkerser/FLUX.1-dev",
166
+ name="image_generator",
167
+ description="Generates high-quality images using the FLUX.1-dev model based on text prompts.",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  )
169
  except Exception as e:
170
+ print(f"Warning: Image generation tool unavailable: {str(e)}")
171
+ image_generator = None
172
+
173
+ # Combine all tools
174
+ all_tools = [
175
+ tool
176
+ for tool in (
177
+ [visualizer]
178
+ + [final_answer_tool]
179
+ + web_tools
180
+ + doc_tools
181
+ + ([image_generator] if image_generator else [])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  )
183
+ if tool is not None
184
+ ]
185
+
186
+ # Log available tools
187
+ print(f"Loaded {len(all_tools)} tools successfully")
188
+
189
+ # Create and return the agent
190
+ return CodeAgent(
191
+ model=model,
192
+ tools=all_tools,
193
+ max_steps=15,
194
+ verbosity_level=2,
195
+ additional_authorized_imports=AUTHORIZED_IMPORTS,
196
+ planning_interval=4,
197
+ )
198
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
 
200
+ if __name__ == "__main__":
201
+ # Create an agent instance
202
+ agent_instance = create_agent()
 
 
203
 
204
+ # Create Gradio UI with the agent instance and file upload folder
205
+ demo = GradioUI(agent_instance, file_upload_folder="uploaded_files")
206
 
207
+ # Launch the UI
208
+ demo.launch()