Leonardo commited on
Commit
30b3d70
·
verified ·
1 Parent(s): 1683e3a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +653 -95
app.py CHANGED
@@ -1,13 +1,13 @@
1
- """
2
- OpenDeepResearch Agent Creation Module
3
-
4
- This module provides functions to create various agent configurations
5
- for different use cases.
6
- """
7
-
8
  import os
 
 
 
 
 
 
9
  from dotenv import load_dotenv
10
  from huggingface_hub import login
 
11
 
12
  from scripts.text_inspector_tool import TextInspectorTool
13
  from scripts.text_web_browser import (
@@ -25,14 +25,18 @@ from scripts.text_cleaner_tool import TextCleanerTool
25
 
26
  from smolagents import (
27
  CodeAgent,
28
- GradioUI,
29
  LiteLLMModel,
 
 
30
  GoogleSearchTool,
31
  Tool,
32
- FinalAnswerTool,
33
  )
 
 
 
34
 
35
- # Constants
36
  AUTHORIZED_IMPORTS = [
37
  "requests",
38
  "zipfile",
@@ -81,7 +85,6 @@ USER_AGENT = (
81
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
82
  "(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
83
  )
84
-
85
  BROWSER_CONFIG = {
86
  "viewport_size": 1024 * 5,
87
  "downloads_folder": "downloads_folder",
@@ -94,109 +97,664 @@ BROWSER_CONFIG = {
94
 
95
  CUSTOM_ROLE_CONVERSIONS = {"tool-call": "assistant", "tool-response": "user"}
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
  def setup_environment():
99
- """Initialize environment variables and authentication."""
 
 
100
  load_dotenv(override=True)
101
- if os.getenv("HF_TOKEN"):
102
- login(os.getenv("HF_TOKEN"))
103
- print("HF_TOKEN authenticated successfully")
 
104
  else:
105
- print("HF_TOKEN not found in environment variables")
106
 
107
- # Ensure download folder exists
108
- os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True)
109
 
110
- # Ensure upload folder exists
111
- os.makedirs("uploaded_files", exist_ok=True)
 
 
 
112
 
 
 
 
 
113
 
114
- def create_agent(model_id="openrouter/google/gemini-2.0-flash-001"):
115
- """
116
- Creates an agent with all necessary tools for research and document processing.
 
117
 
118
- Args:
119
- model_id: Model ID to use for the agent
120
 
121
- Returns:
122
- CodeAgent: Fully configured agent
123
- """
124
- # Setup environment first
125
- setup_environment()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
- # Initialize model
128
- model = LiteLLMModel(
129
- custom_role_conversions=CUSTOM_ROLE_CONVERSIONS,
130
- model_id=model_id,
131
- )
132
-
133
- # Initialize tools
134
- text_limit = 30000
135
- browser = SimpleTextBrowser(**BROWSER_CONFIG)
136
-
137
- # Create tool instances
138
- web_tools = [
139
- GoogleSearchTool(provider="serper"),
140
- VisitTool(browser),
141
- PageUpTool(browser),
142
- PageDownTool(browser),
143
- FinderTool(browser),
144
- FindNextTool(browser),
145
- ArchiveSearchTool(browser),
146
- TextInspectorTool(model, text_limit),
147
- ]
148
-
149
- # Add FinalAnswerTool explicitly
150
- final_answer_tool = FinalAnswerTool()
151
-
152
- # Load document tools
153
- try:
154
- doc_tools = [
155
- FrontmatterGeneratorTool(),
156
- TextCleanerTool(),
157
  ]
158
- except AssertionError as e:
159
- print(f"Warning: Error loading document tools: {str(e)}")
160
- doc_tools = []
161
 
162
- # Load image generation tool
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  try:
164
- image_generator = Tool.from_space(
165
- space_id="xkerser/FLUX.1-dev",
166
- name="image_generator",
167
- description="Generates high-quality images using the FLUX.1-dev model based on text prompts.",
168
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  except Exception as e:
170
- print(f"Warning: Image generation tool unavailable: {str(e)}")
171
- image_generator = None
172
-
173
- # Combine all tools
174
- all_tools = [
175
- tool
176
- for tool in (
177
- [visualizer]
178
- + [final_answer_tool]
179
- + web_tools
180
- + doc_tools
181
- + ([image_generator] if image_generator else [])
182
  )
183
- if tool is not None
184
- ]
185
 
186
- # Log available tools
187
- print(f"Loaded {len(all_tools)} tools successfully")
188
 
189
- # Create and return the agent
190
- return CodeAgent(
191
- model=model,
192
- tools=all_tools,
193
- max_steps=15,
194
- verbosity_level=2,
195
- additional_authorized_imports=AUTHORIZED_IMPORTS,
196
- planning_interval=4,
197
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
 
199
 
200
  if __name__ == "__main__":
201
- agent_instance = create_agent()
202
- GradioUI(agent_instance).launch()
 
 
 
 
 
 
 
 
1
  import os
2
+ import re
3
+ import shutil
4
+ import datetime
5
+ import mimetypes
6
+ from typing import Optional, List, Dict, Tuple # More specific typing
7
+
8
  from dotenv import load_dotenv
9
  from huggingface_hub import login
10
+ import gradio as gr
11
 
12
  from scripts.text_inspector_tool import TextInspectorTool
13
  from scripts.text_web_browser import (
 
25
 
26
  from smolagents import (
27
  CodeAgent,
28
+ HfApiModel,
29
  LiteLLMModel,
30
+ OpenAIServerModel,
31
+ TransformersModel,
32
  GoogleSearchTool,
33
  Tool,
 
34
  )
35
+ from smolagents.agent_types import AgentText, AgentImage, AgentAudio
36
+ from smolagents.gradio_ui import pull_messages_from_step, handle_agent_output_types
37
+
38
 
39
+ # ------------------------ Configuration and Setup ------------------------
40
  AUTHORIZED_IMPORTS = [
41
  "requests",
42
  "zipfile",
 
85
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
86
  "(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
87
  )
 
88
  BROWSER_CONFIG = {
89
  "viewport_size": 1024 * 5,
90
  "downloads_folder": "downloads_folder",
 
97
 
98
  CUSTOM_ROLE_CONVERSIONS = {"tool-call": "assistant", "tool-response": "user"}
99
 
100
# MIME types accepted by GradioUI.upload_file; any other guessed type is
# rejected with an error message before the file is copied into the
# upload folder.
ALLOWED_FILE_TYPES = [
    "application/pdf",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "text/plain",
    "text/markdown",
    "application/json",
    "image/png",
    "image/webp",
    "image/jpeg",
    "image/gif",
    "video/mp4",
    "audio/mpeg",
    "audio/wav",
    "audio/ogg",
]
115
+
116
 
117
def setup_environment():
    """
    Load environment variables and authenticate with the Hugging Face Hub.

    Reads variables from a local ``.env`` file (overriding any already set
    in the process environment) and, when ``HF_TOKEN`` is present, logs in
    to the Hugging Face Hub with it.
    """
    load_dotenv(override=True)
    hf_token = os.getenv("HF_TOKEN")  # read once so check and use agree
    if hf_token:
        login(hf_token)
        # Never echo token characters (even a suffix) into logs — that is
        # credential leakage. Confirm authentication without the secret.
        print("HF_TOKEN found; authenticated with Hugging Face Hub.")
    else:
        print("HF_TOKEN not found in environment variables.")
128
 
 
 
129
 
130
+ # ------------------------ Model and Tool Management ------------------------
131
class ModelManager:
    """
    Manages model loading and initialization.
    """

    @staticmethod
    def load_model(chosen_inference: str, model_id: str, key_manager=None):
        """
        Load the specified model with appropriate configuration.

        Args:
            chosen_inference (str): The inference backend to use; one of
                "hf_api", "hf_api_provider", "litellm", "openai",
                "transformers".
            model_id (str): The ID of the model to load. NOTE(review): the
                "hf_api_provider" and "transformers" backends ignore this
                argument and use fixed models — confirm that is intended.
            key_manager: (Optional) Key manager for API keys. Required for
                OpenAI models.

        Returns:
            An instance of the chosen model class.

        Raises:
            ValueError: If an invalid inference type is specified or if the
                key manager is missing for OpenAI models.
            Exception: Re-raised if the model fails to load.
        """

        def _build_openai():
            # OpenAI needs an API key resolved through the key manager.
            if not key_manager:
                raise ValueError("Key manager required for OpenAI model")
            return OpenAIServerModel(
                model_id=model_id, api_key=key_manager.get_key("openai_api_key")
            )

        # Deferred constructors keyed by backend name; nothing is
        # instantiated until the matching entry is actually called.
        builders = {
            "hf_api": lambda: HfApiModel(model_id=model_id),
            "hf_api_provider": lambda: HfApiModel(provider="together"),
            "litellm": lambda: LiteLLMModel(model_id=model_id),
            "openai": _build_openai,
            "transformers": lambda: TransformersModel(
                model_id="HuggingFaceTB/SmolLM2-1.7B-Instruct",
                device_map="auto",
                max_new_tokens=1000,
            ),
        }

        try:
            build = builders.get(chosen_inference)
            if build is None:
                raise ValueError(f"Invalid inference type: {chosen_inference}")
            return build()
        except Exception as e:
            print(f"✗ Couldn't load model: {e}")
            raise
178
 
179
+
180
class ToolRegistry:
    """Builds and groups the tool instances the agent can use."""

    @staticmethod
    def load_web_tools(model, browser, text_limit=20000) -> List[Tool]:
        """
        Build the web research tool set.

        Args:
            model: The language model used by the text inspector.
            browser: The shared browser instance for the navigation tools.
            text_limit (int): Maximum text length for the text inspector.

        Returns:
            A list of web-related tools: search, browser navigation,
            archive lookup and text inspection (in that order).
        """
        search_tools = [GoogleSearchTool(provider="serper")]
        browsing_tools = [
            VisitTool(browser),
            PageUpTool(browser),
            PageDownTool(browser),
            FinderTool(browser),
            FindNextTool(browser),
            ArchiveSearchTool(browser),
        ]
        inspection_tools = [TextInspectorTool(model, text_limit)]
        return search_tools + browsing_tools + inspection_tools

    @staticmethod
    def load_document_tools() -> List[Tool]:
        """
        Build the document processing tool set.

        Returns:
            List of document tools (frontmatter generation, text cleaning).
        """
        document_tools = [FrontmatterGeneratorTool()]
        document_tools.append(TextCleanerTool())
        return document_tools

    @staticmethod
    def load_image_generation_tools() -> Optional[Tool]:
        """
        Build the image-generation tool backed by the FLUX.1-dev Space.

        Returns:
            The image generation tool, or None if initialization fails
            (for example when the Space is unreachable).
        """
        try:
            return Tool.from_space(
                space_id="xkerser/FLUX.1-dev",
                name="image_generator",
                description="Generates high-quality AgentImage using the FLUX.1-dev model based on text prompts.",
            )
        except Exception as e:
            print(f"✗ Couldn't initialize image generation tool: {e}")
            return None
234
+
235
+
236
+ # ------------------------ Agent Creation and Execution ------------------------
237
def create_agent() -> CodeAgent:
    """
    Create a fresh agent instance with the full research tool set.

    Builds the LiteLLM-backed model and the text browser, assembles the
    web, document and (best-effort) image-generation tools, and wires
    everything into a CodeAgent.

    Returns:
        CodeAgent: Configured agent ready for use.

    Raises:
        RuntimeError: If any part of agent construction fails; the original
            exception is attached as the cause.
    """
    try:
        # Initialize model
        model = LiteLLMModel(
            custom_role_conversions=CUSTOM_ROLE_CONVERSIONS,
            model_id="openrouter/deepseek/deepseek-chat-v3-0324:free",
        )

        # Initialize browsing infrastructure
        text_limit = 30000
        browser = SimpleTextBrowser(**BROWSER_CONFIG)

        web_tools = ToolRegistry.load_web_tools(model, browser, text_limit)

        # Document tools are optional: on failure, continue without them.
        doc_tools = []
        try:
            doc_tools = ToolRegistry.load_document_tools()
        except AssertionError as e:
            print(f"Warning: Error loading document tools: {str(e)}")
            print("Attempting to continue with available tools...")

        # Best-effort; returns None when the Space cannot be reached.
        image_generator = ToolRegistry.load_image_generation_tools()

        # Combine available tools (image generator only when present)
        all_tools = [visualizer] + web_tools + doc_tools
        if image_generator:
            all_tools.append(image_generator)

        # Log available tools
        print(f"Loaded {len(all_tools)} tools successfully")
        for tool in all_tools:
            print(f"- {tool.name}: {tool.description[:50]}...")

        return CodeAgent(
            model=model,
            tools=all_tools,
            max_steps=12,
            verbosity_level=2,
            additional_authorized_imports=AUTHORIZED_IMPORTS,
            planning_interval=4,
        )
    except Exception as e:
        print(f"Failed to create agent: {e}")
        # Chain the cause so the original traceback is preserved.
        raise RuntimeError(f"Agent creation failed: {e}") from e
293
+
294
+
295
def stream_to_gradio(
    agent, task: str, reset_agent_memory: bool = False, additional_args=None
):
    """Runs an agent with the given task and streams messages as Gradio ChatMessages.

    Args:
        agent: The smolagents agent to run.
        task: The user prompt forwarded to ``agent.run``.
        reset_agent_memory: When True, passed through as ``reset`` so the
            agent clears its memory before the run.
        additional_args: Extra arguments forwarded to ``agent.run``.

    Yields:
        gr.ChatMessage objects for each intermediate step, followed by one
        final-answer message whose shape depends on the answer type (text,
        image file, audio file, or a plain-string fallback). Any exception
        is converted into a single error chat message instead of raising.
    """
    try:
        for step_log in agent.run(
            task, stream=True, reset=reset_agent_memory, additional_args=additional_args
        ):
            for message in pull_messages_from_step(step_log):
                yield message

        # Process final answer with comprehensive media output.
        # NOTE(review): relies on agent.run yielding at least one item —
        # step_log would be undefined on an empty stream; confirm upstream.
        final_answer = step_log  # Last log is the run's final_answer
        final_answer = handle_agent_output_types(final_answer)

        # Output handling based on type
        if isinstance(final_answer, AgentText):
            yield gr.ChatMessage(
                role="assistant",
                content=f"Final answer:\n{final_answer.to_string()}\n",
            )
        elif isinstance(final_answer, AgentImage):
            # to_string() yields the file path for media outputs
            yield gr.ChatMessage(
                role="assistant",
                content={"image": final_answer.to_string(), "type": "file"},
            )
        elif isinstance(final_answer, AgentAudio):
            yield gr.ChatMessage(
                role="assistant",
                content={"audio": final_answer.to_string(), "type": "file"},
            )
        else:
            yield gr.ChatMessage(
                role="assistant", content=f"Final answer: {str(final_answer)}"
            )
    except Exception as e:
        yield gr.ChatMessage(
            role="assistant",
            content=f"Error occurred during processing: {str(e)}\n\nPlease try again with a different query or check your inputs.",
        )
 
 
335
 
 
 
336
 
337
+ # ------------------------ Gradio UI Components ------------------------
338
class GradioUI:
    """A one-line interface to launch your agent in Gradio.

    Renders a chat interface (a desktop layout with sidebar, or a simpler
    mobile layout), optionally with file uploads, and wires user input
    through ``create_agent`` / ``stream_to_gradio``.
    """

    def __init__(self, file_upload_folder: str | None = None):
        """Initialize the Gradio UI with optional file upload functionality.

        Args:
            file_upload_folder: Directory where uploaded files are stored.
                When None, the upload widgets are not rendered.
        """
        self.file_upload_folder = file_upload_folder

        if self.file_upload_folder is not None:
            os.makedirs(self.file_upload_folder, exist_ok=True)

    def interact_with_agent(
        self, prompt: str, messages: List[Dict], session_state: Dict
    ):
        """Run the agent on *prompt*, streaming chat messages into *messages*.

        Generator: yields the growing message list after every streamed step
        so Gradio can update the chatbot incrementally. (No return
        annotation on purpose — this is a generator, not a List[Dict].)
        """
        # Lazily create one agent per browser session; surface creation
        # failures in the chat instead of crashing the UI.
        if "agent" not in session_state:
            try:
                session_state["agent"] = create_agent()
                session_state["creation_time"] = datetime.datetime.now()
                session_state["request_count"] = 0
            except Exception as e:
                messages.append(
                    gr.ChatMessage(
                        role="assistant",
                        content=f"Error initializing agent: {str(e)}\n\nPlease refresh the page and try again.",
                    )
                )
                yield messages
                return

        session_state["request_count"] += 1

        # Echo the user message immediately.
        messages.append(gr.ChatMessage(role="user", content=prompt))
        yield messages

        try:
            # Reset agent memory after many requests to bound context growth.
            reset_needed = session_state["request_count"] > 15

            for msg in stream_to_gradio(
                session_state["agent"], task=prompt, reset_agent_memory=reset_needed
            ):
                messages.append(msg)
                yield messages

            # If we reset the agent memory, restart the request count.
            if reset_needed:
                session_state["request_count"] = 1

        except Exception as e:
            messages.append(
                gr.ChatMessage(
                    role="assistant",
                    content=f"Error processing your request: {str(e)}\n\nPlease try again with a different query.",
                )
            )
            yield messages

    def upload_file(self, file, file_uploads_log):
        """Handle file uploads with validation, security, and clear feedback.

        Args:
            file: Gradio file object (its ``.name`` is the temp path), or None.
            file_uploads_log: List of previously uploaded file paths.

        Returns:
            Tuple of (status gr.Textbox, updated upload log).
        """
        if file is None:
            return gr.Textbox("No file uploaded", visible=True), file_uploads_log

        try:
            # Check the size limit before doing any other work.
            file_size_mb = os.path.getsize(file.name) / (1024 * 1024)  # Size in MB
            max_file_size_mb = 50  # Define the limit

            if file_size_mb > max_file_size_mb:
                return (
                    gr.Textbox(
                        f"❌ File size ({file_size_mb:.1f} MB) exceeds {max_file_size_mb} MB limit.",
                        visible=True,
                    ),
                    file_uploads_log,
                )

            # Reject anything whose guessed MIME type is not allow-listed.
            mime_type, _ = mimetypes.guess_type(file.name)
            if mime_type not in ALLOWED_FILE_TYPES:
                return (
                    gr.Textbox(
                        f"❌ File type '{mime_type or 'unknown'}' is not allowed. Supported types: {', '.join(t.split('/')[-1] for t in ALLOWED_FILE_TYPES)}",
                        visible=True,
                    ),
                    file_uploads_log,
                )

            # Sanitize the file name: basename strips path components, the
            # regex keeps only word chars, dashes and dots.
            original_name = os.path.basename(file.name)
            sanitized_name = re.sub(r"[^\w\-.]", "", original_name)
            if not sanitized_name:
                # Everything was stripped — fall back to a safe default.
                sanitized_name = "uploaded_file"

            # Copy the uploaded file into the managed upload folder.
            file_path = os.path.join(self.file_upload_folder, sanitized_name)
            shutil.copy(file.name, file_path)

            return gr.Textbox(
                f"✓ File uploaded successfully: {os.path.basename(file_path)} ({file_size_mb:.1f} MB)",
                visible=True,
            ), file_uploads_log + [file_path]

        except Exception as e:
            return (
                gr.Textbox(f"❌ Upload error: {str(e)}", visible=True),
                file_uploads_log,
            )

    def log_user_message(self, text_input, file_uploads_log):
        """Prepare the agent task from the user text plus uploaded-file info.

        Returns the augmented message and disabled input widgets (they are
        re-enabled by the event chain once the agent run completes).
        """
        message = text_input

        if len(file_uploads_log) > 0:
            # Group files by type so the agent can pick matching tools.
            file_info = {}
            for file_path in file_uploads_log:
                ext = os.path.splitext(file_path)[1].lower()
                if ext in [".jpg", ".jpeg", ".png", ".gif", ".webp"]:
                    category = "images"
                elif ext in [".mp3", ".wav", ".ogg"]:
                    category = "audio"
                else:
                    category = "documents"

                if category not in file_info:
                    file_info[category] = []
                file_info[category].append(os.path.basename(file_path))

            # Format file information for the agent.
            file_message = "\nYou have been provided with these files:\n"
            for category, files in file_info.items():
                file_message += f"- {category.capitalize()}: {', '.join(files)}\n"

            message += file_message
            message += "\nUse inspect_file_as_text for documents, visualizer for images, and the appropriate tools for audio files."

        return (
            message,
            gr.Textbox(value="", interactive=False, placeholder="Processing..."),
            gr.Button(interactive=False),
        )

    def detect_device(self, request: gr.Request):
        """Detect whether the user is on a mobile or desktop device.

        Uses client-hint headers first, then the user-agent string, then the
        platform hint; defaults to "Desktop" when nothing is conclusive.
        """
        if not request:
            return "Unknown device"  # Handle case where request is None.

        # Method 1: Check sec-ch-ua-mobile header
        is_mobile_header = request.headers.get("sec-ch-ua-mobile")
        if is_mobile_header:
            return "Mobile" if "?1" in is_mobile_header else "Desktop"

        # Method 2: Check user-agent string
        user_agent = request.headers.get("user-agent", "").lower()
        mobile_keywords = ["android", "iphone", "ipad", "mobile", "phone"]

        if any(keyword in user_agent for keyword in mobile_keywords):
            return "Mobile"

        # Method 3: Check platform
        platform = request.headers.get("sec-ch-ua-platform", "").lower()
        if platform:
            if platform in ['"android"', '"ios"']:
                return "Mobile"
            if platform in ['"windows"', '"macos"', '"linux"']:
                return "Desktop"

        # Default case if no clear indicators
        return "Desktop"

    def launch(self, **kwargs):
        """Launch the Gradio UI with a device-appropriate layout."""
        with gr.Blocks(theme="ocean", fill_height=True) as demo:
            # Different layouts for mobile and desktop devices.
            @gr.render()
            def layout(request: gr.Request):
                device = self.detect_device(request)
                print(f"device - {device}")
                # Render layout with sidebar
                if device == "Desktop":
                    return self._create_desktop_layout()
                return self._create_mobile_layout()

        demo.queue(max_size=20).launch(
            debug=True, **kwargs
        )  # Queue with a reasonable size

    def _create_desktop_layout(self):
        """Create the desktop layout with sidebar and enhanced styling."""
        with gr.Blocks(fill_height=True) as sidebar_demo:
            # Invisible per-session state, created up front so both the
            # upload section and the chat handlers can reference it
            # (replaces the fragile `"file_uploads_log" not in locals()`
            # check the previous version used).
            session_state = gr.State({})
            stored_messages = gr.State([])
            file_uploads_log = gr.State([])

            with gr.Sidebar():
                gr.Markdown(
                    """# 🔍 OpenDeepResearch
                    ### Smolagents + Document Tools
                    """
                )
                with gr.Group():
                    gr.Markdown("What can I help you with today?", container=True)
                    text_input = gr.Textbox(
                        lines=4,
                        label="Your request",
                        container=False,
                        placeholder="Enter your question or task here...",
                        show_label=False,
                    )

                with gr.Row():
                    clear_btn = gr.Button("Clear", variant="secondary")
                    launch_research_btn = gr.Button("Run", variant="primary")

                # File upload section (only when an upload folder is set)
                if self.file_upload_folder is not None:
                    with gr.Group():
                        gr.Markdown("📎 Upload Documents")
                        upload_file = gr.File(
                            label="Upload files for analysis",
                            file_types=[
                                "pdf",
                                "docx",
                                "txt",
                                "md",
                                "csv",
                                "xlsx",
                                "jpg",
                                "png",
                            ],
                            file_count="multiple",
                        )
                        upload_status = gr.Textbox(
                            label="Upload Status", interactive=False, visible=False
                        )

                        # Show uploaded files list
                        uploaded_files_display = gr.Markdown("No files uploaded yet")

                        upload_file.change(
                            self.upload_file,
                            [upload_file, file_uploads_log],
                            [upload_status, file_uploads_log],
                        ).then(
                            lambda files: (
                                "Uploaded Files:\n"
                                + "\n".join([f"- {os.path.basename(f)}" for f in files])
                                if files
                                else "No files uploaded yet"
                            ),
                            [file_uploads_log],
                            [uploaded_files_display],
                        )

                gr.HTML("<br><hr><h4><center>Powered by:</center></h4>")
                with gr.Row():
                    gr.HTML(
                        """
                        <div style="display: flex; align-items: center; justify-content: center; gap: 8px; font-family: system-ui, -apple-system, sans-serif;">
                        <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png"
                        style="width: 32px; height: 32px; object-fit: contain;" alt="logo">
                        <a target="_blank" href="https://github.com/huggingface/smolagents">
                        <b>huggingface/smolagents</b>
                        </a>
                        </div>
                        """
                    )

            # Main chat area with improved styling
            chatbot = gr.Chatbot(
                label="OpenDeepResearch Assistant",
                type="messages",
                avatar_images=(
                    None,
                    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png",
                ),
                resizeable=True,
                show_copy_button=True,
                scale=1,
                elem_id="my-chatbot",
                height=700,
            )

            # Clear resets the conversation but keeps the session's agent so
            # the user does not pay agent start-up cost again.
            # FIX: the previous zero-input lambda closed over the gr.State
            # *component* (which has no .get()); the state value must be
            # passed in as an event input instead.
            clear_btn.click(
                lambda state: (
                    [],
                    [],
                    {"agent": state["agent"]} if "agent" in state else {},
                ),
                [session_state],
                [chatbot, stored_messages, session_state],
            )

            # Connect event handlers
            self._connect_event_handlers(
                text_input,
                launch_research_btn,
                file_uploads_log,
                stored_messages,
                chatbot,
                session_state,
            )

        return sidebar_demo

    def _create_mobile_layout(self):
        """Create the mobile layout (simpler, without sidebar)."""
        with gr.Blocks(fill_height=True) as simple_demo:
            gr.Markdown("""#OpenDeepResearch - free the AI agents!""")
            # Session-specific state
            session_state = gr.State({})
            stored_messages = gr.State([])
            file_uploads_log = gr.State([])

            chatbot = gr.Chatbot(
                label="open-Deep-Research",
                type="messages",
                avatar_images=(
                    None,
                    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png",
                ),
                resizeable=True,
                scale=1,
            )

            # If an upload folder is provided, enable the upload feature
            if self.file_upload_folder is not None:
                upload_file = gr.File(label="Upload a file")
                upload_status = gr.Textbox(
                    label="Upload Status", interactive=False, visible=False
                )
                upload_file.change(
                    self.upload_file,
                    [upload_file, file_uploads_log],
                    [upload_status, file_uploads_log],
                )

            text_input = gr.Textbox(
                lines=1,
                label="What's on your mind mate?",
                placeholder="Chuck in a question and we'll take care of the rest",
            )
            launch_research_btn = gr.Button("Run", variant="primary")

            self._connect_event_handlers(
                text_input,
                launch_research_btn,
                file_uploads_log,
                stored_messages,
                chatbot,
                session_state,
            )

        return simple_demo

    def _connect_event_handlers(
        self,
        text_input,
        launch_research_btn,
        file_uploads_log,
        stored_messages,
        chatbot,
        session_state,
    ):
        """Wire the submit/click events for the prompt input and Run button.

        Both triggers run the same three-step chain: log the user message
        (disabling the inputs), stream the agent interaction into the
        chatbot, then re-enable the input controls. The chain was
        previously duplicated verbatim for submit and click; it is wired
        once here for both triggers.
        """

        def _reenable_inputs():
            # Restore the input widgets once a run finishes.
            return (
                gr.Textbox(
                    interactive=True,
                    placeholder="Enter your prompt here and press the button",
                ),
                gr.Button(interactive=True),
            )

        for trigger in (text_input.submit, launch_research_btn.click):
            trigger(
                self.log_user_message,
                [text_input, file_uploads_log],
                [stored_messages, text_input, launch_research_btn],
            ).then(
                self.interact_with_agent,
                [stored_messages, chatbot, session_state],
                [chatbot],
            ).then(
                _reenable_inputs,
                None,
                [text_input, launch_research_btn],
            )
744
+
745
+
746
+ # ------------------------ Execution ------------------------
747
def main():
    """Application entry point: prepare the environment and start the UI."""
    # Authenticate and load configuration from .env first.
    setup_environment()

    # The browser writes downloaded artifacts here; create it up front.
    downloads_dir = f"./{BROWSER_CONFIG['downloads_folder']}"
    os.makedirs(downloads_dir, exist_ok=True)

    # Serve the Gradio interface with file uploads enabled.
    ui = GradioUI(file_upload_folder="uploaded_files")
    ui.launch()
757
 
758
 
759
if __name__ == "__main__":
    # Run the app only when executed directly (not on import).
    main()