Spaces:
Runtime error
Runtime error
| import argparse | |
| import json | |
| import os | |
| import threading | |
| from concurrent.futures import ThreadPoolExecutor, as_completed | |
| from datetime import datetime | |
| from pathlib import Path | |
| from typing import List, Optional | |
| import datasets | |
| import pandas as pd | |
| from dotenv import load_dotenv | |
| from huggingface_hub import login | |
| import gradio as gr | |
| from duckduckgo_search import DDGS | |
| from scripts.reformulator import prepare_response | |
| from scripts.run_agents import ( | |
| get_single_file_description, | |
| get_zip_description, | |
| ) | |
| from scripts.text_inspector_tool import TextInspectorTool | |
| from smolagents.tools import Tool | |
| from scripts.text_web_browser import ( | |
| ArchiveSearchTool, | |
| FinderTool, | |
| FindNextTool, | |
| PageDownTool, | |
| PageUpTool, | |
| VisitTool, | |
| SimpleTextBrowser, | |
| ) | |
| from scripts.visual_qa import visualizer | |
| from tqdm import tqdm | |
| from smolagents import ( | |
| CodeAgent, | |
| HfApiModel, | |
| LiteLLMModel, | |
| Model, | |
| ToolCallingAgent, | |
| ) | |
| from smolagents.agent_types import AgentText, AgentImage, AgentAudio | |
| from smolagents.gradio_ui import pull_messages_from_step, handle_agent_output_types | |
# Module names handed to the CodeAgent as ``additional_authorized_imports``
# (see create_agent). Order is kept as originally listed.
AUTHORIZED_IMPORTS = [
    "requests", "zipfile", "os", "pandas", "numpy", "sympy",
    "json", "bs4", "pubchempy", "xml", "yahoo_finance", "Bio",
    "sklearn", "scipy", "pydub", "io", "PIL", "chess",
    "PyPDF2", "pptx", "torch", "datetime", "fractions", "csv",
]
import os

# Earlier attempt at configuring the hub HTTP backend, kept for reference:
# from huggingface_hub import configure_http_backend
# from huggingface_hub.http import httpx_backend  # Explicit backend import
# configure_http_backend(factory=httpx_backend.factory)
#   (see https://huggingface.co/docs/huggingface_hub/en/guides/http#http-backends)

# Hub settings exported through the process environment.
# NOTE(review): huggingface_hub and datasets are already imported further up,
# so these assignments run AFTER those imports, not before them - confirm the
# libraries still pick the values up at this point.
os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "300"  # 5 minute timeout
os.environ["HF_HUB_OFFLINE"] = "0"  # Disable offline mode

# Read a .env file (its values override existing variables), then log in to
# the Hugging Face Hub with HF_TOKEN.
# NOTE(review): os.getenv returns None when HF_TOKEN is unset - confirm how
# login() behaves in that case on a headless deployment.
load_dotenv(override=True)
login(os.getenv("HF_TOKEN"))

append_answer_lock = threading.Lock()  # not referenced in the visible part of this file
SET = "validation"  # dataset split loaded below; also used to build attachment paths
custom_role_conversions = {"tool-call": "assistant", "tool-response": "user"}

### LOAD EVALUATION DATASET
# Load the chosen split and rename its columns to the names used below.
eval_ds = datasets.load_dataset("gaia-benchmark/GAIA", "2023_all")[SET]
eval_ds = eval_ds.rename_columns({"Question": "question", "Final answer": "true_answer", "Level": "task"})
def preprocess_file_paths(row):
    """Prefix a non-empty ``file_name`` with the local GAIA data directory.

    The row is modified in place and returned. Rows whose ``file_name`` is
    empty are returned unchanged.
    """
    attachment = row["file_name"]
    if len(attachment) == 0:
        return row
    row["file_name"] = f"data/gaia/{SET}/" + attachment
    return row
# Rewrite attachment paths on every row, then copy the split into a DataFrame.
eval_ds = eval_ds.map(preprocess_file_paths)
eval_df = pd.DataFrame(eval_ds)
print("Loaded evaluation dataset:")
print(eval_df["task"].value_counts())  # number of rows per "task" value (renamed from "Level")

# User-Agent header placed in the browser's request kwargs below.
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"

# Keyword arguments later unpacked into SimpleTextBrowser(**BROWSER_CONFIG).
BROWSER_CONFIG = {
    "viewport_size": 1024 * 5,
    "downloads_folder": "downloads_folder",
    "request_kwargs": {
        "headers": {"User-Agent": user_agent},
        "timeout": 300,
    },
}

# Create the downloads folder up front; an existing folder is not an error.
os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True)
# Custom OpenAI configuration
# The API key and base URL are read from the environment (OPENAI_API_KEY and
# CUSTOM_OPENAI_API_BASE).
# NOTE(review): `messages` is passed here as a constructor keyword - confirm
# that LiteLLMModel accepts it and that it does not collide with the
# per-call messages argument.
model = LiteLLMModel(
    "openai/custom-gpt",
    custom_role_conversions=custom_role_conversions,
    api_key=os.getenv("OPENAI_API_KEY"),
    api_base=os.getenv("CUSTOM_OPENAI_API_BASE"),
    temperature=0.1,
    frequency_penalty=0.2,
    messages=[
        {
            "role": "system",
            "content": """ALWAYS format code responses with:
```py
# Your code
```
Use markdown for text and strict triple-backtick for code blocks"""
        }
    ]
)

# Shared limit passed to every TextInspectorTool built from `text_limit`.
text_limit = 20000
# NOTE(review): `ti_tool` is not referenced again in the visible source;
# WEB_TOOLS constructs its own TextInspectorTool instance.
ti_tool = TextInspectorTool(model, text_limit)
# Single browser instance shared by all browser-backed tools.
browser = SimpleTextBrowser(**BROWSER_CONFIG)
class DuckDuckGoSearchTool(Tool):
    """Web search tool backed by DuckDuckGo.

    Exposed to the agent under the name ``web_search``. It takes a single
    string ``query`` and returns the hits as one formatted string.
    """

    name = "web_search"
    description = "Search the web using DuckDuckGo (current information)"
    inputs = {
        "query": {
            "type": "string",
            "description": "Search query terms",
            "required": True,
        }
    }
    output_type = "string"

    def __init__(self, max_results: int = 5):
        """Create the tool.

        Args:
            max_results: Maximum number of hits requested per search.
        """
        super().__init__()
        self.max_results = max_results

    def forward(self, query: str) -> str:
        """Run a DuckDuckGo text search and format the hits.

        Args:
            query: Search terms.

        Returns:
            One entry per hit (title, URL, first 200 characters of the body),
            separated by blank lines. ``"No results found."`` is returned when
            the search comes back empty, and ``"Search error: ..."`` when
            anything raises, so the agent always receives readable text
            instead of an exception or an empty string.
        """
        try:
            with DDGS(timeout=30) as ddgs:
                results = list(
                    ddgs.text(
                        keywords=query,
                        max_results=self.max_results,
                        region="wt-wt",
                    )
                )
            if not results:
                return "No results found."
            return "\n\n".join(
                f"• {res['title']}\n URL: {res['href']}\n {res['body'][:200]}..."
                for res in results
            )
        except Exception as e:
            # Deliberate best-effort: report the failure to the agent as text.
            return f"Search error: {str(e)}"
# Tools given to the agent: web search first, then the tools that share the
# single `browser` instance, and finally a text inspector.
WEB_TOOLS = [
    DuckDuckGoSearchTool(max_results=5),
    *(
        browser_tool(browser)
        for browser_tool in (
            VisitTool,
            PageUpTool,
            PageDownTool,
            FinderTool,
            FindNextTool,
            ArchiveSearchTool,
        )
    ),
    TextInspectorTool(model, text_limit),
]
| from smolagents.parsers import CodeParser | |
| import re | |
class RobustCodeParser(CodeParser):
    """Code parser with a regex fallback for fenced code blocks."""

    def extract_code(self, response: str) -> str:
        """Return the code contained in ``response``.

        The parent implementation is tried first. Only when it raises
        ``ValueError`` is the response searched for the first fenced block
        (optionally tagged ``python`` or ``py``); the stripped body of that
        block is returned.

        Raises:
            ValueError: If neither the parent parser nor the fallback pattern
                finds a code block.
        """
        try:
            parsed = super().extract_code(response)
        except ValueError:
            # Fallback pattern matching
            fenced = re.search(r"```(?:python|py)?\n(.*?)\n```", response, re.DOTALL)
            if fenced is None:
                raise ValueError(f"Invalid code format in response:\n{response}")
            return fenced.group(1).strip()
        return parsed
| # Replace in agent creation: | |
| # Agent creation in a factory function | |
def create_agent():
    """Build a fresh CodeAgent wired to the shared model and tool set.

    A new agent is created on every call; GradioUI stores one per session.

    NOTE(review): `code_block_delimiters`, `code_clean_pattern`,
    `enforce_code_format` and `parser` are forwarded to CodeAgent as-is -
    confirm that the installed smolagents version accepts these keywords.
    """
    toolbox = [visualizer] + WEB_TOOLS
    return CodeAgent(
        tools=toolbox,
        model=model,
        additional_authorized_imports=AUTHORIZED_IMPORTS,
        max_steps=7,
        planning_interval=3,
        verbosity_level=3,
        # Code-format related settings
        code_block_delimiters=("```py", "```"),
        code_clean_pattern=r"^[\s\S]*?(```py\n[\s\S]*?\n```)",
        enforce_code_format=True,
        parser=RobustCodeParser(),
    )
# Stand-alone text inspector bound to the shared model.
# NOTE(review): not referenced anywhere else in the visible source.
document_inspection_tool = TextInspectorTool(model, 20000)
def stream_to_gradio(
    agent,
    task: str,
    reset_agent_memory: bool = False,
    additional_args: Optional[dict] = None,
):
    """Run ``agent`` on ``task`` and yield its progress as gradio ChatMessages.

    Every step produced by the streaming run is converted with
    ``pull_messages_from_step`` and yielded message by message. After the run,
    the last step is treated as the final answer and yielded as one more
    assistant message: text as markdown, image/audio as a file reference,
    and anything else through ``str()``.
    """
    run_steps = agent.run(task, stream=True, reset=reset_agent_memory, additional_args=additional_args)
    for step_log in run_steps:
        for message in pull_messages_from_step(step_log):
            yield message

    # Last log is the run's final_answer
    final_answer = handle_agent_output_types(step_log)

    if isinstance(final_answer, AgentText):
        content = f"**Final answer:**\n{final_answer.to_string()}\n"
    else:
        media_kinds = ((AgentImage, "image/png"), (AgentAudio, "audio/wav"))
        for media_type, mime_type in media_kinds:
            if isinstance(final_answer, media_type):
                content = {"path": final_answer.to_string(), "mime_type": mime_type}
                break
        else:
            content = f"**Final answer:** {str(final_answer)}"

    yield gr.ChatMessage(role="assistant", content=content)
| class GradioUI: | |
| """A one-line interface to launch your agent in Gradio""" | |
| def __init__(self, file_upload_folder: str | None = None): | |
| self.file_upload_folder = file_upload_folder | |
| if self.file_upload_folder is not None: | |
| if not os.path.exists(file_upload_folder): | |
| os.mkdir(file_upload_folder) | |
| def interact_with_agent(self, prompt, messages, session_state): | |
| if 'agent' not in session_state: | |
| session_state['agent'] = create_agent() | |
| messages.append(gr.ChatMessage(role="user", content=prompt)) | |
| yield messages | |
| for msg in stream_to_gradio(session_state['agent'], task=prompt, reset_agent_memory=False): | |
| messages.append(msg) | |
| yield messages | |
| yield messages | |
| def upload_file( | |
| self, | |
| file, | |
| file_uploads_log, | |
| allowed_file_types=[ | |
| "application/pdf", | |
| "application/vnd.openxmlformats-officedocument.wordprocessingml.document", | |
| "text/plain", | |
| ], | |
| ): | |
| if file is None: | |
| return gr.Textbox("No file uploaded", visible=True), file_uploads_log | |
| try: | |
| mime_type, _ = mimetypes.guess_type(file.name) | |
| except Exception as e: | |
| return gr.Textbox(f"Error: {e}", visible=True), file_uploads_log | |
| if mime_type not in allowed_file_types: | |
| return gr.Textbox("File type disallowed", visible=True), file_uploads_log | |
| original_name = os.path.basename(file.name) | |
| sanitized_name = re.sub(r"[^\w\-.]", "_", original_name) | |
| type_to_ext = {} | |
| for ext, t in mimetypes.types_map.items(): | |
| if t not in type_to_ext: | |
| type_to_ext[t] = ext | |
| sanitized_name = sanitized_name.split(".")[:-1] | |
| sanitized_name.append("" + type_to_ext[mime_type]) | |
| sanitized_name = "".join(sanitized_name) | |
| file_path = os.path.join(self.file_upload_folder, os.path.basename(sanitized_name)) | |
| shutil.copy(file.name, file_path) | |
| return gr.Textbox(f"File uploaded: {file_path}", visible=True), file_uploads_log + [file_path] | |
| def log_user_message(self, text_input, file_uploads_log): | |
| return ( | |
| text_input | |
| + ( | |
| f"\nYou have been provided with these files, which might be helpful or not: {file_uploads_log}" | |
| if len(file_uploads_log) > 0 | |
| else "" | |
| ), | |
| "", | |
| ) | |
| def launch(self, **kwargs): | |
| with gr.Blocks(theme="ocean", fill_height=True) as demo: | |
| gr.Markdown("""# Open Deep Research - AI Agent Interface | |
| Advanced question answering using DuckDuckGo search and custom AI models""") | |
| session_state = gr.State({}) | |
| stored_messages = gr.State([]) | |
| file_uploads_log = gr.State([]) | |
| chatbot = gr.Chatbot( | |
| label="Research Agent", | |
| type="messages", | |
| avatar_images=( | |
| None, | |
| "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png", | |
| ), | |
| resizeable=True, | |
| scale=1, | |
| ) | |
| if self.file_upload_folder is not None: | |
| upload_file = gr.File(label="Upload a file") | |
| upload_status = gr.Textbox(label="Upload Status", interactive=False, visible=False) | |
| upload_file.change( | |
| self.upload_file, | |
| [upload_file, file_uploads_log], | |
| [upload_status, file_uploads_log], | |
| ) | |
| text_input = gr.Textbox(lines=1, label="Enter your question") | |
| text_input.submit( | |
| self.log_user_message, | |
| [text_input, file_uploads_log], | |
| [stored_messages, text_input], | |
| ).then( | |
| self.interact_with_agent, | |
| [stored_messages, chatbot, session_state], | |
| [chatbot] | |
| ) | |
| demo.launch(debug=True, share=False, **kwargs) | |
# Script entry point: start the UI with uploads disabled (no upload folder given).
if __name__ == "__main__":
    GradioUI().launch()