mdicio committed
Commit 8c5bbef · 1 Parent(s): 81917a3
Files changed (6)
  1. agent.py +287 -0
  2. app.py +67 -35
  3. app_template.py +196 -0
  4. requirements copy.txt +23 -0
  5. requirements.txt +22 -1
  6. tools.py +1114 -0
agent.py ADDED
@@ -0,0 +1,287 @@
+import os
+
+from dotenv import load_dotenv
+
+# Import models from SmolaAgents
+from smolagents import CodeAgent, LiteLLMModel, OpenAIServerModel
+
+# Import SmolaAgents tools
+from smolagents.default_tools import FinalAnswerTool, PythonInterpreterTool
+
+# Import custom tools
+from tools import (
+    AddDocumentToVectorStoreTool,
+    ArxivSearchTool,
+    DownloadFileFromLinkTool,
+    DuckDuckGoSearchTool,
+    QueryVectorStoreTool,
+    ReadFileContentTool,
+    TranscibeVideoFileTool,
+    TranscribeAudioTool,
+    VisitWebpageTool,
+    WikipediaSearchTool,
+    image_question_answering,
+)
+
+# Import utility functions
+from utils import extract_final_answer, replace_tool_mentions
+
+
+class BoomBot:
+    def __init__(self, provider="meta"):
+        """
+        Initialize the BoomBot with the specified provider.
+
+        Args:
+            provider (str): The model provider to use (e.g., "groq", "qwen", "gemma", "anthropic", "deepinfra", "meta")
+        """
+        load_dotenv()
+        self.provider = provider
+        self.model = self._initialize_model()
+        self.agent = self._create_agent()
+
+    def _initialize_model(self):
+        """
+        Initialize the appropriate model based on the provider.
+
+        Returns:
+            The initialized model object
+        """
+        if self.provider == "qwen":
+            qwen_model = "ollama_chat/qwen3:8b"
+            return LiteLLMModel(
+                model_id=qwen_model,
+                device="cuda",
+                num_ctx=32768,
+                temperature=0.6,
+                top_p=0.95,
+            )
+        elif self.provider == "gemma":
+            gemma_model = "ollama_chat/gemma3:12b-it-qat"
+            return LiteLLMModel(
+                model_id=gemma_model,
+                num_ctx=65536,
+                temperature=1.0,
+                device="cuda",
+                top_k=64,
+                top_p=0.95,
+                min_p=0.0,
+            )
+        elif self.provider == "anthropic":
+            model_id = "anthropic/claude-3-5-sonnet-latest"
+            return LiteLLMModel(model_id=model_id, temperature=0.6, max_tokens=8192)
+        elif self.provider == "deepinfra":
+            deepinfra_model = "Qwen/Qwen3-235B-A22B"
+            return OpenAIServerModel(
+                model_id=deepinfra_model,
+                api_base="https://api.deepinfra.com/v1/openai",
+                # api_key=os.environ["DEEPINFRA_API_KEY"],
+                flatten_messages_as_text=True,
+                max_tokens=8192,
+                temperature=0.1,
+            )
+        elif self.provider == "meta":
+            meta_model = "meta-llama/Llama-3.3-70B-Instruct-Turbo"
+            return OpenAIServerModel(
+                model_id=meta_model,
+                api_base="https://api.deepinfra.com/v1/openai",
+                # api_key=os.environ["DEEPINFRA_API_KEY"],
+                flatten_messages_as_text=True,
+                max_tokens=8192,
+                temperature=0.7,
+            )
+        elif self.provider == "groq":
+            # NOTE: this model_id routes to Anthropic's Claude 3 Opus via LiteLLM, not a Groq-hosted model.
+            model_id = "claude-3-opus-20240229"
+            return LiteLLMModel(model_id=model_id, temperature=0.7, max_tokens=8192)
+        else:
+            raise ValueError(f"Unsupported provider: {self.provider}")
+
+    def _create_agent(self):
+        """
+        Create and configure the agent with all necessary tools.
+
+        Returns:
+            The configured CodeAgent
+        """
+        # Initialize tools
+        download_file = DownloadFileFromLinkTool()
+        read_file_content = ReadFileContentTool()
+        visit_webpage = VisitWebpageTool()
+        transcribe_video = TranscibeVideoFileTool()
+        transcribe_audio = TranscribeAudioTool()
+        get_wikipedia_info = WikipediaSearchTool()
+        web_searcher = DuckDuckGoSearchTool()
+        arxiv_search = ArxivSearchTool()
+        add_doc_vectorstore = AddDocumentToVectorStoreTool()
+        retrieve_doc_vectorstore = QueryVectorStoreTool()
+
+        # SmolaAgents default tools
+        python_interpreter = PythonInterpreterTool()
+        final_answer = FinalAnswerTool()
+
+        # Combine all tools
+        agent_tools = [
+            web_searcher,
+            download_file,
+            read_file_content,
+            visit_webpage,
+            transcribe_video,
+            transcribe_audio,
+            get_wikipedia_info,
+            arxiv_search,
+            add_doc_vectorstore,
+            retrieve_doc_vectorstore,
+            image_question_answering,
+            python_interpreter,
+            final_answer,
+        ]
+
+        # Additional imports for the Python interpreter
+        additional_imports = [
+            "json",
+            "os",
+            "glob",
+            "pathlib",
+            "pandas",
+            "numpy",
+            "matplotlib",
+            "seaborn",
+            "sklearn",
+            "tqdm",
+            "argparse",
+            "pickle",
+            "io",
+            "re",
+            "datetime",
+            "collections",
+            "math",
+            "random",
+            "csv",
+            "zipfile",
+            "itertools",
+            "functools",
+        ]
+
+        # Create the agent
+        agent = CodeAgent(
+            tools=agent_tools,
+            max_steps=12,
+            model=self.model,
+            add_base_tools=False,
+            stream_outputs=True,
+            additional_authorized_imports=additional_imports,
+        )
+
+        # Modify the system prompt
+        modified_prompt = replace_tool_mentions(agent.system_prompt)
+        agent.system_prompt = modified_prompt
+
+        return agent
+
+    def _get_system_prompt(self):
+        """
+        Return the system prompt for the agent.
+
+        Returns:
+            str: The system prompt
+        """
+        return """
+YOUR BEHAVIOR GUIDELINES:
+  • Do NOT make unfounded assumptions; always ground answers in reliable sources or search results.
+  • For math or puzzles: break the problem into code/math, then solve programmatically.
+
+RESEARCH WORKFLOW (in rough priority order):
+  1. SEARCH
+     - Try web_search, wikipedia_search, or arxiv_search first.
+     - Refine your query rather than repeating the exact same terms.
+     - If one search tool yields insufficient info, switch to another before downloading.
+  2. VISIT
+     - Use visit_webpage to extract and read page content when a promising link appears in the SEARCH results.
+     - For each visited link, also download the file and add it to the vector store; you may need to query it later, especially if you have many search results.
+  3. EVALUATE
+     - ✅ If the page or search snippet fully answers the question, respond immediately.
+     - ❌ If not, move on to deeper investigation.
+  4. DOWNLOAD
+     - Use the download_file_from_link tool on relevant links you find (yes, you can download webpages as HTML).
+     - For arXiv papers, target the /pdf/ or DOI link (e.g., https://arxiv.org/pdf/2011.10672).
+  5. INDEX & QUERY
+     - Add downloaded documents to the vector store with add_document_to_vector_store.
+     - Use query_downloaded_documents for detailed answers.
+  6. READ
+     - You have access to a read_file_content tool to read most types of files. You can also interact with downloaded files directly in your Python code (do this for CSV and Excel files).
+
+FALLBACK & ADAPTATION:
+  • If a tool fails, reformulate your query or try a different search method before falling back to downloading.
+  • If a tool fails multiple times, try a different tool.
+  • For arXiv: you might discover a paper link via the web_search tool and then use the download_file_from_link tool directly.
+
+COMMON TOOL CHAINS (conceptual outlines):
+These are just guidelines; each task might require a unique workflow.
+A tool can provide useful information for the task, but it will not always contain the answer. You need to work to reach a final_answer that makes sense.
+
+  • FACTUAL Qs:
+      web_search → final_answer
+  • CURRENT EVENTS:
+      Use web_search for summary information; it may surface a promising website to visit and read content from (visit_webpage, or download_file_from_link and read_file_content).
+      web_search → visit_webpage → final_answer
+  • DOCUMENT-BASED Qs:
+      web_search → download_file_from_link → add_document_to_vector_store → query_downloaded_documents → final_answer
+  • ARXIV PAPERS:
+      The arxiv_search tool provides a list of results with summary content; to inspect the whole paper you need to download it with the download_file_from_link tool.
+      arxiv_search → download_file_from_link → read_file_content
+      If that fails:
+      arxiv_search → download_file_from_link → add_document_to_vector_store → query_downloaded_documents
+  • MEDIA ANALYSIS:
+      download_file_from_link → transcribe_video/transcribe_audio/describe_image → final_answer
+
+FINAL ANSWER FORMAT:
+  - Begin with "FINAL ANSWER: "
+  - Number → digits only (e.g., 42)
+  - String → exact text (e.g., Pope Francis)
+  - List → comma-separated, one space (e.g., 2, 3, 4)
+  - Conclude with: FINAL ANSWER: <your_answer>
+"""
+
+    def run(self, question: str, task_id: str, to_download) -> str:
+        """
+        Run the agent with the given question, task_id, and download flag.
+
+        Args:
+            question (str): The question or task for the agent to process
+            task_id (str): A unique identifier for the task
+            to_download (bool): Flag indicating whether to download resources
+
+        Returns:
+            str: The agent's response
+        """
+        prompt = self._get_system_prompt()
+        # Task introduction
+        prompt += "\nHere is the Task you need to solve:\n\n"
+        prompt += f"Task: {question}\n\n"
+
+        # Include download instructions if applicable
+        if to_download:
+            link = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
+            prompt += (
+                "IMPORTANT: Before solving the task, you must download a required file.\n"
+                f"Use the `download_file_from_link` tool with this link: {link}\n"
+                "After downloading, use the appropriate tool to read or process the file "
+                "before attempting to solve the task.\n\n"
+            )
+
+        # Run the agent with the full prompt (system guidelines + task)
+        result = self.agent.run(prompt)
+
+        # Extract the final answer from the result
+        final_answer = extract_final_answer(result)
+
+        return final_answer
+
+
+# Example of how to use this code (commented out)
+# if __name__ == "__main__":
+#     agent = BoomBot(provider="deepinfra")
+#     response = agent.run("What is the current population of Tokyo?", "population_query", True)
+#     print(f"Response: {response}")
app.py CHANGED
@@ -1,34 +1,38 @@
+# app.py
 import os
+
 import gradio as gr
-import requests
-import inspect
 import pandas as pd
+import requests
+
+from agent import BoomBot
 
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
-# --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
+
+# --- Basic Agent Definition --
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
-    def __call__(self, question: str) -> str:
+        self.agent = BoomBot(provider="deepinfra")
+
+    def __call__(self, question: str, task_id: str, to_download) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
+        return self.agent.run(question, task_id, to_download)
+
 
-def run_and_submit_all( profile: gr.OAuthProfile | None):
+def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
+    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
 
     if profile:
-        username= f"{profile.username}"
+        username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
@@ -55,16 +59,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-            print("Fetched questions list is empty.")
-            return "Fetched questions list is empty or invalid format.", None
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
-        print(f"Error decoding JSON response from questions endpoint: {e}")
-        print(f"Response text: {response.text[:500]}")
-        return f"Error decoding server response for questions: {e}", None
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
+        return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
@@ -76,23 +80,48 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
+        file_name = item.get("file_name", "")
+
+        if file_name.strip() != "":
+            to_download = True
+        else:
+            to_download = False
+
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+            submitted_answer = agent(question_text, task_id, to_download=to_download)
+            answers_payload.append(
+                {"task_id": task_id, "submitted_answer": submitted_answer}
+            )
+            results_log.append(
+                {
+                    "Task ID": task_id,
+                    "Question": question_text,
+                    "Submitted Answer": submitted_answer,
+                }
+            )
         except Exception as e:
-            print(f"Error running agent on task {task_id}: {e}")
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+            print(f"Error running agent on task {task_id}: {e}")
+            results_log.append(
+                {
+                    "Task ID": task_id,
+                    "Question": question_text,
+                    "Submitted Answer": f"AGENT ERROR: {e}",
+                }
+            )
 
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
-    # 4. Prepare Submission
-    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    # 4. Prepare Submission
+    submission_data = {
+        "username": username.strip(),
+        "agent_code": agent_code,
+        "answers": answers_payload,
+    }
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
@@ -162,20 +191,19 @@ with gr.Blocks() as demo:
 
     run_button = gr.Button("Run Evaluation & Submit All Answers")
 
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+    status_output = gr.Textbox(
+        label="Run Status / Submission Result", lines=5, interactive=False
+    )
     # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table]
-    )
+    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 if __name__ == "__main__":
-    print("\n" + "-"*30 + " App Starting " + "-"*30)
+    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
+    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
 
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -183,14 +211,18 @@ if __name__ == "__main__":
     else:
         print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
 
-    if space_id_startup: # Print repo URLs if SPACE_ID is found
+    if space_id_startup:  # Print repo URLs if SPACE_ID is found
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+        print(
+            f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
+        )
    else:
-        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+        print(
+            "ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
+        )
 
-    print("-"*(60 + len(" App Starting ")) + "\n")
+    print("-" * (60 + len(" App Starting ")) + "\n")
 
     print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)
+    demo.launch(debug=True, share=False)
app_template.py ADDED
@@ -0,0 +1,196 @@
+import os
+import gradio as gr
+import requests
+import inspect
+import pandas as pd
+
+# (Keep Constants as is)
+# --- Constants ---
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+
+# --- Basic Agent Definition ---
+# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
+class BasicAgent:
+    def __init__(self):
+        print("BasicAgent initialized.")
+    def __call__(self, question: str) -> str:
+        print(f"Agent received question (first 50 chars): {question[:50]}...")
+        fixed_answer = "This is a default answer."
+        print(f"Agent returning fixed answer: {fixed_answer}")
+        return fixed_answer
+
+def run_and_submit_all(profile: gr.OAuthProfile | None):
+    """
+    Fetches all questions, runs the BasicAgent on them, submits all answers,
+    and displays the results.
+    """
+    # --- Determine HF Space Runtime URL and Repo URL ---
+    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
+
+    if profile:
+        username = f"{profile.username}"
+        print(f"User logged in: {username}")
+    else:
+        print("User not logged in.")
+        return "Please Login to Hugging Face with the button.", None
+
+    api_url = DEFAULT_API_URL
+    questions_url = f"{api_url}/questions"
+    submit_url = f"{api_url}/submit"
+
+    # 1. Instantiate Agent (modify this part to create your agent)
+    try:
+        agent = BasicAgent()
+    except Exception as e:
+        print(f"Error instantiating agent: {e}")
+        return f"Error initializing agent: {e}", None
+    # In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    print(agent_code)
+
+    # 2. Fetch Questions
+    print(f"Fetching questions from: {questions_url}")
+    try:
+        response = requests.get(questions_url, timeout=15)
+        response.raise_for_status()
+        questions_data = response.json()
+        if not questions_data:
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
+        print(f"Fetched {len(questions_data)} questions.")
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching questions: {e}")
+        return f"Error fetching questions: {e}", None
+    except requests.exceptions.JSONDecodeError as e:
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
+        return f"Error decoding server response for questions: {e}", None
+    except Exception as e:
+        print(f"An unexpected error occurred fetching questions: {e}")
+        return f"An unexpected error occurred fetching questions: {e}", None
+
+    # 3. Run your Agent
+    results_log = []
+    answers_payload = []
+    print(f"Running agent on {len(questions_data)} questions...")
+    for item in questions_data:
+        task_id = item.get("task_id")
+        question_text = item.get("question")
+        if not task_id or question_text is None:
+            print(f"Skipping item with missing task_id or question: {item}")
+            continue
+        try:
+            submitted_answer = agent(question_text)
+            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+        except Exception as e:
+            print(f"Error running agent on task {task_id}: {e}")
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+
+    if not answers_payload:
+        print("Agent did not produce any answers to submit.")
+        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+
+    # 4. Prepare Submission
+    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+    print(status_update)
+
+    # 5. Submit
+    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
+    try:
+        response = requests.post(submit_url, json=submission_data, timeout=60)
+        response.raise_for_status()
+        result_data = response.json()
+        final_status = (
+            f"Submission Successful!\n"
+            f"User: {result_data.get('username')}\n"
+            f"Overall Score: {result_data.get('score', 'N/A')}% "
+            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+            f"Message: {result_data.get('message', 'No message received.')}"
+        )
+        print("Submission successful.")
+        results_df = pd.DataFrame(results_log)
+        return final_status, results_df
+    except requests.exceptions.HTTPError as e:
+        error_detail = f"Server responded with status {e.response.status_code}."
+        try:
+            error_json = e.response.json()
+            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+        except requests.exceptions.JSONDecodeError:
+            error_detail += f" Response: {e.response.text[:500]}"
+        status_message = f"Submission Failed: {error_detail}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except requests.exceptions.Timeout:
+        status_message = "Submission Failed: The request timed out."
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except requests.exceptions.RequestException as e:
+        status_message = f"Submission Failed: Network error - {e}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except Exception as e:
+        status_message = f"An unexpected error occurred during submission: {e}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+
+
+# --- Build Gradio Interface using Blocks ---
+with gr.Blocks() as demo:
+    gr.Markdown("# Basic Agent Evaluation Runner")
+    gr.Markdown(
+        """
+        **Instructions:**
+
+        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc.
+        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+
+        ---
+        **Disclaimers:**
+        Once you click the "submit" button, it can take quite some time (this is the time it takes the agent to go through all the questions).
+        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to avoid the long-running submit step, you could cache the answers and submit them in a separate action, or even answer the questions asynchronously.
+        """
+    )
+
+    gr.LoginButton()
+
+    run_button = gr.Button("Run Evaluation & Submit All Answers")
+
+    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+    # Removed max_rows=10 from DataFrame constructor
+    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+
+    run_button.click(
+        fn=run_and_submit_all,
+        outputs=[status_output, results_table]
+    )
+
+if __name__ == "__main__":
+    print("\n" + "-"*30 + " App Starting " + "-"*30)
+    # Check for SPACE_HOST and SPACE_ID at startup for information
+    space_host_startup = os.getenv("SPACE_HOST")
+    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
+
+    if space_host_startup:
+        print(f"✅ SPACE_HOST found: {space_host_startup}")
+        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
+    else:
+        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
+
+    if space_id_startup:  # Print repo URLs if SPACE_ID is found
+        print(f"✅ SPACE_ID found: {space_id_startup}")
+        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+    else:
+        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+
+    print("-"*(60 + len(" App Starting ")) + "\n")
+
+    print("Launching Gradio Interface for Basic Agent Evaluation...")
+    demo.launch(debug=True, share=False)
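The disclaimer in the template above suggests caching answers and submitting them in a separate action instead of doing everything behind one long-running button click. A minimal, hypothetical sketch of that idea (the helper names and cache path are assumptions, not part of this commit):

import json

def cache_answers(answers_payload, path="answers_cache.json"):
    # Persist generated answers locally so submission can happen in a separate step.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(answers_payload, f)

def load_cached_answers(path="answers_cache.json"):
    # Reload previously cached answers for a later submit-only action.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)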
requirements copy.txt ADDED
@@ -0,0 +1,23 @@
+beautifulsoup4
+chromadb
+duckduckgo_search
+gradio
+huggingface_hub
+langchain
+langchain-chroma
+langchain-community
+langchain-core
+langchain-groq
+langchain-huggingface
+langchain-google-genai
+langchain-tavily
+langgraph
+markdownify
+pandas
+protobuf==3.20.*
+PyMuPDF
+python-dotenv
+requests
+sentence-transformers
+smolagents
+traitlets
requirements.txt CHANGED
@@ -1,2 +1,23 @@
+beautifulsoup4
+chromadb
+duckduckgo_search
 gradio
-requests
+huggingface_hub
+langchain
+langchain-chroma
+langchain-community
+langchain-core
+langchain-groq
+langchain-huggingface
+langchain-google-genai
+langchain-tavily
+langgraph
+markdownify
+pandas
+protobuf==3.20.*
+PyMuPDF
+python-dotenv
+requests
+sentence-transformers
+smolagents
+traitlets
tools.py ADDED
@@ -0,0 +1,1114 @@
+import html
+import json
+import mimetypes
+import os
+import re
+import time
+import traceback
+from pathlib import Path
+from typing import Dict, List
+from urllib.parse import quote_plus, urlparse
+
+import chromadb
+import chromadb.utils.embedding_functions as embedding_functions
+import fitz  # PyMuPDF
+import pandas as pd
+import requests
+from bs4 import BeautifulSoup
+from duckduckgo_search import DDGS
+from duckduckgo_search.exceptions import (
+    ConversationLimitException,
+    DuckDuckGoSearchException,
+    RatelimitException,
+    TimeoutException,
+)
+from langchain_community.document_loaders import (
+    BSHTMLLoader,
+    JSONLoader,
+    PyPDFLoader,
+    TextLoader,
+    UnstructuredFileLoader,
+)
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.tools import BraveSearch
+from markdownify import markdownify
+from smolagents import Tool, tool
+from smolagents.utils import truncate_content
+
+
44
+ class ReadFileContentTool(Tool):
45
+ name = "read_file_content"
46
+ description = """Reads local files in various formats (text, CSV, Excel, PDF, HTML, etc.) and returns their content as readable text. Automatically detects and processes the appropriate file format."""
47
+
48
+ inputs = {
49
+ "file_path": {
50
+ "type": "string",
51
+ "description": "The full path to the file from which the content should be read.",
52
+ }
53
+ }
54
+ output_type = "string"
55
+
56
+ def forward(self, file_path: str) -> str:
57
+ if not os.path.exists(file_path):
58
+ return f"❌ File does not exist: {file_path}"
59
+
60
+ ext = os.path.splitext(file_path)[1].lower()
61
+
62
+ try:
63
+ if ext == ".txt":
64
+ with open(file_path, "r", encoding="utf-8") as f:
65
+ return truncate_content(f.read())
66
+
67
+ elif ext == ".csv":
68
+ df = pd.read_csv(file_path)
69
+ return truncate_content(
70
+ f"CSV Content:\n{df.to_string(index=False)}\n\nColumn names: {', '.join(df.columns)}"
71
+ )
72
+
73
+ elif ext in [".xlsx", ".xls"]:
74
+ df = pd.read_excel(file_path)
75
+ return truncate_content(
76
+ f"Excel Content:\n{df.to_string(index=False)}\n\nColumn names: {', '.join(df.columns)}"
77
+ )
78
+
79
+ elif ext == ".pdf":
80
+ doc = fitz.open(file_path)
81
+ text = "".join([page.get_text() for page in doc])
82
+ doc.close()
83
+ return truncate_content(
84
+ text.strip() or "⚠️ PDF contains no readable text."
85
+ )
86
+
87
+ elif ext == ".json":
88
+ with open(file_path, "r", encoding="utf-8") as f:
89
+ return truncate_content(f.read())
90
+
91
+ elif ext == ".py":
92
+ with open(file_path, "r", encoding="utf-8") as f:
93
+ return truncate_content(f.read())
94
+
95
+ elif ext in [".html", ".htm"]:
96
+ with open(file_path, "r", encoding="utf-8") as f:
97
+ html = f.read()
98
+ try:
99
+ markdown = markdownify(html).strip()
100
+ markdown = re.sub(r"\n{3,}", "\n\n", markdown)
101
+ return f"📄 HTML content (converted to Markdown):\n\n{truncate_content(markdown)}"
102
+ except Exception:
103
+ soup = BeautifulSoup(html, "html.parser")
104
+ text = soup.get_text(separator="\n").strip()
105
+ return f"📄 HTML content (raw text fallback):\n\n{truncate_content(text)}"
106
+
107
+ elif ext in [".mp3", ".wav"]:
108
+ return f"ℹ️ Audio file detected: {os.path.basename(file_path)}. Use transcribe_audio tool to process the audio content."
109
+
110
+ elif ext in [".mp4", ".mov", ".avi"]:
111
+ return f"ℹ️ Video file detected: {os.path.basename(file_path)}. Use transcribe_video tool to process the video content."
112
+
113
+ else:
114
+ return f"ℹ️ Unsupported file type: {ext}. File saved at {file_path}"
115
+
116
+ except Exception as e:
117
+ return f"❌ Could not read {file_path}: {e}"
118
+
119
+
120
+ class WikipediaSearchTool(Tool):
121
+ name = "wikipedia_search"
122
+ description = """Searches Wikipedia for a specific topic and returns a concise summary. Useful for background information on subjects, concepts, historical events, or scientific topics."""
123
+
124
+ inputs = {
125
+ "query": {
126
+ "type": "string",
127
+ "description": "The query or subject to search for on Wikipedia.",
128
+ }
129
+ }
130
+ output_type = "string"
131
+
132
+ def forward(self, query: str) -> str:
133
+ print(f"EXECUTING TOOL: wikipedia_search(query='{query}')")
134
+ try:
135
+ search_link = f"https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={query}&format=json"
136
+ search_response = requests.get(search_link, timeout=10)
137
+ search_response.raise_for_status()
138
+ search_data = search_response.json()
139
+
140
+ if not search_data.get("query", {}).get("search", []):
141
+ return f"No Wikipedia info for '{query}'."
142
+
143
+ page_id = search_data["query"]["search"][0]["pageid"]
144
+
145
+ content_link = (
146
+ f"https://en.wikipedia.org/w/api.php?action=query&prop=extracts&"
147
+ f"exintro=1&explaintext=1&pageids={page_id}&format=json"
148
+ )
149
+ content_response = requests.get(content_link, timeout=10)
150
+ content_response.raise_for_status()
151
+ content_data = content_response.json()
152
+
153
+ extract = content_data["query"]["pages"][str(page_id)]["extract"]
154
+ if len(extract) > 1500:
155
+ extract = extract[:1500] + "..."
156
+
157
+ result = f"Wikipedia summary for '{query}':\n{extract}"
158
+ print(f"-> Tool Result (Wikipedia): {result[:100]}...")
159
+ return result
160
+
161
+ except Exception as e:
162
+ print(f"❌ Error in wikipedia_search: {e}")
163
+ traceback.print_exc()
164
+ return f"Error wiki: {e}"
165
+
166
+
167
+ class TranscribeAudioTool(Tool):
168
+ name = "transcribe_audio"
169
+ description = """Converts spoken content in audio files to text. Handles various audio formats and produces a transcript of the spoken content for analysis."""
170
+
171
+ inputs = {
172
+ "file_path": {
173
+ "type": "string",
174
+ "description": "The full path to the audio file that needs to be transcribed.",
175
+ }
176
+ }
177
+ output_type = "string"
178
+
179
+ def forward(self, file_path: str) -> str:
180
+ try:
181
+ import os
182
+ import tempfile
183
+
184
+ import speech_recognition as sr
185
+ from pydub import AudioSegment
186
+
187
+ # Verify file exists
188
+ if not os.path.exists(file_path):
189
+ return (
190
+ f"❌ Audio file not found at: {file_path}. Download the file first."
191
+ )
192
+
193
+ # Initialize recognizer
194
+ recognizer = sr.Recognizer()
195
+
196
+ # Convert to WAV if not already (needed for speech_recognition)
197
+ file_ext = os.path.splitext(file_path)[1].lower()
198
+
199
+ if file_ext != ".wav":
200
+ # Create temp WAV file
201
+ temp_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
202
+
203
+ # Convert to WAV using pydub
204
+ audio = AudioSegment.from_file(file_path)
205
+ audio.export(temp_wav, format="wav")
206
+ audio_path = temp_wav
207
+ else:
208
+ audio_path = file_path
209
+
210
+ # Transcribe audio using Google's speech recognition
211
+ with sr.AudioFile(audio_path) as source:
212
+ audio_data = recognizer.record(source)
213
+ transcript = recognizer.recognize_google(audio_data)
214
+
215
+ # Clean up temp file if created
216
+ if file_ext != ".wav" and os.path.exists(temp_wav):
217
+ os.remove(temp_wav)
218
+
219
+ return transcript.strip()
220
+
221
+ except Exception as e:
222
+ return f"❌ Transcription failed: {str(e)}"
223
+
224
+
225
+ class TranscibeVideoFileTool(Tool):
226
+ name = "transcribe_video"
227
+ description = """Extracts and transcribes speech from video files. Converts the audio portion of videos into readable text for analysis or reference."""
228
+
229
+ inputs = {
230
+ "file_path": {
231
+ "type": "string",
232
+ "description": "The full path to the video file that needs to be transcribed.",
233
+ }
234
+ }
235
+ output_type = "string"
236
+
237
+ def forward(self, file_path: str) -> str:
238
+ try:
239
+ # Verify file exists
240
+ if not os.path.exists(file_path):
241
+ return (
242
+ f"❌ Video file not found at: {file_path}. Download the file first."
243
+ )
244
+
245
+ import os
246
+ import tempfile
247
+
248
+ import moviepy.editor as mp
249
+ import speech_recognition as sr
250
+
251
+ # Extract audio from video
252
+ video = mp.VideoFileClip(file_path)
253
+
254
+ # Create temporary audio file
255
+ temp_audio = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
256
+
257
+ # Extract audio to WAV format (required for speech_recognition)
258
+ video.audio.write_audiofile(temp_audio, verbose=False, logger=None)
259
+ video.close()
260
+
261
+ # Initialize recognizer
262
+ recognizer = sr.Recognizer()
263
+
264
+ # Transcribe audio
265
+ with sr.AudioFile(temp_audio) as source:
266
+ audio_data = recognizer.record(source)
267
+ transcript = recognizer.recognize_google(audio_data)
268
+
269
+ # Clean up temp file
270
+ if os.path.exists(temp_audio):
271
+ os.remove(temp_audio)
272
+
273
+ return transcript.strip()
274
+
275
+ except Exception as e:
276
+ return f"❌ Video processing failed: {str(e)}"
277
+
278
+
279
+ class BraveWebSearchTool(Tool):
280
+ name = "web_search"
281
+ description = """Performs web searches and returns content from top results. Provides real-time information from across the internet including current events, facts, and website content relevant to your query."""
282
+
283
+ inputs = {
284
+ "query": {
285
+ "type": "string",
286
+ "description": "A web search query string (e.g., a question or query).",
287
+ }
288
+ }
289
+ output_type = "string"
290
+
291
+    # Read the Brave Search API key from the environment; BraveSearch needs a valid key at class-definition time.
+    api_key = os.getenv("BRAVE_SEARCH_API_KEY")
+    count = 3
+    char_limit = 4000  # Adjust based on LLM context window
+    tool = BraveSearch.from_api_key(api_key=api_key, search_kwargs={"count": count})
296
+
297
+ def extract_main_text(self, url: str, char_limit: int) -> str:
298
+ try:
299
+ headers = {"User-Agent": "Mozilla/5.0"}
300
+ response = requests.get(url, headers=headers, timeout=10)
301
+ soup = BeautifulSoup(response.text, "html.parser")
302
+
303
+ # Remove scripts/styles
304
+ for tag in soup(["script", "style", "noscript"]):
305
+ tag.extract()
306
+
307
+ # Heuristic: extract visible text from body
308
+ body = soup.body
309
+ if not body:
310
+ return "⚠️ Could not extract content."
311
+
312
+ text = " ".join(t.strip() for t in body.stripped_strings)
313
+ return text[:char_limit].strip()
314
+ except Exception as e:
315
+ return f"⚠️ Failed to extract article: {e}"
316
+
317
+ def forward(self, query: str) -> str:
318
+ try:
319
+ results_json = self.tool.run(query)
320
+ results = (
321
+ json.loads(results_json)
322
+ if isinstance(results_json, str)
323
+ else results_json
324
+ )
325
+
326
+ output_parts = []
327
+ for i, r in enumerate(results[: self.count], start=1):
328
+ title = html.unescape(r.get("title", "").strip())
329
+ link = r.get("link", "").strip()
330
+
331
+ article_text = self.extract_main_text(link, self.char_limit)
332
+
333
+ result_block = (
334
+ f"Result {i}:\n"
335
+ f"Title: {title}\n"
336
+ f"URL: {link}\n"
337
+ f"Extracted Content:\n{article_text}\n"
338
+ )
339
+ output_parts.append(result_block)
340
+
341
+ return "\n\n".join(output_parts).strip()
342
+
343
+ except Exception as e:
344
+ return f"Search failed: {str(e)}"
345
+
346
+
347
+ class DescribeImageTool(Tool):
348
+ name = "describe_image"
349
+ description = """Analyzes images and generates detailed text descriptions. Identifies objects, scenes, text, and visual elements within the image to provide context or understanding."""
350
+
351
+ inputs = {
352
+ "image_path": {
353
+ "type": "string",
354
+ "description": "The full path to the image file to describe.",
355
+ }
356
+ }
357
+ output_type = "string"
358
+
359
+ def forward(self, image_path: str) -> str:
360
+ import os
361
+
362
+ from PIL import Image
363
+ from transformers import BlipForConditionalGeneration, BlipProcessor
364
+
365
+ if not os.path.exists(image_path):
366
+ return f"❌ Image file does not exist: {image_path}"
367
+
368
+ try:
369
+ processor = BlipProcessor.from_pretrained(
370
+ "Salesforce/blip-image-captioning-base", use_fast=True
371
+ )
372
+ model = BlipForConditionalGeneration.from_pretrained(
373
+ "Salesforce/blip-image-captioning-base"
374
+ )
375
+
376
+ image = Image.open(image_path).convert("RGB")
377
+ inputs = processor(images=image, return_tensors="pt")
378
+ output_ids = model.generate(**inputs)
379
+
380
+ caption = processor.decode(output_ids[0], skip_special_tokens=True)
381
+ return caption.strip() or "⚠️ No caption could be generated."
382
+ except Exception as e:
383
+ return f"❌ Failed to describe image: {e}"
384
+
385
+
386
+ class DownloadFileFromLinkTool(Tool):
387
+ name = "download_file_from_link"
388
+ description = "Downloads files from a URL and saves them locally. Supports various formats including PDFs, documents, images, and data files. Returns the local file path for further processing."
389
+
390
+ inputs = {
391
+ "link": {"type": "string", "description": "The URL to download the file from."},
392
+ "file_name": {
393
+ "type": "string",
394
+ "description": "Desired name of the saved file, without extension.",
395
+ "nullable": True,
396
+ },
397
+ }
398
+
399
+ output_type = "string"
400
+ SUPPORTED_EXTENSIONS = {
401
+ ".xlsx",
402
+ ".pdf",
403
+ ".txt",
404
+ ".csv",
405
+ ".json",
406
+ ".xml",
407
+ ".html",
408
+ ".jpg",
409
+ ".jpeg",
410
+ ".png",
411
+ ".mp4",
412
+ ".mp3",
413
+ ".wav",
414
+ ".zip",
415
+ }
416
+
417
+ def forward(self, link: str, file_name: str = "taskfile") -> str:
418
+ print(f"⬇️ Downloading file from: {link}")
419
+ dir_path = "./downloads"
420
+ os.makedirs(dir_path, exist_ok=True)
421
+
422
+ try:
423
+ response = requests.get(link, stream=True, timeout=30)
424
+ except requests.RequestException as e:
425
+ return f"❌ Error: Request failed - {e}"
426
+
427
+ if response.status_code != 200:
428
+ return (
429
+ f"❌ Error: Unable to fetch file. Status code: {response.status_code}"
430
+ )
431
+
432
+ # Step 1: Try extracting extension from provided filename
433
+ base_name, provided_ext = os.path.splitext(file_name)
434
+ provided_ext = provided_ext.lower()
435
+
436
+ # Step 2: Check if provided extension is supported
437
+ if provided_ext and provided_ext in self.SUPPORTED_EXTENSIONS:
438
+ ext = provided_ext
439
+ else:
440
+ # Step 3: Try to infer from Content-Type
441
+ content_type = (
442
+ response.headers.get("Content-Type", "").split(";")[0].strip()
443
+ )
444
+ guessed_ext = mimetypes.guess_extension(content_type or "") or ""
445
+
446
+ # Step 4: If mimetype returned .bin or nothing useful, try to fallback to URL
447
+ if guessed_ext in ("", ".bin"):
448
+ parsed_link = urlparse(link)
449
+ _, url_ext = os.path.splitext(parsed_link.path)
450
+ if url_ext.lower() in self.SUPPORTED_EXTENSIONS:
451
+ ext = url_ext.lower()
452
+ else:
453
+ return f"⚠️ Warning: Cannot determine a valid file extension from '{content_type}' or URL. Please retry with an explicit valid filename and extension."
454
+ else:
455
+ ext = guessed_ext
456
+
457
+ # Step 5: Final path and save
458
+ file_path = os.path.join(dir_path, base_name + ext)
459
+ downloaded = 0
460
+
461
+ with open(file_path, "wb") as f:
462
+ for chunk in response.iter_content(chunk_size=1024):
463
+ if chunk:
464
+ f.write(chunk)
465
+ downloaded += len(chunk)
466
+
467
+ return file_path
468
+
469
+
470
+ class DuckDuckGoSearchTool(Tool):
471
+ name = "web_search"
472
+ description = """Performs web searches and returns content from top results. Provides real-time information from across the internet including current events, facts, and website content relevant to your query."""
473
+
474
+ inputs = {
475
+ "query": {
476
+ "type": "string",
477
+ "description": "The search query to run on DuckDuckGo",
478
+ },
479
+ }
480
+ output_type = "string"
481
+
482
+ def _configure(self, max_retries: int = 3, retry_sleep: int = 3):
483
+ self._max_retries = max_retries
484
+ self._retry_sleep = retry_sleep
485
+
486
+    def forward(self, query: str) -> str:
+        self._configure()
+        top_results = 5  # defined before use so the log line below can reference it
+        print(
+            f"EXECUTING TOOL: duckduckgo_search(query='{query}', top_results={top_results})"
+        )
493
+
494
+ retries = 0
495
+ max_retries = getattr(self, "_max_retries", 3)
496
+ retry_sleep = getattr(self, "_retry_sleep", 2)
497
+
498
+ while retries < max_retries:
499
+ try:
500
+ results = DDGS().text(
501
+ keywords=query,
502
+ region="wt-wt",
503
+ safesearch="moderate",
504
+ max_results=top_results,
505
+ )
506
+
507
+ if not results:
508
+ return "No results found."
509
+
510
+ output_lines = []
511
+ for idx, res in enumerate(results[:top_results], start=1):
512
+ title = res.get("title", "N/A")
513
+ url = res.get("href", "N/A")
514
+ snippet = res.get("body", "N/A")
515
+
516
+ output_lines.append(
517
+ f"Result {idx}:\n"
518
+ f"Title: {title}\n"
519
+ f"URL: {url}\n"
520
+ f"Snippet: {snippet}\n"
521
+ )
522
+
523
+ output = "\n".join(output_lines)
524
+
525
+ print(f"-> Tool Result (DuckDuckGo): {output[:1500]}...")
526
+ return output
527
+
528
+ except (
529
+ DuckDuckGoSearchException,
530
+ TimeoutException,
531
+ RatelimitException,
532
+ ConversationLimitException,
533
+ ) as e:
534
+ retries += 1
535
+ print(
536
+ f"⚠️ DuckDuckGo Exception (Attempt {retries}/{max_retries}): {type(e).__name__}: {e}"
537
+ )
538
+ traceback.print_exc()
539
+ time.sleep(retry_sleep)
540
+
541
+ except Exception as e:
542
+ print(f"❌ Unexpected Error: {e}")
543
+ traceback.print_exc()
544
+ return f"Unhandled exception during DuckDuckGo search: {e}"
545
+
546
+ return f"❌ Failed to retrieve results after {max_retries} retries."
547
+
548
+ huggingface_ef = embedding_functions.HuggingFaceEmbeddingFunction(
549
+ model_name="sentence-transformers/all-mpnet-base-v2"
550
+ )
551
+ SUPPORTED_EXTENSIONS = [
552
+ ".txt",
553
+ ".md",
554
+ ".py",
555
+ ".pdf",
556
+ ".json",
557
+ ".jsonl",
558
+ ".html",
559
+ ".htm",
560
+ ]
561
+
562
+ class AddDocumentToVectorStoreTool(Tool):
563
+ name = "add_document_to_vector_store"
564
+ description = "Processes a document and adds it to the vector database for semantic search. Automatically chunks files and creates text embeddings to enable powerful content retrieval."
565
+
566
+ inputs = {
567
+ "file_path": {
568
+ "type": "string",
569
+ "description": "Absolute path to the file to be indexed.",
570
+ }
571
+ }
572
+
573
+ output_type = "string"
574
+
575
+ def _load_file(self, path: Path):
576
+ """Select the right loader for the file extension."""
577
+ if path.suffix == ".pdf":
578
+ return PyPDFLoader(str(path)).load()
579
+ elif path.suffix == ".json":
580
+ return JSONLoader(str(path), jq_schema=".").load()
581
+ elif path.suffix in [".md"]:
582
+ return UnstructuredFileLoader(str(path)).load()
583
+ elif path.suffix in [".html", ".htm"]:
584
+ return BSHTMLLoader(str(path)).load()
585
+ else: # fallback for .txt, .py, etc.
586
+ return TextLoader(str(path)).load()
587
+
588
+ def forward(self, file_path: str) -> str:
589
+ print(f"📄 Adding document to vector store: {file_path}")
590
+ try:
591
+ collection_name = "vectorstore"
592
+ path = Path(file_path)
593
+ if not path.exists() or path.suffix not in SUPPORTED_EXTENSIONS:
594
+ return f"Unsupported or missing file: {file_path}"
595
+
596
+ docs = self._load_file(path)
597
+ text_splitter = RecursiveCharacterTextSplitter(
598
+ chunk_size=500, chunk_overlap=50
599
+ )
600
+ split_docs = text_splitter.split_documents(docs)
601
+
602
+ client = chromadb.Client(
603
+ chromadb.config.Settings(
604
+ persist_directory="./chroma_store",
605
+ )
606
+ )
607
+
608
+ collection = client.get_or_create_collection(
609
+ name=collection_name,
610
+ configuration={"embedding_function": huggingface_ef},
611
+ )
612
+
613
+ texts = [doc.page_content for doc in split_docs]
614
+ metadatas = [doc.metadata for doc in split_docs]
615
+
616
+ collection.add(
617
+ documents=texts,
618
+ metadatas=metadatas,
619
+ ids=[f"{path.stem}_{i}" for i in range(len(texts))],
620
+ )
621
+
622
+ return f"✅ Successfully added {len(texts)} chunks from '{file_path}' to collection '{collection_name}'."
623
+
624
+ except Exception as e:
625
+ print(f"❌ Error in add_to_vector_store: {e}")
626
+ traceback.print_exc()
627
+ return f"Error: {e}"
628
+
629
+ class QueryVectorStoreTool(Tool):
630
+ name = "query_downloaded_documents"
631
+ description = "Performs semantic searches across your downloaded documents. Use detailed queries to find specific information, concepts, or answers from your collected resources."
632
+
633
+ inputs = {
634
+ "query": {
635
+ "type": "string",
636
+ "description": "The search query. Ensure this is constructed intelligently so to retrieve the most relevant outputs.",
637
+ },
638
+ "top_k": {
639
+ "type": "integer",
640
+ "description": "Number of top results to retrieve. Usually between 3 and 30",
641
+ "nullable": True,
642
+ },
643
+ }
644
+ output_type = "string"
645
+
646
+    def forward(self, query: str, top_k: int = 5) -> str:
+        collection_name = "vectorstore"
+
+        # Clamp top_k to the supported range (3-30)
+        if top_k < 3:
+            top_k = 3
+        if top_k > 30:
+            top_k = 30
653
+
654
+ print(f"🔎 Querying vector store '{collection_name}' with: '{query}'")
655
+ try:
656
+ client = chromadb.Client(
657
+ chromadb.config.Settings(
658
+ persist_directory="./chroma_store",
659
+ )
660
+ )
661
+ collection = client.get_collection(name=collection_name)
662
+
663
+ results = collection.query(
664
+ query_texts=[query],
665
+ n_results=top_k,
666
+ )
667
+
668
+ formatted = []
669
+ for i in range(len(results["documents"][0])):
670
+ doc = results["documents"][0][i]
671
+ metadata = results["metadatas"][0][i]
672
+ formatted.append(
673
+ f"Result {i+1}:\n" f"Content: {doc}\n" f"Metadata: {metadata}\n"
674
+ )
675
+
676
+ return "\n".join(formatted) or "No relevant documents found."
677
+
678
+ except Exception as e:
679
+ print(f"❌ Error in query_vector_store: {e}")
680
+ traceback.print_exc()
681
+ return f"Error querying vector store: {e}"
682
+
683
+ @tool
684
+ def image_question_answering(image_path: str, prompt: str) -> str:
685
+ """
686
+ Analyzes images and answers specific questions about their content. Can identify objects, read text, describe scenes, or interpret visual information based on your questions.
687
+
688
+ Args:
689
+ image_path: The path to the image file
690
+ prompt: The question to ask about the image
691
+
692
+ Returns:
693
+ A string answer generated by the local Ollama model
694
+ """
695
+ # Check for supported file types
696
+ file_extension = image_path.lower().split(".")[-1]
697
+ if file_extension not in ["jpg", "jpeg", "png", "bmp", "gif", "webp"]:
698
+ return "Unsupported file type. Please provide an image."
699
+
700
+ path = Path(image_path)
701
+ if not path.exists():
702
+ return f"File not found at: {image_path}"
703
+
704
+    # Send the image and prompt to Ollama's local model
+    # (assumes the `ollama` Python package is installed and an Ollama server is running locally)
+    from ollama import chat
+
+    response = chat(
+        model="llava",  # assumes a local multimodal model tagged 'llava' is available
707
+ messages=[
708
+ {
709
+ "role": "user",
710
+ "content": prompt,
711
+ "images": [path],
712
+ },
713
+ ],
714
+ options={"temperature": 0.2}, # Slight randomness for naturalness
715
+ )
716
+
717
+ return response.message.content.strip()
718
+
719
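A quick sketch of calling this tool directly (not part of the committed diff): the image path is hypothetical, and a local Ollama server with the 'llava' model already pulled is assumed.

    # Illustrative sketch only; requires a running Ollama server and `ollama pull llava`.
    answer = image_question_answering(
        image_path="downloads/figure1.png",  # hypothetical file
        prompt="What is the largest value shown in this chart?",
    )
    print(answer)
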
+ class VisitWebpageTool(Tool):
+     name = "visit_webpage"
+     description = "Loads a webpage from a URL and converts its content to markdown format. Use this to browse websites, extract information, or identify downloadable resources from a specific web address."
+     inputs = {
+         "url": {
+             "type": "string",
+             "description": "The url of the webpage to visit.",
+         }
+     }
+     output_type = "string"
+
+     def forward(self, url: str) -> str:
+         try:
+             from urllib.parse import urlparse
+
+             import requests
+             from bs4 import BeautifulSoup
+             from markdownify import markdownify
+             from requests.exceptions import RequestException
+             from smolagents.utils import truncate_content
+         except ImportError as e:
+             raise ImportError(
+                 "You must install packages `markdownify`, `requests`, and `beautifulsoup4` to run this tool: for instance run `pip install markdownify requests beautifulsoup4`."
+             ) from e
+
+         try:
+             # Get the webpage content
+             headers = {
+                 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+             }
+             response = requests.get(url, headers=headers, timeout=20)
+             response.raise_for_status()
+
+             # Parse the HTML with BeautifulSoup
+             soup = BeautifulSoup(response.text, "html.parser")
+
+             # Extract domain name for context
+             domain = urlparse(url).netloc
+
+             # Remove common clutter elements
+             self._remove_clutter(soup)
+
+             # Try to identify and prioritize main content
+             main_content = self._extract_main_content(soup)
+
+             if main_content:
+                 # Convert the cleaned HTML to markdown
+                 markdown_content = markdownify(str(main_content)).strip()
+             else:
+                 # Fallback to full page content if main content extraction fails
+                 markdown_content = markdownify(str(soup)).strip()
+
+             # Post-process the markdown content
+             markdown_content = self._clean_markdown(markdown_content)
+
+             # Add source information
+             result = f"Content from {domain}:\n\n{markdown_content}"
+
+             return truncate_content(result, 40000)
+
+         except requests.exceptions.Timeout:
+             return "The request timed out. Please try again later or check the URL."
+         except RequestException as e:
+             return f"Error fetching the webpage: {str(e)}"
+         except Exception as e:
+             return f"An unexpected error occurred: {str(e)}"
+
+     def _remove_clutter(self, soup):
+         """Remove common elements that clutter web pages."""
+         # Common non-content elements to remove
+         clutter_selectors = [
+             "header",
+             "footer",
+             "nav",
+             ".nav",
+             ".navigation",
+             ".menu",
+             ".sidebar",
+             ".footer",
+             ".header",
+             "#footer",
+             "#header",
+             "#nav",
+             "#sidebar",
+             ".widget",
+             ".cookie",
+             ".cookies",
+             ".ad",
+             ".ads",
+             ".advertisement",
+             "script",
+             "style",
+             "noscript",
+             "iframe",
+             ".social",
+             ".share",
+             ".comment",
+             ".comments",
+             ".subscription",
+             ".newsletter",
+             '[role="banner"]',
+             '[role="navigation"]',
+             '[role="complementary"]',
+         ]
+
+         for selector in clutter_selectors:
+             for element in soup.select(selector):
+                 element.decompose()
+
+         # Remove hidden elements
+         for hidden in soup.select(
+             '[style*="display: none"], [style*="display:none"], [style*="visibility: hidden"], [style*="visibility:hidden"], [hidden]'
+         ):
+             hidden.decompose()
+
+     def _extract_main_content(self, soup):
+         """Try to identify and extract the main content of the page."""
+         # Priority order for common main content containers
+         main_content_selectors = [
+             "main",
+             '[role="main"]',
+             "article",
+             ".content",
+             ".main-content",
+             ".post-content",
+             "#content",
+             "#main",
+             "#main-content",
+             ".article",
+             ".post",
+             ".entry",
+             ".page-content",
+             ".entry-content",
+         ]
+
+         # Try to find the main content container
+         for selector in main_content_selectors:
+             main_content = soup.select(selector)
+             if main_content:
+                 # If multiple matches, find the one with the most text content
+                 if len(main_content) > 1:
+                     return max(main_content, key=lambda x: len(x.get_text()))
+                 return main_content[0]
+
+         # If no main content container found, look for the largest text block
+         paragraphs = soup.find_all("p")
+         if paragraphs:
+             # Find the parent that contains the most paragraphs
+             parents = {}
+             for p in paragraphs:
+                 if p.parent:
+                     if p.parent not in parents:
+                         parents[p.parent] = 0
+                     parents[p.parent] += 1
+
+             if parents:
+                 # Return the parent with the most paragraphs
+                 return max(parents.items(), key=lambda x: x[1])[0]
+
+         # Return None if we can't identify main content
+         return None
+
+     def _clean_markdown(self, content):
+         """Clean up the markdown content."""
+         # Normalize whitespace
+         content = re.sub(r"\n{3,}", "\n\n", content)
+
+         # Remove consecutive duplicate links
+         content = re.sub(r"(\[.*?\]\(.*?\))\s*\1+", r"\1", content)
+
+         # Remove very short lines that are likely menu items
+         lines = content.split("\n")
+         filtered_lines = []
+
+         # Skip consecutive short lines (likely menus)
+         short_line_threshold = 40  # characters
+         consecutive_short_lines = 0
+         max_consecutive_short_lines = 3
+
+         for line in lines:
+             stripped_line = line.strip()
+             if (
+                 len(stripped_line) < short_line_threshold
+                 and not stripped_line.startswith("#")
+             ):
+                 consecutive_short_lines += 1
+                 if consecutive_short_lines > max_consecutive_short_lines:
+                     continue
+             else:
+                 consecutive_short_lines = 0
+
+             filtered_lines.append(line)
+
+         content = "\n".join(filtered_lines)
+
+         # Remove duplicate headers
+         seen_headers = set()
+         lines = content.split("\n")
+         filtered_lines = []
+
+         for line in lines:
+             if line.startswith("#"):
+                 header_text = line.strip()
+                 if header_text in seen_headers:
+                     continue
+                 seen_headers.add(header_text)
+             filtered_lines.append(line)
+
+         content = "\n".join(filtered_lines)
+
+         # Remove lines containing common footer patterns
+         footer_patterns = [
+             r"^copyright",
+             r"^©",
+             r"^all rights reserved",
+             r"^terms",
+             r"^privacy policy",
+             r"^contact us",
+             r"^follow us",
+             r"^social media",
+             r"^disclaimer",
+         ]
+
+         footer_pattern = "|".join(footer_patterns)
+         lines = content.split("\n")
+         filtered_lines = []
+
+         for line in lines:
+             if not re.search(footer_pattern, line.lower()):
+                 filtered_lines.append(line)
+
+         content = "\n".join(filtered_lines)
+
+         return content
+
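A minimal sketch (not part of the committed diff) of exercising the webpage tool on its own; the URL is just an example.

    # Illustrative sketch only: fetch a page and print the start of the cleaned markdown.
    page_tool = VisitWebpageTool()
    markdown = page_tool.forward("https://example.com")
    print(markdown[:500])
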
+ class ArxivSearchTool(Tool):
+     name = "arxiv_search"
+     description = """Searches arXiv for academic papers and returns structured information including titles, authors, publication dates, abstracts, and download links."""
+
+     inputs = {
+         "query": {
+             "type": "string",
+             "description": "A research-related query (e.g., 'AI regulation')",
+         },
+         "from_date": {
+             "type": "string",
+             "description": "Optional search start date in YYYY, YYYY-MM, or YYYY-MM-DD format (e.g., '2022', '2022-06', or '2022-04-12')",
+             "nullable": True,
+         },
+         "to_date": {
+             "type": "string",
+             "description": "Optional search end date in YYYY, YYYY-MM, or YYYY-MM-DD format (e.g., '2022', '2022-06', or '2022-04-12')",
+             "nullable": True,
+         },
+     }
+
+     output_type = "string"
+
+     def forward(
+         self,
+         query: str,
+         from_date: str = None,
+         to_date: str = None,
+     ) -> str:
+         # 1) build URL
+         url = build_arxiv_url(query, from_date, to_date, size=50)
+
+         # 2) fetch & parse
+         try:
+             papers = fetch_and_parse_arxiv(url)
+         except Exception as e:
+             return f"❌ Failed to fetch or parse arXiv results: {e}"
+
+         if not papers:
+             return "No results found for your query."
+
+         # 3) format into a single string
+         output_lines = []
+         for idx, p in enumerate(papers, start=1):
+             output_lines += [
+                 f"🔍 RESULT {idx}",
+                 f"Title : {p['title']}",
+                 f"Authors : {p['authors']}",
+                 f"Published : {p['published']}",
+                 f"Summary : {p['abstract'][:500]}{'...' if len(p['abstract']) > 500 else ''}",
+                 f"Entry ID : {p['entry_link']}",
+                 f"Download link: {p['download_link']}",
+                 "",
+             ]
+
+         return "\n".join(output_lines).strip()
+
+ def fetch_and_parse_arxiv(url: str) -> List[Dict[str, str]]:
+     """
+     Fetches the given arXiv advanced-search URL, parses the HTML,
+     and returns a list of results. Each result is a dict containing:
+       - title
+       - authors
+       - published
+       - abstract
+       - entry_link
+       - download_link
+       - doi (or "[N/A]" if none)
+     """
+     resp = requests.get(url)
+     resp.raise_for_status()
+     soup = BeautifulSoup(resp.text, "html.parser")
+
+     results = []
+     for li in soup.find_all("li", class_="arxiv-result"):
+         # Title
+         t = li.find("p", class_="title")
+         title = t.get_text(strip=True) if t else ""
+
+         # Authors
+         a = li.find("p", class_="authors")
+         authors = a.get_text(strip=True).replace("Authors:", "").strip() if a else ""
+
+         # Abstract
+         ab = li.find("span", class_="abstract-full")
+         abstract = (
+             ab.get_text(strip=True).replace("Abstract:", "").strip() if ab else ""
+         )
+
+         # Published date
+         d = li.find("p", class_="is-size-7")
+         published = d.get_text(strip=True) if d else ""
+
+         # Entry link
+         lt = li.find("p", class_="list-title")
+         entry_link = lt.find("a")["href"] if lt and lt.find("a") else ""
+
+         # DOI (default to "[N/A]" when no DOI link is listed)
+         doi = "[N/A]"
+         idblock = li.find("p", class_="list-identifier")
+         if idblock:
+             for a_tag in idblock.find_all("a", href=True):
+                 if "doi.org" in a_tag["href"]:
+                     doi = a_tag["href"]
+                     break
+
+         results.append(
+             {
+                 "title": title,
+                 "authors": authors,
+                 "published": published,
+                 "abstract": abstract,
+                 "entry_link": entry_link,
+                 "download_link": (
+                     entry_link.replace("abs", "pdf") if "abs" in entry_link else "N/A"
+                 ),
+                 "doi": doi,
+             }
+         )
+
+     return results
+
+ def build_arxiv_url(
+     query: str, from_date: str = None, to_date: str = None, size: int = 50
+ ) -> str:
+     """
+     Build an arXiv advanced-search URL matching the exact segment order:
+       1) ?advanced
+       2) terms-0-operator=AND
+       3) terms-0-term=…
+       4) terms-0-field=all
+       5) classification-physics_archives=all
+       6) classification-include_cross_list=include
+       [ optional date-range block ]
+       7) abstracts=show
+       8) size=…
+       9) order=-announced_date_first
+     If from_date or to_date is None, the date-range block is omitted.
+     """
+     base = "https://arxiv.org/search/advanced?advanced="
+     parts = [
+         "&terms-0-operator=AND",
+         f"&terms-0-term={quote_plus(query)}",
+         "&terms-0-field=all",
+         "&classification-physics_archives=all",
+         "&classification-include_cross_list=include",
+     ]
+
+     # optional date-range filtering
+     if from_date and to_date:
+         parts += [
+             "&date-year=",
+             "&date-filter_by=date_range",
+             f"&date-from_date={from_date}",
+             f"&date-to_date={to_date}",
+             "&date-date_type=submitted_date",
+         ]
+
+     parts += [
+         "&abstracts=show",
+         f"&size={size}",
+         "&order=-announced_date_first",
+     ]
+
+     return base + "".join(parts)