Katya Beresneva commited on
Commit ·
523e34e
1
Parent(s): b75609c
fix
Browse files- .gitattributes +0 -17
- agent.py +122 -111
- app.py +207 -44
- requirements.txt +12 -5
- tools.py +373 -170
- utils.py +6 -21
.gitattributes
DELETED
|
@@ -1,17 +0,0 @@
|
|
| 1 |
-
asyncer
|
| 2 |
-
anyio
|
| 3 |
-
arxiv
|
| 4 |
-
gradio
|
| 5 |
-
httpx
|
| 6 |
-
requests
|
| 7 |
-
langgraph==0.0.12
|
| 8 |
-
langchain-google-genai
|
| 9 |
-
langchain-community
|
| 10 |
-
langchain-tavily
|
| 11 |
-
openpyxl
|
| 12 |
-
smolagents
|
| 13 |
-
tavily-python
|
| 14 |
-
wikipedia-api
|
| 15 |
-
wikipedia
|
| 16 |
-
duckduckgo-search
|
| 17 |
-
python-dotenv
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agent.py
CHANGED
|
@@ -1,134 +1,145 @@
|
|
| 1 |
import os
|
| 2 |
-
from typing import Optional, List, Dict, Any
|
| 3 |
from langchain_core.messages import HumanMessage
|
| 4 |
-
from
|
| 5 |
-
from
|
| 6 |
-
from
|
| 7 |
-
from
|
| 8 |
-
|
| 9 |
-
from
|
| 10 |
-
from
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
| 23 |
from utils import get_llm
|
| 24 |
|
|
|
|
| 25 |
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "")
|
| 26 |
if not GOOGLE_API_KEY:
|
| 27 |
raise ValueError("GOOGLE_API_KEY environment variable is not set.")
|
| 28 |
|
| 29 |
AGENT_MODEL_NAME = os.getenv("AGENT_MODEL_NAME", "gemini-2.0-flash")
|
| 30 |
|
| 31 |
-
|
| 32 |
-
You are
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
-
|
| 43 |
-
-
|
| 44 |
-
-
|
| 45 |
-
- Code
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
"""
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
llm_provider_api_key=GOOGLE_API_KEY,
|
| 57 |
-
model_name=model_name
|
| 58 |
)
|
| 59 |
-
self.tools = self._get_tools()
|
| 60 |
-
self.agent_executor = self._create_agent_executor()
|
| 61 |
|
| 62 |
-
def _get_tools(self)
|
| 63 |
-
"""Convert all tools to LangChain Tool format"""
|
| 64 |
tools = [
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
]
|
| 78 |
-
return tools
|
| 79 |
-
|
| 80 |
-
def _wrap_tool(self, tool: Any) -> Tool:
|
| 81 |
-
"""Convert any tool to LangChain Tool format"""
|
| 82 |
-
return Tool(
|
| 83 |
-
name=tool.name,
|
| 84 |
-
description=tool.description,
|
| 85 |
-
func=tool._run,
|
| 86 |
-
coroutine=tool._arun,
|
| 87 |
-
)
|
| 88 |
-
|
| 89 |
-
def _create_agent_executor(self) -> AgentExecutor:
|
| 90 |
-
"""Create the agent executor with React strategy"""
|
| 91 |
-
prompt = ChatPromptTemplate.from_template(KATE_AGENT_PROMPT)
|
| 92 |
-
agent = create_react_agent(self.llm, self.tools, prompt)
|
| 93 |
-
return AgentExecutor(
|
| 94 |
-
agent=agent,
|
| 95 |
-
tools=self.tools,
|
| 96 |
-
handle_parsing_errors=True,
|
| 97 |
-
max_iterations=10,
|
| 98 |
-
verbose=True
|
| 99 |
-
)
|
| 100 |
|
| 101 |
async def __call__(
|
| 102 |
-
self,
|
| 103 |
-
task_id: str,
|
| 104 |
-
question: str,
|
| 105 |
-
file_name: Optional[str] = None
|
| 106 |
) -> str:
|
| 107 |
-
|
| 108 |
-
|
| 109 |
recursion_limit=64,
|
| 110 |
-
configurable={"thread_id":
|
| 111 |
)
|
| 112 |
|
| 113 |
-
if not
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
|
|
|
| 2 |
from langchain_core.messages import HumanMessage
|
| 3 |
+
from langchain_core.runnables.config import RunnableConfig
|
| 4 |
+
from langgraph.checkpoint.memory import MemorySaver
|
| 5 |
+
from langchain.globals import set_debug
|
| 6 |
+
from langchain.globals import set_verbose
|
| 7 |
+
from langgraph.prebuilt import create_react_agent
|
| 8 |
+
from langgraph.prebuilt import ToolNode
|
| 9 |
+
from langgraph.prebuilt.chat_agent_executor import AgentState
|
| 10 |
+
|
| 11 |
+
from smolagents import DuckDuckGoSearchTool
|
| 12 |
+
from smolagents import PythonInterpreterTool
|
| 13 |
+
from tools import analyze_audio
|
| 14 |
+
from tools import analyze_excel
|
| 15 |
+
from tools import analyze_image
|
| 16 |
+
from tools import analyze_video
|
| 17 |
+
from tools import download_file_for_task
|
| 18 |
+
from tools import read_file_contents
|
| 19 |
+
from tools import search_arxiv
|
| 20 |
+
from tools import search_tavily
|
| 21 |
+
from tools import search_wikipedia
|
| 22 |
+
from tools import SmolagentToolWrapper
|
| 23 |
+
from tools import tavily_extract_tool
|
| 24 |
from utils import get_llm
|
| 25 |
|
| 26 |
+
|
| 27 |
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "")
|
| 28 |
if not GOOGLE_API_KEY:
|
| 29 |
raise ValueError("GOOGLE_API_KEY environment variable is not set.")
|
| 30 |
|
| 31 |
AGENT_MODEL_NAME = os.getenv("AGENT_MODEL_NAME", "gemini-2.0-flash")
|
| 32 |
|
| 33 |
+
MULTIMODAL_TASK_SOLVER_PROMPT = """
|
| 34 |
+
You are a specialized multimodal task-solving AI assistant capable of handling complex data analysis and information retrieval tasks.
|
| 35 |
+
Core Operating Guidelines:
|
| 36 |
+
- Employ systematic analysis: Break down problems into logical steps
|
| 37 |
+
- Maintain brevity: Provide answers in the most concise format possible - raw numbers, single words, or comma-delimited lists
|
| 38 |
+
- Format compliance:
|
| 39 |
+
* Numbers: No commas, units, or currency symbols
|
| 40 |
+
* Lists: Pure comma-separated values without additional text
|
| 41 |
+
* Text: Bare minimum words, no sentences or explanations
|
| 42 |
+
- Tool utilization:
|
| 43 |
+
* For multimedia content (images, audio, video) - use dedicated analysis tools
|
| 44 |
+
* For data processing (Excel, structured data) - use appropriate parsers
|
| 45 |
+
* For information retrieval - leverage search tools
|
| 46 |
+
- Verification principle: Never guess - use available tools to verify information
|
| 47 |
+
- Code usage: Implement Python code for calculations and data transformations
|
| 48 |
+
- Answer format: Always prefix final answers with 'FINAL ANSWER: '
|
| 49 |
+
- Counting queries: Return only the numerical count
|
| 50 |
+
- Listing queries: Return only the comma-separated items
|
| 51 |
+
- Sorting queries: Return only the ordered list
|
| 52 |
+
|
| 53 |
+
Sample Responses:
|
| 54 |
+
Q: Current Bitcoin price in USD? A: 47392
|
| 55 |
+
Q: Sort these colors: blue, red, azure A: azure, blue, red
|
| 56 |
+
Q: Capital of France? A: Paris
|
| 57 |
+
Q: Count vowels in 'hello' A: 2
|
| 58 |
+
Q: Temperature scale used in USA? A: Fahrenheit
|
| 59 |
+
Q: List prime numbers under 10 A: 2, 3, 5, 7
|
| 60 |
+
Q: Most streamed artist 2023? A: Taylor Swift
|
| 61 |
"""
|
| 62 |
|
| 63 |
+
#set_debug(True)
|
| 64 |
+
#set_verbose(True)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class MultiModalTaskState(AgentState):
|
| 68 |
+
task_identifier: str
|
| 69 |
+
query_text: str
|
| 70 |
+
input_file_path: str
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
class MultiModalAgent:
|
| 74 |
+
def __init__(self, model_name: str | None = None):
|
| 75 |
+
if model_name is None:
|
| 76 |
+
model_name = AGENT_MODEL_NAME
|
| 77 |
+
llm = self._get_llm(model_name)
|
| 78 |
+
tools = self._get_tools()
|
| 79 |
+
self.agent = create_react_agent(
|
| 80 |
+
llm,
|
| 81 |
+
tools=tools,
|
| 82 |
+
state_schema=MultiModalTaskState,
|
| 83 |
+
state_modifier=MULTIMODAL_TASK_SOLVER_PROMPT,
|
| 84 |
+
checkpointer = MemorySaver()
|
| 85 |
+
)
|
| 86 |
+
|
| 87 |
+
def _get_llm(self, model_name: str):
|
| 88 |
+
return get_llm(
|
| 89 |
llm_provider_api_key=GOOGLE_API_KEY,
|
| 90 |
+
model_name=model_name,
|
| 91 |
)
|
|
|
|
|
|
|
| 92 |
|
| 93 |
+
def _get_tools(self):
|
|
|
|
| 94 |
tools = [
|
| 95 |
+
SmolagentToolWrapper(DuckDuckGoSearchTool()),
|
| 96 |
+
SmolagentToolWrapper(PythonInterpreterTool()),
|
| 97 |
+
download_file_for_task,
|
| 98 |
+
read_file_contents,
|
| 99 |
+
analyze_audio,
|
| 100 |
+
analyze_image,
|
| 101 |
+
analyze_excel,
|
| 102 |
+
analyze_video,
|
| 103 |
+
search_arxiv,
|
| 104 |
+
search_tavily,
|
| 105 |
+
search_wikipedia,
|
| 106 |
+
tavily_extract_tool,
|
| 107 |
]
|
| 108 |
+
return ToolNode(tools)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
async def __call__(
|
| 111 |
+
self, task_identifier: str, query_text: str, input_file_path: str | None = None
|
|
|
|
|
|
|
|
|
|
| 112 |
) -> str:
|
| 113 |
+
|
| 114 |
+
execution_config = RunnableConfig(
|
| 115 |
recursion_limit=64,
|
| 116 |
+
configurable={ "thread_id": task_identifier }
|
| 117 |
)
|
| 118 |
|
| 119 |
+
if not input_file_path:
|
| 120 |
+
input_file_path = "None - no file present"
|
| 121 |
+
|
| 122 |
+
user_input = HumanMessage(
|
| 123 |
+
content=
|
| 124 |
+
[
|
| 125 |
+
{
|
| 126 |
+
"type": "text",
|
| 127 |
+
"text": f"Task Id: {task_identifier}, Question: {query_text}, Filename: {input_file_path}. If a filename is present (and is not 'None'), download the file for the task that's referenced in the question. If there isn't a filename present, please use tools where applicable."
|
| 128 |
+
}
|
| 129 |
+
]
|
| 130 |
+
)
|
| 131 |
+
|
| 132 |
+
response = await self.agent.ainvoke(
|
| 133 |
+
{
|
| 134 |
+
"messages": [user_input],
|
| 135 |
+
"question": query_text,
|
| 136 |
+
"task_id": task_identifier,
|
| 137 |
+
"file_name": input_file_path
|
| 138 |
+
}, execution_config)
|
| 139 |
+
|
| 140 |
+
final_response = response['messages'][-1].content
|
| 141 |
+
if "FINAL ANSWER: " in final_response:
|
| 142 |
+
return final_response.split("FINAL ANSWER: ", 1)[1].strip()
|
| 143 |
+
else:
|
| 144 |
+
return final_response
|
| 145 |
+
|
app.py
CHANGED
|
@@ -3,56 +3,219 @@ import os
|
|
| 3 |
import gradio as gr
|
| 4 |
import requests
|
| 5 |
import pandas as pd
|
| 6 |
-
from agent import
|
| 7 |
-
|
| 8 |
-
agent = KateMultiModalAgent()
|
| 9 |
|
|
|
|
|
|
|
| 10 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 11 |
-
AGENT_NAME = "Kate's Advanced Agent"
|
| 12 |
|
| 13 |
async def run_agent(
|
| 14 |
-
agt:
|
| 15 |
item: dict
|
| 16 |
) -> str | None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
try:
|
| 18 |
-
|
| 19 |
-
question_text = item.get("question")
|
| 20 |
-
file_name = item.get("file_name", None)
|
| 21 |
-
|
| 22 |
-
if not task_id or question_text is None:
|
| 23 |
-
print(f"Skipping invalid item: {item}")
|
| 24 |
-
return None
|
| 25 |
-
|
| 26 |
-
print(f"Processing task {task_id}...")
|
| 27 |
-
submitted_answer = await agt(task_id, question_text, file_name)
|
| 28 |
-
return {
|
| 29 |
-
"task_id": task_id,
|
| 30 |
-
"question": question_text,
|
| 31 |
-
"submitted_answer": submitted_answer
|
| 32 |
-
}
|
| 33 |
except Exception as e:
|
| 34 |
-
print(f"Error
|
| 35 |
-
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
return "Please login with your Hugging Face account (Kate Berasneva)", None
|
| 40 |
-
|
| 41 |
-
username = profile.username
|
| 42 |
-
print(f"Kate's Agent running for user: {username}")
|
| 43 |
-
|
| 44 |
-
with gr.Blocks(title="Kate's Agent Evaluation Runner") as demo:
|
| 45 |
-
gr.Markdown("# Kate's Advanced Agent Evaluation")
|
| 46 |
-
gr.Markdown("""
|
| 47 |
-
**Welcome to Kate Berasneva's Agent Solution!**
|
| 48 |
-
|
| 49 |
-
This enhanced agent features:
|
| 50 |
-
- Improved error handling
|
| 51 |
-
- Better tool integration
|
| 52 |
-
- Custom prompt engineering
|
| 53 |
-
- Efficient task processing
|
| 54 |
-
|
| 55 |
-
1. Login with your HF account
|
| 56 |
-
2. Click Run Evaluation
|
| 57 |
-
3. View your results!
|
| 58 |
-
""")
|
|
|
|
| 3 |
import gradio as gr
|
| 4 |
import requests
|
| 5 |
import pandas as pd
|
| 6 |
+
from agent import MultiModalAgent
|
|
|
|
|
|
|
| 7 |
|
| 8 |
+
# (Keep Constants as is)
|
| 9 |
+
# --- Constants ---
|
| 10 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
|
|
| 11 |
|
| 12 |
async def run_agent(
|
| 13 |
+
agt: MultiModalAgent,
|
| 14 |
item: dict
|
| 15 |
) -> str | None:
|
| 16 |
+
task_id = item.get("task_id")
|
| 17 |
+
question_text = item.get("question")
|
| 18 |
+
file_name = item.get("file_name", None)
|
| 19 |
+
|
| 20 |
+
if not task_id or question_text is None:
|
| 21 |
+
print(f"Skipping item with missing task_id or question: {item}")
|
| 22 |
+
return None
|
| 23 |
+
|
| 24 |
+
submitted_answer = await agt(task_id, question_text, file_name)
|
| 25 |
+
|
| 26 |
+
return {
|
| 27 |
+
"task_id": task_id,
|
| 28 |
+
"question": question_text,
|
| 29 |
+
"submitted_answer": submitted_answer
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 34 |
+
"""
|
| 35 |
+
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
| 36 |
+
and displays the results.
|
| 37 |
+
"""
|
| 38 |
+
# --- Determine HF Space Runtime URL and Repo URL ---
|
| 39 |
+
space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
|
| 40 |
+
|
| 41 |
+
if profile:
|
| 42 |
+
username= f"{profile.username}"
|
| 43 |
+
print(f"User logged in: {username}")
|
| 44 |
+
else:
|
| 45 |
+
print("User not logged in.")
|
| 46 |
+
return "Please Login to Hugging Face with the button.", None
|
| 47 |
+
|
| 48 |
+
api_url = DEFAULT_API_URL
|
| 49 |
+
questions_url = f"{api_url}/questions"
|
| 50 |
+
submit_url = f"{api_url}/submit"
|
| 51 |
+
|
| 52 |
+
# 1. Instantiate Agent ( modify this part to create your agent)
|
| 53 |
try:
|
| 54 |
+
agent = MultiModalAgent()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
except Exception as e:
|
| 56 |
+
print(f"Error instantiating agent: {e}")
|
| 57 |
+
return f"Error initializing agent: {e}", None
|
| 58 |
+
# In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
|
| 59 |
+
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
| 60 |
+
print(agent_code)
|
| 61 |
+
|
| 62 |
+
# 2. Fetch Questions
|
| 63 |
+
print(f"Fetching questions from: {questions_url}")
|
| 64 |
+
try:
|
| 65 |
+
response = requests.get(questions_url, timeout=15)
|
| 66 |
+
response.raise_for_status()
|
| 67 |
+
questions_data = response.json()
|
| 68 |
+
if not questions_data:
|
| 69 |
+
print("Fetched questions list is empty.")
|
| 70 |
+
return "Fetched questions list is empty or invalid format.", None
|
| 71 |
+
print(f"Fetched {len(questions_data)} questions.")
|
| 72 |
+
except requests.exceptions.RequestException as e:
|
| 73 |
+
print(f"Error fetching questions: {e}")
|
| 74 |
+
return f"Error fetching questions: {e}", None
|
| 75 |
+
except requests.exceptions.JSONDecodeError as e:
|
| 76 |
+
print(f"Error decoding JSON response from questions endpoint: {e}")
|
| 77 |
+
print(f"Response text: {response.text[:500]}")
|
| 78 |
+
return f"Error decoding server response for questions: {e}", None
|
| 79 |
+
except Exception as e:
|
| 80 |
+
print(f"An unexpected error occurred fetching questions: {e}")
|
| 81 |
+
return f"An unexpected error occurred fetching questions: {e}", None
|
| 82 |
+
|
| 83 |
+
try:
|
| 84 |
+
#see if there is a loop already running. If there is, reuse it.
|
| 85 |
+
loop = asyncio.get_running_loop()
|
| 86 |
+
except RuntimeError:
|
| 87 |
+
# Create new event loop if one is not running
|
| 88 |
+
loop = asyncio.new_event_loop()
|
| 89 |
+
asyncio.set_event_loop(loop)
|
| 90 |
+
|
| 91 |
+
# 3. Run your Agent
|
| 92 |
+
results_log = []
|
| 93 |
+
answers_payload = []
|
| 94 |
+
print(f"Running agent on {len(questions_data)} questions...")
|
| 95 |
+
|
| 96 |
+
try:
|
| 97 |
+
results = loop.run_until_complete(
|
| 98 |
+
asyncio.gather(*(run_agent(agent, item) for item in questions_data))
|
| 99 |
+
)
|
| 100 |
+
answers_payload = [{key: value for key, value in item.items() if key != "question"}
|
| 101 |
+
for item in results]
|
| 102 |
+
|
| 103 |
+
for item in results:
|
| 104 |
+
results_log.append(
|
| 105 |
+
{
|
| 106 |
+
"Task ID": item['task_id'],
|
| 107 |
+
"Question": item['question'],
|
| 108 |
+
"Submitted Answer": item['submitted_answer']
|
| 109 |
+
}
|
| 110 |
+
)
|
| 111 |
+
finally:
|
| 112 |
+
# Clean up
|
| 113 |
+
loop.close()
|
| 114 |
+
|
| 115 |
+
if not answers_payload:
|
| 116 |
+
print("Agent did not produce any answers to submit.")
|
| 117 |
+
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
| 118 |
+
|
| 119 |
+
# 4. Prepare Submission
|
| 120 |
+
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 121 |
+
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
| 122 |
+
print(status_update)
|
| 123 |
+
|
| 124 |
+
# 5. Submit
|
| 125 |
+
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
| 126 |
+
try:
|
| 127 |
+
response = requests.post(submit_url, json=submission_data, timeout=60)
|
| 128 |
+
response.raise_for_status()
|
| 129 |
+
result_data = response.json()
|
| 130 |
+
final_status = (
|
| 131 |
+
f"Submission Successful!\n"
|
| 132 |
+
f"User: {result_data.get('username')}\n"
|
| 133 |
+
f"Overall Score: {result_data.get('score', 'N/A')}% "
|
| 134 |
+
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
| 135 |
+
f"Message: {result_data.get('message', 'No message received.')}"
|
| 136 |
+
)
|
| 137 |
+
print("Submission successful.")
|
| 138 |
+
results_df = pd.DataFrame(results_log)
|
| 139 |
+
return final_status, results_df
|
| 140 |
+
except requests.exceptions.HTTPError as e:
|
| 141 |
+
error_detail = f"Server responded with status {e.response.status_code}."
|
| 142 |
+
try:
|
| 143 |
+
error_json = e.response.json()
|
| 144 |
+
error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
|
| 145 |
+
except requests.exceptions.JSONDecodeError:
|
| 146 |
+
error_detail += f" Response: {e.response.text[:500]}"
|
| 147 |
+
status_message = f"Submission Failed: {error_detail}"
|
| 148 |
+
print(status_message)
|
| 149 |
+
results_df = pd.DataFrame(results_log)
|
| 150 |
+
return status_message, results_df
|
| 151 |
+
except requests.exceptions.Timeout:
|
| 152 |
+
status_message = "Submission Failed: The request timed out."
|
| 153 |
+
print(status_message)
|
| 154 |
+
results_df = pd.DataFrame(results_log)
|
| 155 |
+
return status_message, results_df
|
| 156 |
+
except requests.exceptions.RequestException as e:
|
| 157 |
+
status_message = f"Submission Failed: Network error - {e}"
|
| 158 |
+
print(status_message)
|
| 159 |
+
results_df = pd.DataFrame(results_log)
|
| 160 |
+
return status_message, results_df
|
| 161 |
+
except Exception as e:
|
| 162 |
+
status_message = f"An unexpected error occurred during submission: {e}"
|
| 163 |
+
print(status_message)
|
| 164 |
+
results_df = pd.DataFrame(results_log)
|
| 165 |
+
return status_message, results_df
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
# --- Build Gradio Interface using Blocks ---
|
| 169 |
+
with gr.Blocks() as demo:
|
| 170 |
+
gr.Markdown("# Basic Agent Evaluation Runner")
|
| 171 |
+
gr.Markdown(
|
| 172 |
+
"""
|
| 173 |
+
**Instructions:**
|
| 174 |
+
|
| 175 |
+
1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
|
| 176 |
+
2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
|
| 177 |
+
3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
|
| 178 |
+
|
| 179 |
+
---
|
| 180 |
+
**Disclaimers:**
|
| 181 |
+
Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
|
| 182 |
+
This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
|
| 183 |
+
"""
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
gr.LoginButton()
|
| 187 |
+
|
| 188 |
+
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
| 189 |
+
|
| 190 |
+
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
| 191 |
+
# Removed max_rows=10 from DataFrame constructor
|
| 192 |
+
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
| 193 |
+
|
| 194 |
+
run_button.click(
|
| 195 |
+
fn=run_and_submit_all,
|
| 196 |
+
outputs=[status_output, results_table]
|
| 197 |
+
)
|
| 198 |
+
|
| 199 |
+
if __name__ == "__main__":
|
| 200 |
+
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
| 201 |
+
# Check for SPACE_HOST and SPACE_ID at startup for information
|
| 202 |
+
space_host_startup = os.getenv("SPACE_HOST")
|
| 203 |
+
space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
|
| 204 |
+
|
| 205 |
+
if space_host_startup:
|
| 206 |
+
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
| 207 |
+
print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
|
| 208 |
+
else:
|
| 209 |
+
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
| 210 |
+
|
| 211 |
+
if space_id_startup: # Print repo URLs if SPACE_ID is found
|
| 212 |
+
print(f"✅ SPACE_ID found: {space_id_startup}")
|
| 213 |
+
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
| 214 |
+
print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
| 215 |
+
else:
|
| 216 |
+
print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
|
| 217 |
+
|
| 218 |
+
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 219 |
|
| 220 |
+
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
| 221 |
+
demo.launch(debug=True, share=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,9 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
gradio
|
|
|
|
| 2 |
requests
|
| 3 |
langgraph
|
| 4 |
-
langchain-
|
| 5 |
-
langchain-community
|
| 6 |
-
|
| 7 |
-
|
| 8 |
smolagents
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
asyncer
|
| 2 |
+
anyio
|
| 3 |
+
arxiv
|
| 4 |
gradio
|
| 5 |
+
httpx
|
| 6 |
requests
|
| 7 |
langgraph
|
| 8 |
+
langchain-google-genai
|
| 9 |
+
langchain-community
|
| 10 |
+
langchain-tavily
|
| 11 |
+
openpyxl
|
| 12 |
smolagents
|
| 13 |
+
tavily-python
|
| 14 |
+
wikipedia-api
|
| 15 |
+
wikipedia
|
| 16 |
+
duckduckgo-search
|
tools.py
CHANGED
|
@@ -1,219 +1,422 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from langchain.tools import tool
|
| 2 |
-
from
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
query: str,
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
| 27 |
"""
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
Args:
|
| 31 |
-
query: Search query
|
| 32 |
-
max_results: Max results to return
|
| 33 |
-
|
| 34 |
-
Returns:
|
| 35 |
-
dict: Combined results from Tavily, Wikipedia and Arxiv
|
| 36 |
-
"""
|
| 37 |
-
pass
|
| 38 |
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
"""
|
| 42 |
-
Kate's enhanced Excel analyzer with better data processing.
|
| 43 |
-
|
| 44 |
-
Args:
|
| 45 |
-
state: Current state dictionary
|
| 46 |
-
file_path: Path to the Excel file
|
| 47 |
-
|
| 48 |
-
Returns:
|
| 49 |
-
str: Analysis results or error message
|
| 50 |
-
|
| 51 |
-
Features:
|
| 52 |
-
- Improved data validation
|
| 53 |
-
- Support for larger files
|
| 54 |
-
- Better error messages
|
| 55 |
-
"""
|
| 56 |
-
try:
|
| 57 |
-
pass
|
| 58 |
-
except Exception as e:
|
| 59 |
-
return f"ERROR: {str(e)}"
|
| 60 |
-
|
| 61 |
-
@tool("analyze-audio")
|
| 62 |
-
async def analyze_audio(file_path: str) -> str:
|
| 63 |
-
"""
|
| 64 |
-
Analyze audio file content.
|
| 65 |
-
|
| 66 |
-
Args:
|
| 67 |
-
file_path: Path to the audio file
|
| 68 |
-
|
| 69 |
-
Returns:
|
| 70 |
-
str: Analysis results of the audio content
|
| 71 |
-
"""
|
| 72 |
-
try:
|
| 73 |
-
return "Audio analysis placeholder"
|
| 74 |
-
except Exception as e:
|
| 75 |
-
return f"ERROR: {str(e)}"
|
| 76 |
|
| 77 |
-
@tool("analyze-image")
|
| 78 |
-
async def analyze_image(file_path: str) -> str:
|
| 79 |
-
"""
|
| 80 |
-
Analyze image file content.
|
| 81 |
-
|
| 82 |
Args:
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
| 85 |
Returns:
|
| 86 |
-
str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
"""
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
-
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
"""
|
| 96 |
-
|
| 97 |
-
|
|
|
|
|
|
|
| 98 |
Args:
|
| 99 |
-
|
| 100 |
-
|
|
|
|
| 101 |
Returns:
|
| 102 |
-
str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
"""
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
-
@tool("
|
| 110 |
-
async def
|
| 111 |
"""
|
| 112 |
-
|
| 113 |
-
|
|
|
|
|
|
|
| 114 |
Args:
|
| 115 |
-
|
| 116 |
-
|
|
|
|
| 117 |
Returns:
|
| 118 |
-
str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
"""
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
|
|
|
|
|
|
| 124 |
|
| 125 |
-
|
| 126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
"""
|
| 128 |
-
Download file
|
| 129 |
-
|
| 130 |
Args:
|
| 131 |
-
|
| 132 |
-
|
|
|
|
| 133 |
Returns:
|
| 134 |
-
|
| 135 |
"""
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
|
|
|
| 140 |
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
async def read_file_contents(file_path: str) -> str:
|
| 143 |
"""
|
| 144 |
-
Read
|
| 145 |
-
|
| 146 |
Args:
|
| 147 |
-
file_path:
|
| 148 |
-
|
| 149 |
Returns:
|
| 150 |
-
|
| 151 |
"""
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
except Exception as e:
|
| 155 |
-
return f"ERROR: {str(e)}"
|
| 156 |
|
| 157 |
-
|
| 158 |
-
|
|
|
|
| 159 |
"""
|
| 160 |
-
|
| 161 |
-
|
| 162 |
Args:
|
| 163 |
-
|
| 164 |
-
|
| 165 |
Returns:
|
| 166 |
-
|
| 167 |
"""
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
-
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
"""
|
| 176 |
-
|
| 177 |
-
|
| 178 |
Args:
|
| 179 |
-
|
| 180 |
-
|
| 181 |
Returns:
|
| 182 |
-
|
| 183 |
"""
|
| 184 |
-
try:
|
| 185 |
-
return {"results": "Tavily search placeholder"}
|
| 186 |
-
except Exception as e:
|
| 187 |
-
return {"error": str(e)}
|
| 188 |
|
| 189 |
-
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
"""
|
| 192 |
-
|
| 193 |
-
|
| 194 |
Args:
|
| 195 |
-
|
| 196 |
-
|
| 197 |
Returns:
|
| 198 |
-
|
| 199 |
"""
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
|
|
|
|
|
|
| 204 |
|
| 205 |
-
|
| 206 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
"""
|
| 208 |
-
|
| 209 |
-
|
| 210 |
Args:
|
| 211 |
-
|
| 212 |
-
|
| 213 |
Returns:
|
| 214 |
-
|
| 215 |
"""
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import dotenv
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import tempfile
|
| 5 |
+
import typing
|
| 6 |
+
|
| 7 |
+
from base64 import b64encode
|
| 8 |
+
from io import StringIO
|
| 9 |
+
|
| 10 |
+
import httpx
|
| 11 |
+
|
| 12 |
+
from anyio import Path
|
| 13 |
+
from asyncer import asyncify
|
| 14 |
+
from langchain_community.document_loaders import ArxivLoader
|
| 15 |
+
from langchain_community.document_loaders import WikipediaLoader
|
| 16 |
+
from langchain_core.messages import HumanMessage
|
| 17 |
+
from langchain_tavily import TavilyExtract
|
| 18 |
+
from langchain_tavily import TavilySearch
|
| 19 |
+
from langgraph.prebuilt import create_react_agent
|
| 20 |
+
from langgraph.prebuilt import InjectedState
|
| 21 |
+
from langchain.tools import BaseTool
|
| 22 |
from langchain.tools import tool
|
| 23 |
+
from pydantic import Field
|
| 24 |
+
from typing_extensions import Annotated
|
| 25 |
+
|
| 26 |
+
from utils import get_llm
|
| 27 |
+
|
| 28 |
+
dotenv.load_dotenv()
|
| 29 |
+
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "")
|
| 30 |
+
if not GOOGLE_API_KEY:
|
| 31 |
+
raise ValueError("GOOGLE_API_KEY environment variable is not set.")
|
| 32 |
+
|
| 33 |
+
AGENT_MODEL_NAME = os.getenv("AGENT_MODEL_NAME", "gemini-2.0-flash")
|
| 34 |
+
|
| 35 |
+
MULTIMODAL_FILE_ANALYZER_PROMPT = """
|
| 36 |
+
You are a specialized file analysis AI assistant focused on extracting information from various file formats including images, videos, audio, and structured data.
|
| 37 |
+
Core Analysis Guidelines:
|
| 38 |
+
- Systematic processing: Analyze file contents step by step
|
| 39 |
+
- Precise responses: Provide answers in the most concise format - raw numbers, single words, or comma-delimited lists
|
| 40 |
+
- Format requirements:
|
| 41 |
+
* Numbers: No formatting (no commas, units, or symbols)
|
| 42 |
+
* Lists: Pure comma-separated values
|
| 43 |
+
* Text: Minimal words, no explanations
|
| 44 |
+
- Analysis approach:
|
| 45 |
+
* Images: Focus on visual elements, objects, text, and scene composition
|
| 46 |
+
* Audio: Identify sounds, speech, music, and audio characteristics
|
| 47 |
+
* Video: Analyze visual content, motion, and temporal elements
|
| 48 |
+
* Excel/CSV: Extract relevant data points and patterns
|
| 49 |
+
- Verification focus: Base answers solely on file contents
|
| 50 |
+
- Answer format: Always prefix with 'FINAL ANSWER: '
|
| 51 |
+
- Counting tasks: Return only the count
|
| 52 |
+
- Listing tasks: Return only the items
|
| 53 |
+
- Sorting tasks: Return only the ordered list
|
| 54 |
+
|
| 55 |
+
Example Responses:
|
| 56 |
+
Q: Count people in image? A: 3
|
| 57 |
+
Q: List colors in logo? A: blue, red, white
|
| 58 |
+
Q: Main topic of audio? A: weather forecast
|
| 59 |
+
Q: Excel total sales? A: 15420
|
| 60 |
+
Q: Video duration? A: 45
|
| 61 |
+
"""
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class SmolagentToolWrapper(BaseTool):
    """Adapter exposing a smolagents tool as a LangChain ``BaseTool``.

    The wrapped tool's ``name`` and ``description`` are copied onto this
    instance so LangChain/LangGraph can present it to the model; invocation
    is delegated to the wrapped callable.
    """

    # The underlying smolagents tool; declared as a pydantic field so it can
    # be passed through BaseTool's constructor.
    wrapped_tool: object = Field(description="Smolagents tool (wrapped)")

    def __init__(self, tool):
        # Mirror the smolagents tool's metadata; return_direct=False so the
        # agent keeps reasoning over the tool output instead of returning it
        # verbatim.
        super().__init__(
            name=tool.name,
            description=tool.description,
            return_direct=False,
            wrapped_tool=tool,
        )

    def _run(self, query: str) -> str:
        # Errors are returned as text (not raised) so the agent loop can
        # observe the failure and recover.
        try:
            return self.wrapped_tool(query)
        except Exception as e:
            return f"Error using SmolagentToolWrapper: {str(e)}"

    def _arun(self, *args: typing.Any, **kwargs: typing.Any) -> typing.Any:
        """Async version of the tool"""
        # asyncify runs the sync _run in a worker thread; the returned
        # coroutine is awaited by the caller (LangChain awaits _arun's result).
        return asyncify(self._run, cancellable=True)(*args, **kwargs)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
# Page-content extraction tool (Tavily Extract), instantiated at import time.
# NOTE(review): not referenced elsewhere in this chunk — presumably registered
# with the agent in another module; confirm before removing.
tavily_extract_tool = TavilyExtract()
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
@tool("search-tavily-tool", parse_docstring=True)
async def search_tavily(
    query: str,
    state: Annotated[dict, InjectedState],
    included_domains: list[str] | None = None,
    max_results: int = 5,
) -> dict[str, str]:
    """
    Search the web using Tavily API with optional domain filtering.

    This function performs a search using the Tavily search engine and returns formatted results.
    You can specify domains to include in the search results for more targeted information.

    Args:
        query (str): The search query to search the web for
        included_domains (list[str], optional): List of domains to include in search results
            (e.g., ["wikipedia.org", "cnn.com"]). Defaults to None.
        max_results (int, optional): Maximum number of results to return. Defaults to 5.

    Returns:
        dict[str, str]: A dictionary with key 'tavily_results' containing formatted search results,
            plus 'tavily_answer' when Tavily returns a synthesized answer.
            Each result includes document source, page information, and content.

    Example:
        results = await search_tavily("How many albums did Michael Jackson produce", included_domains=[])
        # Returns filtered results about Michael Jackson
    """
    # Configure Tavily search with provided parameters
    tavily_search_tool = TavilySearch(
        max_results=max_results,
        topic="general",
        include_domains=included_domains if included_domains else None,
        search_depth="advanced",
        include_answer="advanced",
    )

    # Bug fix: search the model-crafted `query`. The previous version searched
    # state["question"] directly, silently ignoring the `query` argument.
    # Fall back to the raw question only if the query is empty.
    search_query = query or state.get("question", "")
    search_docs = await tavily_search_tool.arun(search_query)

    # Format each hit as a pseudo-XML <Document> block for the agent.
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.get("url", "No URL")}"/>{doc.get("title", "No Title")}\n{doc.get("content", "")}\n</Document>'
            for doc in search_docs.get("results", [])
        ]
    )

    results = {"tavily_results": formatted_search_docs}

    # Surface Tavily's synthesized answer when available.
    answer = search_docs.get("answer", None)
    if answer:
        results["tavily_answer"] = answer

    return results
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
@tool("search-arxiv-tool", parse_docstring=True)
async def search_arxiv(query: str, max_num_result: int = 5) -> dict[str, str]:
    """
    Search arXiv for academic papers matching the provided query.
    This function queries the arXiv database for scholarly articles related to the
    search query and returns a formatted collection of the results.

    Args:
        query (str): The search query to find relevant academic papers.
        max_num_result (int, optional): Maximum number of results to return. Defaults to 5.

    Returns:
        dict[str, str]: A dictionary with key 'arxiv_results' containing formatted search results.
            Each result includes document source, page information, and content.

    Example:
        results = await search_arxiv("quantum computing", 3)
        # Returns dictionary with up to 3 formatted arXiv papers about quantum computing
    """
    search_docs = await ArxivLoader(query=query, load_max_docs=max_num_result).aload()
    # Render each paper as a <Document> block with its source metadata.
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
            for doc in search_docs
        ]
    )
    # Bug fix: key was misspelled "arvix_results", contradicting the documented
    # 'arxiv_results' contract in the docstring above.
    return {"arxiv_results": formatted_search_docs}
|
| 175 |
+
|
| 176 |
|
| 177 |
+
@tool("search-wikipedia-tool", parse_docstring=True)
async def search_wikipedia(query: str, max_num_result: int = 5) -> dict[str, str]:
    """
    Search Wikipedia for articles matching the provided query.
    This function queries the Wikipedia database for articles related to the
    search term and returns a formatted collection of the results.

    Args:
        query (str): The search query to find relevant Wikipedia articles.
        max_num_result (int, optional): Maximum number of results to return. Defaults to 5.

    Returns:
        dict[str, str]: A dictionary with key 'wikipedia_results' containing formatted search results.
            Each result includes document source, page information, and content.

    Example:
        results = await search_wikipedia("neural networks", 3)
        # Returns dictionary with up to 3 formatted Wikipedia articles about neural networks
    """
    loader = WikipediaLoader(
        query=query,
        load_max_docs=max_num_result,
        load_all_available_meta=True,
        doc_content_chars_max=128000,
    )
    pages = await loader.aload()

    # Render every article as a <Document> block carrying its source metadata.
    rendered = []
    for page in pages:
        src = page.metadata["source"]
        page_no = page.metadata.get("page", "")
        rendered.append(
            f'<Document source="{src}" page="{page_no}"/>\n{page.page_content}\n</Document>'
        )

    return {"wikipedia_results": "\n\n---\n\n".join(rendered)}
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
@tool("download-file-for-task-tool", parse_docstring=True)
async def download_file_for_task(task_id: str, filename: str | None = None) -> str:
    """
    Download a file for task_id, save to a temporary file, and return path

    Args:
        task_id: The task id file to download
        filename: Optional filename (will be generated if not provided)

    Returns:
        String path to the downloaded file
    """
    # Default the on-disk name to the task id itself.
    target_name = filename if filename is not None else task_id
    destination = Path(tempfile.gettempdir()) / target_name

    url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
    async with httpx.AsyncClient() as client:
        async with client.stream("GET", url) as response:
            response.raise_for_status()
            # NOTE: Path.open here is awaited — assumes an async Path
            # (anyio-style), not pathlib; stream the body to disk in chunks.
            async with await destination.open("wb") as out:
                async for chunk in response.aiter_bytes(chunk_size=4096):
                    await out.write(chunk)

    return str(destination)
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
@tool("read-file-contents-tool", parse_docstring=True)
async def read_file_contents(file_path: str) -> str:
    """
    Read a file and return its contents

    Args:
        file_path: String path to file to read

    Returns:
        Contents of the file at file_path
    """
    # read_text is awaited — Path here is an async Path (anyio-style).
    return await Path(file_path).read_text()
|
|
|
|
|
|
|
| 256 |
|
| 257 |
+
|
| 258 |
+
@tool("analyze-image-tool", parse_docstring=True)
async def analyze_image(state: Annotated[dict, InjectedState], image_path: str) -> str:
    """
    Analyze the image at image_path

    Args:
        image_path: String path where the image file is located on disk

    Returns:
        Answer to the question about the image file
    """
    # Read the image and base64-encode it for the multimodal model.
    async with await Path(image_path).open("rb") as handle:
        encoded_image = b64encode(await handle.read()).decode("utf-8")

    # Spin up a tool-less ReAct agent dedicated to file analysis.
    analyzer = create_react_agent(
        get_llm(
            llm_provider_api_key=GOOGLE_API_KEY,
            model_name=AGENT_MODEL_NAME,
        ),
        tools=[],
        state_modifier=MULTIMODAL_FILE_ANALYZER_PROMPT,
    )

    # Pair the original question with the inline image payload.
    prompt = HumanMessage(
        content=[
            {"type": "text", "text": state["question"]},
            {
                "type": "image",
                "source_type": "base64",
                "mime_type": "image/png",
                "data": encoded_image,
            },
        ]
    )

    outcome = await analyzer.ainvoke({"messages": [prompt]})
    return outcome["messages"][-1].content
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
@tool("analyze-excel-tool", parse_docstring=True)
async def analyze_excel(state: Annotated[dict, InjectedState], excel_path: str) -> str:
    """
    Analyze the excel file at excel_path

    Args:
        excel_path: String path where the excel file is located on disk

    Returns:
        Answer to the question about the excel file
    """
    # Convert the workbook to CSV text, then base64-encode it so it can be
    # attached to the model message as an inline file.
    buffer = StringIO()
    pd.read_excel(excel_path).to_csv(buffer, index=False)
    encoded_csv = b64encode(buffer.getvalue().encode("utf-8")).decode("utf-8")

    # Tool-less ReAct agent dedicated to file analysis.
    analyzer = create_react_agent(
        get_llm(
            llm_provider_api_key=GOOGLE_API_KEY,
            model_name=AGENT_MODEL_NAME,
        ),
        tools=[],
        state_modifier=MULTIMODAL_FILE_ANALYZER_PROMPT,
    )

    # Pair the original question with the CSV attachment.
    prompt = HumanMessage(
        content=[
            {"type": "text", "text": state["question"]},
            {
                "type": "file",
                "source_type": "base64",
                "mime_type": "text/csv",
                "data": encoded_csv,
            },
        ],
    )

    outcome = await analyzer.ainvoke({"messages": [prompt]})
    return outcome["messages"][-1].content
|
| 346 |
+
|
| 347 |
+
|
| 348 |
+
@tool("analyze-audio-tool", parse_docstring=True)
async def analyze_audio(state: Annotated[dict, InjectedState], audio_path: str) -> str:
    """
    Analyze the audio at audio_path

    Args:
        audio_path: String path where the audio file is located on disk

    Returns:
        Answer to the question about the audio file
    """
    audio_mime_type = "audio/mpeg"

    # Read the audio file and base64-encode it for the multimodal model.
    async with await Path(audio_path).open("rb") as handle:
        encoded_audio = b64encode(await handle.read()).decode("utf-8")

    # Tool-less ReAct agent dedicated to file analysis.
    analyzer = create_react_agent(
        get_llm(
            llm_provider_api_key=GOOGLE_API_KEY,
            model_name=AGENT_MODEL_NAME,
        ),
        tools=[],
        state_modifier=MULTIMODAL_FILE_ANALYZER_PROMPT,
    )

    # Pair the original question with the inline audio payload.
    prompt = HumanMessage(
        content=[
            {"type": "text", "text": state["question"]},
            {"type": "media", "data": encoded_audio, "mime_type": audio_mime_type},
        ],
    )

    outcome = await analyzer.ainvoke({"messages": [prompt]})
    return outcome["messages"][-1].content
|
| 386 |
+
|
| 387 |
+
|
| 388 |
+
@tool("analyze-video-tool", parse_docstring=True)
async def analyze_video(state: Annotated[dict, InjectedState], video_url: str) -> str:
    """
    Analyze the video at video_url

    Args:
        video_url: URL where the video is located

    Returns:
        Answer to the question about the video url
    """
    # Tool-less ReAct agent dedicated to file analysis.
    analyzer = create_react_agent(
        get_llm(
            llm_provider_api_key=GOOGLE_API_KEY,
            model_name=AGENT_MODEL_NAME,
        ),
        tools=[],
        state_modifier=MULTIMODAL_FILE_ANALYZER_PROMPT,
    )

    # The video is passed by URI; Gemini fetches it server-side.
    prompt = HumanMessage(
        content=[
            {"type": "text", "text": state["question"]},
            {
                "type": "media",
                "mime_type": "video/mp4",
                "file_uri": video_url,
            },
        ],
    )

    outcome = await analyzer.ainvoke({"messages": [prompt]})
    return outcome["messages"][-1].content
|
utils.py
CHANGED
|
@@ -1,28 +1,13 @@
|
|
| 1 |
-
from typing import Optional
|
| 2 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 3 |
|
|
|
|
| 4 |
def get_llm(
|
| 5 |
llm_provider_api_key: str,
|
| 6 |
-
model_name:
|
| 7 |
-
|
| 8 |
-
max_tokens: Optional[int] = None
|
| 9 |
-
) -> ChatGoogleGenerativeAI:
|
| 10 |
-
"""
|
| 11 |
-
Initialize and return a Google Generative AI language model.
|
| 12 |
-
|
| 13 |
-
Args:
|
| 14 |
-
llm_provider_api_key: Google API key
|
| 15 |
-
model_name: Name of the model to use (default: None)
|
| 16 |
-
temperature: Sampling temperature (default: 0.7)
|
| 17 |
-
max_tokens: Maximum number of tokens to generate (default: None)
|
| 18 |
-
|
| 19 |
-
Returns:
|
| 20 |
-
ChatGoogleGenerativeAI: Initialized language model
|
| 21 |
-
"""
|
| 22 |
return ChatGoogleGenerativeAI(
|
| 23 |
google_api_key=llm_provider_api_key,
|
|
|
|
|
|
|
| 24 |
model=model_name,
|
| 25 |
-
|
| 26 |
-
max_output_tokens=max_tokens,
|
| 27 |
-
convert_system_message_to_human=True
|
| 28 |
-
)
|
|
|
|
|
|
|
| 1 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 2 |
|
| 3 |
+
|
| 4 |
def get_llm(
    llm_provider_api_key: str,
    model_name: str = "gemini-2.0-flash",  # Default model aligned with AGENT_MODEL_NAME
) -> ChatGoogleGenerativeAI:
    """
    Initialize and return a Google Generative AI chat model.

    Args:
        llm_provider_api_key: Google API key used to authenticate.
        model_name: Gemini model identifier. Defaults to "gemini-2.0-flash".

    Returns:
        ChatGoogleGenerativeAI: Chat model configured with temperature 0.7 and
            up to 5 retries on transient API failures.
    """
    return ChatGoogleGenerativeAI(
        google_api_key=llm_provider_api_key,
        temperature=0.7,
        max_retries=5,
        model=model_name,
    )
|
|
|
|
|
|
|
|
|