Scott Cogan committed on
Commit
72ec790
·
1 Parent(s): 292d225

latest requirements

Browse files
Files changed (1) hide show
  1. app.py +104 -154
app.py CHANGED
@@ -5,234 +5,184 @@ import inspect
5
  import pandas as pd
6
  import asyncio
7
  from langchain_google_genai import ChatGoogleGenerativeAI
8
- from typing import IO, Dict
9
  from io import BytesIO
10
- from langchain_core.messages import HumanMessage, SystemMessage
11
- from langgraph.graph import StateGraph
12
  import base64
13
  from google.ai.generativelanguage_v1beta.types import Tool as GenAITool
14
  import google.generativeai as genai
15
- import os
16
- from pydantic import BaseModel
17
- from typing import List, Any
 
18
 
 
19
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
20
-
21
- # (Keep Constants as is)
22
- # --- Constants ---
23
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
24
  GEMINI_API_KEY = os.getenv("Gemini_API_key")
25
  SERPER_API_KEY = os.getenv("SERPER_API_KEY")
26
 
27
- # --- Basic Agent Definition ---
28
- # Agent capabilities required: Search the web, listen to audio recordings, watch YouTube videos (process the footage, not the transcript), work with Excel spreadsheets
29
- # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
30
-
31
- def get_file(task_id: str) -> IO:
32
- '''
33
- Downloads the file associated with the given task_id, if one exists and is mapped.
34
- If the question mentions an attachment, use this function.
35
- Args:
36
- task_id: Id of the question.
37
- Returns:
38
- The file associated with the question.
39
- '''
40
- file_request = requests.get(url=f'https://agents-course-unit4-scoring.hf.space/files/{task_id}')
41
- file_request.raise_for_status()
42
-
43
- return BytesIO(file_request.content)
44
 
 
 
45
  def analyse_excel(task_id: str) -> Dict[str, float]:
46
- '''
47
- Analyzes the Excel file associated with the given task_id and returns the sum of each numeric column.
48
- Args:
49
- task_id: Id of the question.
50
- Returns:
51
- A dictionary with the sum of each numeric column.
52
- '''
53
  excel_file = get_file(task_id)
54
  df = pd.read_excel(excel_file, sheet_name=0)
55
-
56
  return df.select_dtypes(include='number').sum().to_dict()
57
 
 
58
  def add_numbers(a: float, b: float) -> float:
59
- '''
60
- Adds two numbers together.
61
- Args:
62
- a: First number.
63
- b: Second number.
64
- Returns:
65
- The sum of the two numbers.
66
- '''
67
  return a + b
68
 
 
69
  def transcribe_audio(task_id: str) -> HumanMessage:
70
- '''
71
- Opens an audio file and returns its content as a string.
72
- Args:
73
- file: The audio file to be opened.
74
- Returns:
75
- The content of the audio file as a string.
76
- '''
77
  audio_file = get_file(task_id)
78
  if audio_file is None:
79
  raise ValueError("No audio file found for the given task_id.")
80
- # Encode the audio file to base64
81
- audio_file.seek(0) # Ensure the file pointer is at the beginning
82
  encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8")
83
-
84
  return HumanMessage(
85
  content=[
86
  {"type": "text", "text": "Transcribe the audio."},
87
  {
88
  "type": "media",
89
- "data": encoded_audio, # Use base64 string directly
90
  "mime_type": "audio/mpeg",
91
  },
92
  ]
93
  )
94
 
 
95
  def python_code(task_id: str) -> str:
96
- '''
97
- Returns the Python code associated with the given task_id.
98
- Args:
99
- task_id: Id of the question.
100
- Returns:
101
- The Python code associated with the question.
102
- '''
103
- code_request = requests.get(url=f'https://agents-course-unit4-scoring.hf.space/files/{task_id}')
104
  code_request.raise_for_status()
105
-
106
  return code_request.text
107
 
 
108
  def open_image(task_id: str) -> str:
109
- '''
110
- Opens an image file associated with the given task_id.
111
- Args:
112
- task_id: Id of the question.
113
- Returns:
114
- The base64 encoded string of the image file.
115
- '''
116
  image_file = get_file(task_id)
117
  if image_file is None:
118
  raise ValueError("No image file found for the given task_id.")
119
-
120
  return base64.b64encode(image_file.read()).decode("utf-8")
121
 
122
- def open_youtube_video(url: str, query:str) -> str:
123
- '''
124
- Answers a question about a video from the given URL.
125
- Args:
126
- url: The URL of the video file.
127
- query: The question to be answered about the video.
128
- Returns:
129
- Answer to the question about the video.
130
- '''
131
-
132
  client = genai.Client(api_key=GOOGLE_API_KEY)
133
-
134
  response = client.models.generate_content(
135
- model='models/gemini-2.0-flash',
136
- contents=types.Content(
137
- parts=[
138
- types.Part(
139
- file_data=types.FileData(file_uri=url)
140
- ),
141
- types.Part(text=f'''{query} YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated
142
- list of numbers and/or strings.''')
143
  ]
144
- )
145
- )
146
-
147
  return response.text
148
 
 
149
  def google_search(query: str) -> str:
150
- '''
151
- Performs a Google search for the given query.
152
- Args:
153
- query: The search query.
154
- Returns:
155
- The search results as a string.
156
- '''
157
  llm = ChatGoogleGenerativeAI(
158
- model="gemini-2.5-flash-preview-05-20",
159
- max_tokens=8192,
160
- temperature=0
161
- )
162
- response = llm.invoke(query,
163
- tools=[GenAITool(google_search={})]
164
  )
165
-
166
  return response.content
167
 
168
- class AgentState(BaseModel):
169
- messages: List[Any]
170
-
171
  class BasicAgent:
172
  def __init__(self):
173
  self.llm = ChatGoogleGenerativeAI(
174
  model="gemini-2.5-flash-preview-05-20",
175
  max_tokens=8192,
176
  temperature=0
177
- )
178
- self.tools = [get_file, analyse_excel, add_numbers, transcribe_audio, python_code, open_image, open_youtube_video
179
- , google_search
180
- ]
181
 
182
- self.agent = self.llm # Use the LLM directly
183
-
 
 
 
 
 
 
184
  self.sys_msg = SystemMessage('''You are a general AI assistant. I will ask you a question. Only provide YOUR FINAL ANSWER and nothing else.
185
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
186
  If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
187
  If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
188
  If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
189
- You have access to multiple tools and should use as many as you need to answer the question.
190
- If you are asked to analyze an Excel file, use the 'analyse_excel' tool.
191
- If you are asked to download a file, use the 'get_file' tool.
192
- If you are asked to add two numbers, use the 'add_numbers' tool. If you need to add more than two numbers, use the 'add_numbers'
193
- tool multiple times.
194
- If you are asked to transcribe an audio file, use the 'transcribe_audio' tool.
195
- If you are asked to run a Python code, use the 'python_code' tool.
196
- If you are asked to open an image, use the 'open_image' tool.
197
- If you were given a link with www.youtube.com, use the 'open_youtube_video' tool.
198
- If the question requires a web search because your internal knowledge doesn't have the information, use the 'google_search' tool.
199
- ''')
200
 
201
- # Graph
202
- self.builder = StateGraph(state_schema=AgentState)
203
- # Define nodes: these do the work
204
- self.builder.add_node("START", lambda state: AgentState(**state), input=AgentState)
205
- self.builder.add_node("assistant", self.assistant)
206
- self.builder.add_node("tools", self.tools_node)
207
- # Define edges: these determine how the control flow moves
208
- self.builder.add_edge("START", "assistant")
209
- self.builder.add_edge("assistant", "tools")
210
- self.builder.add_edge("tools", "assistant")
211
- print("Nodes:", self.builder.nodes)
212
- print("Edges:", self.builder.edges)
213
- self.react_graph = self.builder.compile()
 
 
 
214
 
215
  print("BasicAgent initialized.")
216
 
217
- def assistant(self, state):
218
- return {"messages": [self.llm.invoke([self.sys_msg] + state["messages"])]}
 
 
 
219
 
220
- def tools_node(self, state):
221
- # Execute the tool and return the result
222
- tool_name = state["messages"][-1].content
223
- for tool in self.tools:
224
- if tool.__name__ == tool_name:
225
- return {"messages": [tool(*state["args"])]}
226
- return {"messages": [f"Tool {tool_name} not found"]}
 
 
 
 
 
 
 
 
 
227
 
228
  async def __call__(self, question: str, task_id: str) -> str:
 
229
  print(f"Agent received question (first 50 chars): {question[:50]}...")
230
- fixed_answer = "This is a default answer."
231
-
232
- await asyncio.sleep(60)
233
- messages = self.react_graph.invoke({"messages": [f'Task id: {task_id}\n {question}']}, node="START")
234
- return messages["messages"][-1].content if messages["messages"] else fixed_answer
235
-
 
 
 
 
 
 
 
236
 
237
  def run_and_submit_all(profile):
238
  """
 
5
  import pandas as pd
6
  import asyncio
7
  from langchain_google_genai import ChatGoogleGenerativeAI
8
+ from typing import IO, Dict, TypedDict, Annotated, Sequence
9
  from io import BytesIO
10
+ from langchain_core.messages import HumanMessage, SystemMessage, BaseMessage, AIMessage
11
+ from langgraph.graph import StateGraph, END
12
  import base64
13
  from google.ai.generativelanguage_v1beta.types import Tool as GenAITool
14
  import google.generativeai as genai
15
+ import operator
16
+ from langgraph.prebuilt import ToolExecutor
17
+ from langchain_core.tools import tool
18
+ from utilities import get_file
19
 
20
+ # Constants
21
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
 
 
 
22
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
23
  GEMINI_API_KEY = os.getenv("Gemini_API_key")
24
  SERPER_API_KEY = os.getenv("SERPER_API_KEY")
25
 
26
+ # Define the state type
27
+ class AgentState(TypedDict):
28
+ messages: Annotated[Sequence[BaseMessage], operator.add]
29
+ next: str
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
+ # Convert existing functions to tools
32
+ @tool
33
  def analyse_excel(task_id: str) -> Dict[str, float]:
34
+ '''Analyzes the Excel file associated with the given task_id.'''
 
 
 
 
 
 
35
  excel_file = get_file(task_id)
36
  df = pd.read_excel(excel_file, sheet_name=0)
 
37
  return df.select_dtypes(include='number').sum().to_dict()
38
 
39
+ @tool
40
  def add_numbers(a: float, b: float) -> float:
41
+ '''Adds two numbers together.'''
 
 
 
 
 
 
 
42
  return a + b
43
 
44
+ @tool
45
  def transcribe_audio(task_id: str) -> HumanMessage:
46
+ '''Transcribes an audio file.'''
 
 
 
 
 
 
47
  audio_file = get_file(task_id)
48
  if audio_file is None:
49
  raise ValueError("No audio file found for the given task_id.")
50
+ audio_file.seek(0)
 
51
  encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8")
 
52
  return HumanMessage(
53
  content=[
54
  {"type": "text", "text": "Transcribe the audio."},
55
  {
56
  "type": "media",
57
+ "data": encoded_audio,
58
  "mime_type": "audio/mpeg",
59
  },
60
  ]
61
  )
62
 
63
+ @tool
64
  def python_code(task_id: str) -> str:
65
+ '''Returns the Python code associated with the given task_id.'''
66
+ code_request = requests.get(url=f'{DEFAULT_API_URL}/files/{task_id}')
 
 
 
 
 
 
67
  code_request.raise_for_status()
 
68
  return code_request.text
69
 
70
+ @tool
71
  def open_image(task_id: str) -> str:
72
+ '''Opens an image file associated with the given task_id.'''
 
 
 
 
 
 
73
  image_file = get_file(task_id)
74
  if image_file is None:
75
  raise ValueError("No image file found for the given task_id.")
 
76
  return base64.b64encode(image_file.read()).decode("utf-8")
77
 
78
+ @tool
79
+ def open_youtube_video(url: str, query: str) -> str:
80
+ '''Answers a question about a video from the given URL.'''
 
 
 
 
 
 
 
81
  client = genai.Client(api_key=GOOGLE_API_KEY)
 
82
  response = client.models.generate_content(
83
+ model='models/gemini-2.0-flash',
84
+ contents=types.Content(
85
+ parts=[
86
+ types.Part(file_data=types.FileData(file_uri=url)),
87
+ types.Part(text=f'''{query} YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated
88
+ list of numbers and/or strings.''')
 
 
89
  ]
90
+ )
91
+ )
 
92
  return response.text
93
 
94
+ @tool
95
  def google_search(query: str) -> str:
96
+ '''Performs a Google search for the given query.'''
 
 
 
 
 
 
97
  llm = ChatGoogleGenerativeAI(
98
+ model="gemini-2.5-flash-preview-05-20",
99
+ max_tokens=8192,
100
+ temperature=0
 
 
 
101
  )
102
+ response = llm.invoke(query, tools=[GenAITool(google_search={})])
103
  return response.content
104
 
 
 
 
105
  class BasicAgent:
106
  def __init__(self):
107
  self.llm = ChatGoogleGenerativeAI(
108
  model="gemini-2.5-flash-preview-05-20",
109
  max_tokens=8192,
110
  temperature=0
111
+ )
 
 
 
112
 
113
+ # Create tool executor
114
+ self.tools = [
115
+ get_file, analyse_excel, add_numbers, transcribe_audio,
116
+ python_code, open_image, open_youtube_video, google_search
117
+ ]
118
+ self.tool_executor = ToolExecutor(self.tools)
119
+
120
+ # System message
121
  self.sys_msg = SystemMessage('''You are a general AI assistant. I will ask you a question. Only provide YOUR FINAL ANSWER and nothing else.
122
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
123
  If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
124
  If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
125
  If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
126
+ You have access to multiple tools and should use as many as you need to answer the question.''')
 
 
 
 
 
 
 
 
 
 
127
 
128
+ # Create the graph
129
+ self.workflow = StateGraph(AgentState)
130
+
131
+ # Add nodes
132
+ self.workflow.add_node("agent", self.call_model)
133
+ self.workflow.add_node("tools", self.call_tools)
134
+
135
+ # Add edges
136
+ self.workflow.add_edge("agent", "tools")
137
+ self.workflow.add_edge("tools", "agent")
138
+
139
+ # Set entry point
140
+ self.workflow.set_entry_point("agent")
141
+
142
+ # Compile the graph
143
+ self.app = self.workflow.compile()
144
 
145
  print("BasicAgent initialized.")
146
 
147
+ def call_model(self, state: AgentState) -> AgentState:
148
+ """Call the model to generate a response."""
149
+ messages = state["messages"]
150
+ response = self.llm.invoke([self.sys_msg] + messages)
151
+ return {"messages": [response], "next": "tools"}
152
 
153
+ def call_tools(self, state: AgentState) -> AgentState:
154
+ """Call the tools based on the model's response."""
155
+ messages = state["messages"]
156
+ last_message = messages[-1]
157
+
158
+ if isinstance(last_message, AIMessage):
159
+ # Extract tool calls from the message
160
+ tool_calls = last_message.tool_calls
161
+ if tool_calls:
162
+ for tool_call in tool_calls:
163
+ tool_name = tool_call.name
164
+ tool_args = tool_call.args
165
+ result = self.tool_executor.invoke(tool_name, tool_args)
166
+ messages.append(AIMessage(content=f"Tool result: {result}"))
167
+
168
+ return {"messages": messages, "next": "agent"}
169
 
170
  async def __call__(self, question: str, task_id: str) -> str:
171
+ """Process a question and return the answer."""
172
  print(f"Agent received question (first 50 chars): {question[:50]}...")
173
+
174
+ # Create initial state
175
+ initial_state = {
176
+ "messages": [HumanMessage(content=f'Task id: {task_id}\n {question}')],
177
+ "next": "agent"
178
+ }
179
+
180
+ # Process through the graph
181
+ result = self.app.invoke(initial_state)
182
+
183
+ # Extract the final answer
184
+ final_message = result["messages"][-1]
185
+ return final_message.content if isinstance(final_message, AIMessage) else "No answer generated."
186
 
187
  def run_and_submit_all(profile):
188
  """