llamasrock committed on
Commit
09d73b3
·
verified ·
1 Parent(s): b7bb69f

Update app.py

Browse files

Changed to LangGraph agent

Files changed (1) hide show
  1. app.py +193 -38
app.py CHANGED
@@ -3,13 +3,20 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
6
- import os
7
- from smolagents import LiteLLMModel, CodeAgent, GoogleSearchTool
8
- from google import genai
9
- from google.genai import types
10
  import asyncio
 
11
  import requests
12
- from utilities import get_file
 
 
 
 
 
 
 
 
 
 
13
 
14
  # (Keep Constants as is)
15
  # --- Constants ---
@@ -21,50 +28,198 @@ SERPER_API_KEY = os.getenv("SERPER_API_KEY")
21
  # Agent capabilities required: Search the web, listen to audio recordings, watch YouTube videos (process the footage, not the transcript), work with Excel spreadsheets
22
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  class BasicAgent:
26
  def __init__(self):
27
- self.llm_model = LiteLLMModel(
28
- model_id="gemini/gemini-2.5-flash", # you can see other model names here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models. It is important to prefix the name with "gemini/"
29
- api_key=GEMINI_API_KEY,
30
- api_base = 'https://generativelanguage.googleapis.com',
31
- max_tokens=8192
32
  )
33
- # self.google_search_tool = Tool(google_search = GoogleSearch())
34
- self.google_search_tool = GoogleSearchTool()
35
- self.get_file_tool = get_file
36
- self.agent = CodeAgent(model = self.llm_model, tools = [self.google_search_tool, self.get_file_tool])
37
- # # Define Google API client with GoogleSearch tool
38
- # self.client = genai.Client(api_key=GEMINI_API_KEY)
39
 
40
- print("BasicAgent initialized.")
41
-
42
- async def __call__(self, question: str, task_id: str) -> str:
43
- print(f"Agent received question (first 50 chars): {question[:50]}...")
44
- fixed_answer = "This is a default answer."
45
- # print(f"Agent returning fixed answer: {fixed_answer}")
46
- # return fixed_answer
47
 
48
- prompt = f'''You are a general AI assistant. I will ask you a question. Only provide YOUR FINAL ANSWER and nothing else.
49
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
50
  If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
51
  If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
52
  If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
53
- {question}'''
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
- await asyncio.sleep(10)
56
- return self.agent.run(prompt, task_id)
57
- # # Use the Google GenAI client to run the question
58
- # answer = self.client.models.generate_content(
59
- # model='gemini-2.0-flash',
60
- # contents=f'''Answer the following question in the format as requested. If the format not specified, then provide a single word/number/name.
61
- # In your response, do not include anything other than your answer. {question}''',
62
- # config=types.GenerateContentConfig(
63
- # tools=[types.Tool(google_search=types.GoogleSearch()),
64
- # types.Tool(code_execution=self.get_file)]
65
- # )
66
- # )
67
- # return answer.text
68
 
69
 
70
  def run_and_submit_all( profile: gr.OAuthProfile | None):
 
3
  import requests
4
  import inspect
5
  import pandas as pd
 
 
 
 
6
  import asyncio
7
+ from langchain_google_genai.chat_models import ChatGoogleGenerativeAI
8
  import requests
9
+ from typing import IO, Dict
10
+ from io import BytesIO
11
+ from langchain_core.messages import HumanMessage, SystemMessage
12
+ from langgraph.graph import MessagesState
13
+ from langgraph.graph import START, StateGraph
14
+ from langgraph.prebuilt import tools_condition
15
+ from langgraph.prebuilt import ToolNode
16
+ from pytube import YouTube
17
+ import base64
18
+ from google.ai.generativelanguage_v1beta.types import Tool as GenAITool
19
+ from google.ai.generativelanguage_v1beta.types import FileData
20
 
21
  # (Keep Constants as is)
22
  # --- Constants ---
 
28
  # Agent capabilities required: Search the web, listen to audio recordings, watch YouTube videos (process the footage, not the transcript), work with Excel spreadsheets
29
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
30
 
31
def get_file(task_id: str) -> IO:
    '''
    Downloads the file associated with the given task_id, if one exists and is mapped.
    If the question mentions an attachment, use this function.
    Args:
        task_id: Id of the question.
    Returns:
        An in-memory binary stream (BytesIO) with the downloaded file contents.
    Raises:
        requests.HTTPError: if the scoring endpoint responds with an error status.
    '''
    # timeout= guards against a hung download stalling the whole agent run
    file_request = requests.get(
        url=f'https://agents-course-unit4-scoring.hf.space/files/{task_id}',
        timeout=30,
    )
    file_request.raise_for_status()

    return BytesIO(file_request.content)
44
+
45
def analyse_excel(task_id: str) -> Dict[str, float]:
    '''
    Analyzes the Excel file associated with the given task_id and returns the sum of each numeric column.
    Args:
        task_id: Id of the question.
    Returns:
        A dictionary with the sum of each numeric column.
    '''
    # Fetch the attachment and load only the first worksheet.
    workbook = get_file(task_id)
    sheet = pd.read_excel(workbook, sheet_name=0)
    # Restrict to numeric columns before summing so text columns don't break the totals.
    numeric_columns = sheet.select_dtypes(include='number')
    return numeric_columns.sum().to_dict()
57
+
58
def add_numbers(a: float, b: float) -> float:
    '''
    Adds two numbers together.
    Args:
        a: First number.
        b: Second number.
    Returns:
        The sum of the two numbers.
    '''
    total = a + b
    return total
68
+
69
def transcribe_audio(task_id: str) -> HumanMessage:
    '''
    Builds a multimodal message asking the model to transcribe the audio
    attachment of the given task.
    Args:
        task_id: Id of the question whose audio attachment should be transcribed.
    Returns:
        A HumanMessage carrying the transcription instruction and the
        base64-encoded audio as an inline media part.
    Raises:
        ValueError: if no audio file is found for the given task_id.
    '''
    audio_file = get_file(task_id)
    if audio_file is None:
        raise ValueError("No audio file found for the given task_id.")
    # Encode the audio file to base64
    audio_file.seek(0)  # Ensure the file pointer is at the beginning
    encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8")

    # NOTE(review): mime_type is hard-coded — assumes every audio attachment
    # is MP3; confirm against the task files.
    return HumanMessage(
        content=[
            {"type": "text", "text": "Transcribe the audio."},
            {
                "type": "media",
                "data": encoded_audio,  # Use base64 string directly
                "mime_type": "audio/mpeg",
            },
        ]
    )
94
+
95
def python_code(task_id: str) -> str:
    '''
    Returns the Python code associated with the given task_id.
    Args:
        task_id: Id of the question.
    Returns:
        The Python code associated with the question, as text.
    Raises:
        requests.HTTPError: if the scoring endpoint responds with an error status.
    '''
    # timeout= prevents a stuck request from blocking the agent indefinitely
    code_request = requests.get(
        url=f'https://agents-course-unit4-scoring.hf.space/files/{task_id}',
        timeout=30,
    )
    code_request.raise_for_status()

    return code_request.text
107
+
108
def open_image(task_id: str) -> str:
    '''
    Opens an image file associated with the given task_id.
    Args:
        task_id: Id of the question.
    Returns:
        The base64 encoded string of the image file.
    '''
    # Defensive check kept from the original contract.
    image_stream = get_file(task_id)
    if image_stream is None:
        raise ValueError("No image file found for the given task_id.")

    raw_bytes = image_stream.read()
    return base64.b64encode(raw_bytes).decode("utf-8")
121
+
122
def open_youtube_video(url: str) -> HumanMessage:
    '''
    Opens a video file from the given URL.
    Args:
        url: The URL of the video file.
    Returns:
        HumanMessage instructions for the video file.
    '''
    # Wrap the remote URL so the model can fetch the footage itself.
    footage = FileData(url=url)

    instruction_part = {"type": "text", "text": "Watch the video and answer the question."}
    media_part = {
        "type": "media",
        "data": footage,
        "mime_type": "video/mp4",
    }
    return HumanMessage(content=[instruction_part, media_part])
142
+
143
def google_search(query: str) -> str:
    '''
    Performs a Google search for the given query.
    Args:
        query: The search query.
    Returns:
        The search results as a string.
    '''
    # Dedicated model instance with the Google Search grounding tool enabled.
    search_model = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash-preview-04-17",
        max_tokens=8192,
        temperature=0,
    )
    result = search_model.invoke(
        query,
        tools=[GenAITool(google_search={})],
    )
    return result.content
161
+
162
 
163
class BasicAgent:
    '''
    ReAct-style LangGraph agent: a Gemini assistant node bound to a set of
    tools, with a conditional edge that loops tool calls back into the
    assistant until a final answer is produced.
    '''

    def __init__(self):
        # Tool-calling chat model; temperature 0 for deterministic answers.
        self.llm = ChatGoogleGenerativeAI(
            model="gemini-2.5-flash-preview-04-17",
            max_tokens=8192,
            temperature=0
        )
        self.tools = [get_file, analyse_excel, add_numbers, transcribe_audio,
                      python_code, open_image, open_youtube_video, google_search]

        self.agent = self.llm.bind_tools(self.tools)

        # FIX: the prompt previously told the model to use an 'open_video'
        # tool, but the registered tool is named 'open_youtube_video'.
        self.sys_msg = SystemMessage('''You are a general AI assistant. I will ask you a question. Only provide YOUR FINAL ANSWER and nothing else.
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
You have access to multiple tools and should use as many as you need to answer the question.
If you are asked to analyze an Excel file, use the 'analyse_excel' tool.
If you are asked to download a file, use the 'get_file' tool.
If you are asked to add two numbers, use the 'add_numbers' tool. If you need to add more than two numbers, use the 'add_numbers'
tool multiple times.
If you are asked to transcribe an audio file, use the 'transcribe_audio' tool.
If you are asked to run a Python code, use the 'python_code' tool.
If you are asked to open an image, use the 'open_image' tool.
If you are asked to open a YouTube video, use the 'open_youtube_video' tool.
If the question requires a web search because your internal knowledge doesn't have the information, use the 'google_search' tool.
''')

        # Graph
        self.builder = StateGraph(MessagesState)

        # Define nodes: these do the work
        self.builder.add_node("assistant", self.assistant)
        self.builder.add_node("tools", ToolNode(self.tools))

        # Define edges: these determine how the control flow moves
        self.builder.add_edge(START, "assistant")
        self.builder.add_conditional_edges(
            "assistant",
            # If the latest message (result) from assistant is a tool call -> tools_condition routes to tools
            # If the latest message (result) from assistant is a not a tool call -> tools_condition routes to END
            tools_condition,
        )
        self.builder.add_edge("tools", "assistant")
        self.react_graph = self.builder.compile()

        print("BasicAgent initialized.")

    def assistant(self, state: MessagesState):
        '''Assistant node: run the tool-bound model on the system prompt plus conversation so far.'''
        return {"messages": [self.agent.invoke([self.sys_msg] + state["messages"])]}

    async def __call__(self, question: str, task_id: str) -> str:
        '''
        Answer one question via the compiled graph.
        Args:
            question: The question text.
            task_id: Id of the question (used by tools to fetch attachments).
        Returns:
            The final answer text, or a default string if the graph produced no messages.
        '''
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        fixed_answer = "This is a default answer."

        # Brief pause between questions — presumably to respect API rate limits; confirm.
        await asyncio.sleep(4)
        messages = self.react_graph.invoke({"messages": f'Task id: {task_id}\n {question}'})
        return messages["messages"][-1].content if messages["messages"] else fixed_answer
 
223
 
224
 
225
  def run_and_submit_all( profile: gr.OAuthProfile | None):