Final_Assignment_Agents_Course

Sleeping

App Files Files Community

David committed on May 28, 2025

Commit

3f771a9

1 Parent(s): edf3100

Still implementing and trying

Browse files

Files changed (5) hide show

agent.py +125 -21
app.py +12 -5
gaia_system_prompt.py +18 -1
requirements.txt +6 -0
tools.py +127 -9

agent.py CHANGED Viewed

@@ -1,50 +1,154 @@
 from llama_index.llms.google_genai import GoogleGenAI
 from llama_index.tools.arxiv import ArxivToolSpec
 from llama_index.tools.wikipedia import WikipediaToolSpec
-from llama_index.tools.duckduckgo import DuckDuckGoSearchResultsToolSpec
 from llama_index.core.tools import FunctionTool
-from llama_index.core.agent.workflow import AgentWorkflow
 from tools import interpret_python_math_code
-from gaia_system_prompt import GAIA_SYSTEM_PROMPT
 import os
 GEMINI_API_KEY = os.getenv("GEMINI_TOKEN")
 GEMINI_MODEL_NAME = "gemini-2.5-flash-preview-04-17"
 class FinalAgent:
     def __init__(self):
         # LLM Initialization
-        self.llm = GoogleGenAI(model=GEMINI_MODEL_NAME, api_key=GEMINI_API_KEY)
         # Tool Initialization
         self.tools = [
             FunctionTool.from_defaults(
-                func=interpret_python_math_code,
                 name="InterpretPythonMathCode",
                 description="Interprets Python code for mathematical expressions."
-            ),
-            DuckDuckGoSearchResultsToolSpec(),
-            WikipediaToolSpec(),
-            ArxivToolSpec()
         ]
         # Agent Workflow Initialization
-        self.agent = AgentWorkflow(
             llm=self.llm,
-            tools=self.tools,
-            system_prompt=GAIA_SYSTEM_PROMPT
         )
         print("FinalAgent initialized.")
-    def __call__(self, question: str) -> str:
-        # Example
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        # Implement agent logic here
-        response = self.agent.run(question)
-        return response

 from llama_index.llms.google_genai import GoogleGenAI
+from llama_index.llms.gemini import Gemini
+from llama_index.llms.groq import Groq
+from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
 from llama_index.tools.arxiv import ArxivToolSpec
 from llama_index.tools.wikipedia import WikipediaToolSpec
+from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
 from llama_index.core.tools import FunctionTool
+from llama_index.core.agent.workflow import AgentWorkflow, ReActAgent
+from llama_index.llms.lmstudio import LMStudio
+from llama_index.core.agent.workflow import (
+    AgentStream,
+    AgentOutput
+)
+from gradio import ChatMessage
+from llama_index.core.base.llms.types import ChatMessage as llama_index_chat_message
 from tools import interpret_python_math_code
+from gaia_system_prompt import SYSTEM_PROMPT as GAIA_SYSTEM_PROMPT
 import os
+import asyncio
+TIMEOUT=180 # Timeout for agent execution in seconds
 GEMINI_API_KEY = os.getenv("GEMINI_TOKEN")
+GROQ_API_KEY = os.getenv("GROQ_TOKEN")
+GEMINI_OPENAI_API_DIR = "https://generativelanguage.googleapis.com/v1beta/openai/"
 GEMINI_MODEL_NAME = "gemini-2.5-flash-preview-04-17"
+LMSTUDIO_MODEL_NAME = "gemma-3-12B-it-qat-GGUF"
+API_DIR = "http://host.docker.internal:1234/v1"  # LM Studio API URL
 class FinalAgent:
     def __init__(self):
         # LLM Initialization
+        # self.llm = GoogleGenAI(model=GEMINI_MODEL_NAME, api_key=GEMINI_API_KEY)
+        # self.llm = Gemini(model=GEMINI_MODEL_NAME, api_key=GEMINI_API_KEY)
+        # self.llm = Groq(model="meta-llama/llama-4-maverick-17b-128e-instruct", api_key=GROQ_API_KEY)
+        # self.llm = LMStudio(model_name=LMSTUDIO_MODEL_NAME, base_url=API_DIR, request_timeout=180, temperature=0.1)
+        self.llm = HuggingFaceInferenceAPI(model_name="meta-llama/Llama-3.3-70B-Instruct", timeout=TIMEOUT)
         # Tool Initialization
         self.tools = [
             FunctionTool.from_defaults(
+                fn=interpret_python_math_code,
                 name="InterpretPythonMathCode",
                 description="Interprets Python code for mathematical expressions."
+            )
         ]
+        self.tools.extend(
+            ArxivToolSpec().to_tool_list()
+        )
+        self.tools.extend(
+            WikipediaToolSpec().to_tool_list()
+        )
+        self.tools.extend(
+            DuckDuckGoSearchToolSpec().to_tool_list()
+        )
         # Agent Workflow Initialization
+        # self.agent = AgentWorkflow.from_tools_or_functions(
+        #     tools_or_functions=self.tools,
+        #     llm=self.llm,
+        #     system_prompt=GAIA_SYSTEM_PROMPT,
+        #     timeout=TIMEOUT
+        # )
+        self.agent = ReActAgent(
             llm=self.llm,
+            verbose=True,
+            max_iterations=5,
+            system_prompt=GAIA_SYSTEM_PROMPT,
+            tools=self.tools
         )
         print("FinalAgent initialized.")
+    # async def __call__(self, question: str) -> str:
+    #     # Example
+    #     print(f"Agent received question: {question}")
+    #     # fixed_answer = "This is a default answer."
+    #     # print(f"Agent returning fixed answer: {fixed_answer}")
+    #     # response = fixed_answer
+    #     # Implement agent logic here
+    #     response = ""
+    #     # Run the agent with the question
+    #     stream = await self.agent.run(question)
+    #     response = stream.response.content
+    #     # async for event in stream.stream_events():
+    #     #         if isinstance(event, AgentStream):
+    #     #              # Check if delta is empty
+    #     #             if event.raw["choices"][0]["delta"] != {}:
+    #     #                 response += event.raw["choices"][0]["delta"]["content"]
+    #     print(f"Agent response: {response}")
+    #     return response
+    async def __call__(self, question: str) -> str:
+        """
+        Run the agent on a question and return its short final answer.
+        Args:
+            question (str): The question text handed to the agent.
+        Returns:
+            str: The text following the last "FINAL ANSWER:" marker in the
+            agent's reply, with surrounding whitespace removed. When the marker
+            is absent the full reply text is returned. When the agent raises,
+            the returned text is "Agent error: <exception message>".
+        """
+        print(f"Agent received question: {question}")
+        response_str = ""
+        try:
+            # The agent's `run` is awaited directly; the reply is read from
+            # the `.response` attribute of whatever it returns.
+            agent_chat_response = await self.agent.run(question)
+            potential_response_obj = agent_chat_response.response
+            if isinstance(potential_response_obj, ChatMessage):
+                # If it's a ChatMessage, its .content attribute should hold the string
+                print(f"DEBUG: Response object is ChatMessage. Role: {potential_response_obj.role}")
+                response_str = potential_response_obj.content
+                if response_str is None: # Handle cases where content might be None
+                    print("DEBUG: ChatMessage content is None, defaulting to empty string.")
+                    response_str = ""
+            elif isinstance(potential_response_obj, str):
+                # If it's already a string
+                print("DEBUG: Response object is str.")
+                response_str = potential_response_obj
+            elif isinstance(potential_response_obj, llama_index_chat_message):
+                # If it's a llama_index ChatMessage, use its .content attribute
+                print(f"DEBUG: Response object is llama_index ChatMessage. Role: {potential_response_obj.role}")
+                response_str = potential_response_obj.content
+                if response_str is None:
+                    print("DEBUG: llama_index ChatMessage content is None, defaulting to empty string.")
+                    response_str = ""
+            else:
+                # Fallback if it's some other type
+                print(f"Warning: Agent response was of unexpected type: {type(potential_response_obj)}. Converting to string.")
+                response_str = str(potential_response_obj)
+        except Exception as e:
+            # Top-level boundary: report the failure as the answer text instead
+            # of propagating, so one failing question does not abort the caller.
+            print(f"Error during agent execution with LLM {self.llm.__class__.__name__}: {e}")
+            response_str = f"Agent error: {e}"
+        # Keep only what follows the last marker. The marker is matched without a
+        # trailing space so replies such as "FINAL ANSWER:\n42" are handled too;
+        # strip() removes the separator whitespace either way.
+        final_answer_marker = "FINAL ANSWER:"
+        if final_answer_marker in response_str:
+            response_str = response_str.rsplit(final_answer_marker, 1)[-1].strip()
+        else:
+            print("Warning: 'FINAL ANSWER:' not found in response string. Returning full response.")
+        print(f"Agent final response: {response_str}")
+        return response_str
+async def main():
+    """Ask a freshly built FinalAgent one hard-coded sample question and print the reply."""
+    sample_question = "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."
+    final_agent = FinalAgent()
+    reply = await final_agent(sample_question)
+    print(f"Final answer: {reply}")
+if __name__ == "__main__":
+    asyncio.run(main())

app.py CHANGED Viewed

@@ -4,6 +4,9 @@ import requests
 import inspect
 import pandas as pd
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
@@ -19,7 +22,7 @@ class BasicAgent:
         print(f"Agent returning fixed answer: {fixed_answer}")
         return fixed_answer
-def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
@@ -40,7 +43,9 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
@@ -80,7 +85,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
@@ -171,7 +176,7 @@ with gr.Blocks() as demo:
         outputs=[status_output, results_table]
     )
-if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
@@ -193,4 +198,6 @@ if __name__ == "__main__":
     print("-"*(60 + len(" App Starting ")) + "\n")
     print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)

 import inspect
 import pandas as pd
+from agent import FinalAgent
+import asyncio
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
         print(f"Agent returning fixed answer: {fixed_answer}")
         return fixed_answer
+async def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
+        # agent = BasicAgent()
+        agent = FinalAgent()  # Use your custom agent class here
+        print(f"Agent instantiated successfully: {agent}")
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            submitted_answer = await agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
         outputs=[status_output, results_table]
     )
+async def main():
     print("\n" + "-"*30 + " App Starting " + "-"*30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
     print("-"*(60 + len(" App Starting ")) + "\n")
     print("Launching Gradio Interface for Basic Agent Evaluation...")
+    demo.launch(debug=True, share=False)
+if __name__ == "__main__":
+    asyncio.run(main())

gaia_system_prompt.py CHANGED Viewed

@@ -3,4 +3,21 @@ FINAL ANSWER: [YOUR FINAL ANSWER].
 YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
 If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
 If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
-If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""

 YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
 If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
 If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
+If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
+SYSTEM_PROMPT = """
+You are a general AI assistant. Answer my question directly, following these strict rules. Your entire output must be *only* the template below.
+**Rules:**
+*   No thoughts, explanations, or extra text.
+*   The *only* output is: FINAL ANSWER: [YOUR SHORT ANSWER]
+*   [YOUR SHORT ANSWER] is a number, string, or comma-separated list.
+*   Numbers: No commas, no units (unless specified).
+*   Strings: No articles, no abbreviations, digits as words (unless specified).
+*   Lists: Apply number/string rules to items.
+**Example:**
+User: What is the capital of France?
+Assistant:
+FINAL ANSWER: Paris
+"""

requirements.txt CHANGED Viewed

@@ -3,7 +3,13 @@ requests
 numpy
 pandas
 scipy
 llama-index
 llama-index-llms-gemini
 llama-index-llms-google-genai
 llama-index-utils-workflow

 numpy
 pandas
 scipy
+groq
 llama-index
+llama-index-llms-huggingface
+llama-index-llms-huggingface-api
+llama-index-llms-groq
+llama-index-utils-workflow
+llama-index-llms-lmstudio
 llama-index-llms-gemini
 llama-index-llms-google-genai
 llama-index-utils-workflow

tools.py CHANGED Viewed

@@ -5,6 +5,12 @@ import sys
 import numpy as np
 import pandas as pd
 import scipy
 ALLOWED_MODULES = {"numpy", "pandas", "scipy"}
@@ -113,12 +119,124 @@ def interpret_python_math_code(python_code: str) -> str:
             sys.stdout = old_stdout
-# Example usage:
-if __name__ == "__main__":
-    code = """
-import numpy as np
-# import os # This should trigger an error since 'os' is not allowed
-arr = np.array([1, 2, 3, 4, 5])
-_result = arr.mean()
-"""
-    print(interpret_python_math_code(code))

 import numpy as np
 import pandas as pd
 import scipy
+from groq import Groq
+from pathlib import Path
+import pandas as pd
+import mimetypes
+import base64
 ALLOWED_MODULES = {"numpy", "pandas", "scipy"}
             sys.stdout = old_stdout
+## STT tool
+def convert_audio_to_text(path_to_audio: str) -> str:
+    """
+    Converts speech from an audio file into text.
+    Args:
+        path_to_audio (str): The path to the audio file to be transcribed.
+            A leading "~" is expanded and the path is resolved before use.
+    Returns:
+        str: The transcribed text content of the audio file.
+    Raises:
+        TypeError: If path_to_audio is not a string.
+        FileNotFoundError: If the resolved path is not an existing file.
+    """
+    # Validate audio file
+    if not isinstance(path_to_audio, str):
+        raise TypeError(
+            "Parameter 'path_to_audio' must be a string containing the file path."
+        )
+    path = Path(path_to_audio).expanduser().resolve()
+    if not path.is_file():
+        raise FileNotFoundError(f"No such audio file: {path}")
+    # Initialize the Groq client
+    client = Groq()
+    # Open the validated, resolved path rather than the raw argument, so an
+    # input such as "~/clip.mp3" opens the same file that was just checked.
+    with path.open("rb") as audio_file:
+        # Create a transcription of the audio file
+        transcription = client.audio.transcriptions.create(
+            file=audio_file,
+            model="whisper-large-v3-turbo",
+            response_format="text", # Returns plain text instead of JSON
+            language="en",
+            temperature=0.1
+        )
+    return transcription
+## Analyze image tool
+def analyze_image(path_to_image: str, question: str) -> str:
+    """
+    Analyzes an image and generates a response to a given question based on the image's content.
+    Args:
+        path_to_image (str): The path to the image file to be analyzed.
+        question (str): The question to be answered, based on the contents of the image.
+    Returns:
+        str: The message content of the first choice returned by the chat completion call.
+    Raises:
+        ValueError: If the file's MIME type cannot be guessed from its name or
+            is not an "image/..." type.
+    """
+    def encode_image(image_path):
+        # Read the raw bytes and return them as base64 text for the data URL.
+        with open(image_path, "rb") as image_file:
+            return base64.b64encode(image_file.read()).decode('utf-8')
+    # Get the MIME type (e.g., image/png, image/jpeg); guessed from the file name only.
+    mime_type, _ = mimetypes.guess_type(path_to_image)
+    # Reject unknown types and known non-image types before reading the file
+    # or contacting the API.
+    if mime_type is None or not mime_type.startswith("image/"):
+        raise ValueError("Unsupported file type. Please provide a valid image.")
+    base64_image = encode_image(path_to_image)
+    # Initialize the Groq client (the class imported at the top of this file as `Groq`).
+    client = Groq()
+    chat_completion = client.chat.completions.create(
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": question},
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            # The image is sent inline as a base64 data URL.
+                            "url": f"data:{mime_type};base64,{base64_image}",
+                        },
+                    },
+                ],
+            }
+        ],
+        model="meta-llama/llama-4-scout-17b-16e-instruct",
+    )
+    return chat_completion.choices[0].message.content
+## Read .csv file tool
+def read_csv_file(path_to_csv: str) -> str:
+    """
+    Load a CSV file with pandas and render it as plain text.
+    Args:
+        path_to_csv (str): The file path to the CSV file.
+    Returns:
+        str: The table rendered by ``DataFrame.to_string`` without the index
+        column, or an "Error reading the CSV file: ..." message when loading
+        or rendering raises.
+    """
+    try:
+        table = pd.read_csv(path_to_csv)
+        rendered = table.to_string(index=False)
+    except Exception as exc:
+        # Failures are reported as text instead of being raised.
+        return f"Error reading the CSV file: {exc}"
+    return rendered
+## Read .xlsx file tool
+def read_xlsx_file(path_to_xlsx: str) -> str:
+    """
+    Load an XLSX file with pandas and render it as plain text.
+    Args:
+        path_to_xlsx (str): The file path to the XLSX file.
+    Returns:
+        str: The table returned by ``pd.read_excel`` rendered by
+        ``DataFrame.to_string`` without the index column, or an
+        "Error reading the XLSX file: ..." message when loading or rendering raises.
+    """
+    try:
+        sheet = pd.read_excel(path_to_xlsx)
+        rendered = sheet.to_string(index=False)
+    except Exception as exc:
+        # Failures are reported as text instead of being raised.
+        return f"Error reading the XLSX file: {exc}"
+    return rendered