Final_Assignment_Template

Sleeping

App Files Files Community

mdicio committed on May 4, 2025

Commit

35c8e46

1 Parent(s): e8cec3a

back to ggroq

Browse files

Files changed (2) hide show

agent.py +342 -53
app.py +9 -7

agent.py CHANGED Viewed

@@ -1,46 +1,265 @@
 import os
 from datasets import load_dataset
 from dotenv import load_dotenv
-from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.schema import Document
 from langchain.tools.retriever import create_retriever_tool
 from langchain.vectorstores import Chroma
 from langchain_community.document_loaders import ArxivLoader, WikipediaLoader
 from langchain_community.tools.tavily_search import TavilySearchResults
-from langchain_core.messages import HumanMessage, SystemMessage
 from langchain_core.tools import tool
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_groq import ChatGroq
-from langchain_huggingface import (ChatHuggingFace, HuggingFaceEmbeddings,
-                                   HuggingFaceEndpoint)
 from langgraph.graph import START, MessagesState, StateGraph
 from langgraph.prebuilt import ToolNode, tools_condition
-from huggingface_hub import login
 login(token=os.environ["HUGGINGFACE_TOKEN"])
 load_dotenv()
 @tool
-def calculator(query: str) -> str:
-    """Perform basic arithmetic operations based on the provided query.
     Args:
-        query: A mathematical query as a string, e.g., '2 + 2' or '5 * 6'."""
     try:
-        # Evaluate the mathematical expression
-        result = eval(query)
-        return {"calculator_result": str(result)}
     except Exception as e:
-        return {"error": f"Error evaluating the expression: {str(e)}"}
 @tool
 def wiki_search(query: str) -> str:
     """Search Wikipedia for a query and return maximum 2 results.
     Args:
         query: The search query."""
     search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
@@ -56,7 +275,6 @@ def wiki_search(query: str) -> str:
 @tool
 def web_search(query: str) -> str:
     """Search Tavily for a query and return maximum 3 results.
     Args:
         query: The search query."""
     search_docs = TavilySearchResults(max_results=3).invoke(query=query)
@@ -70,9 +288,8 @@ def web_search(query: str) -> str:
 @tool
-def arvix_search(query: str) -> str:
     """Search Arxiv for a query and return maximum 3 result.
     Args:
         query: The search query."""
     search_docs = ArxivLoader(query=query, load_max_docs=3).load()
@@ -82,7 +299,74 @@ def arvix_search(query: str) -> str:
             for doc in search_docs
         ]
     )
-    return {"arvix_results": formatted_search_docs}
 system_prompt = """You are a helpful assistant tasked with answering questions using a set of tools.
@@ -100,7 +384,13 @@ embeddings = HuggingFaceEmbeddings(
 )  #  dim=768
 # Load the GAIA validation dataset
-dataset = load_dataset("gaia-benchmark/GAIA", name="2023_level1", split="validation", trust_remote_code=True, cache_dir = "ragdata")
 # Extract questions and their answers
 documents = []
@@ -120,7 +410,7 @@ for entry in dataset:
     documents.append(Document(page_content=question, metadata=metadata))
 # Insert the documents into Chroma
-vectorstore = Chroma.from_documents(
     documents=documents,
     embedding=embeddings,
     collection_name="gaia_validation",
@@ -128,17 +418,29 @@ vectorstore = Chroma.from_documents(
 )
 create_retriever_tool = create_retriever_tool(
-    retriever=vectorstore.as_retriever(),
     name="Question Search",
     description="A tool to retrieve similar questions from a vector store.",
 )
 tools = [
-    calculator,
-    wiki_search,
     web_search,
-    arvix_search,
 ]
@@ -156,10 +458,10 @@ def build_graph(provider: str = "groq"):
         )  # optional : qwen-qwq-32b gemma2-9b-it
     elif provider == "huggingface":
         # TODO: Add huggingface endpoint
-        llm=HuggingFaceEndpoint(
-        repo_id="Meta-DeepLearning/llama-2-7b-chat-hf",
-        temperature=0,
-    )
     else:
         raise ValueError("Invalid provider. Choose 'google', 'groq' or 'huggingface'.")
     # Bind tools to LLM
@@ -172,35 +474,22 @@ def build_graph(provider: str = "groq"):
     def retriever(state: MessagesState):
         """Retriever node"""
-        similar_question = vectorstore.similarity_search(state["messages"][0].content)
         example_msg = HumanMessage(
             content=f"Here I provide a similar question and answer for reference: \n\n{similar_question[0].page_content}",
         )
         return {"messages": [sys_msg] + state["messages"] + [example_msg]}
     builder = StateGraph(MessagesState)
-    builder.add_node("retriever", retriever)
-    builder.add_node("assistant", assistant)
-    builder.add_node("tools", ToolNode(tools))
-    builder.add_edge(START, "retriever")
-    builder.add_edge("retriever", "assistant")
-    builder.add_conditional_edges(
-        "assistant",
-        tools_condition,
-    )
-    builder.add_edge("tools", "assistant")
-    # Compile graph
-    return builder.compile()
-# test
-if __name__ == "__main__":
-    question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
-    # Build the graph
-    graph = build_graph(provider="groq")
-    # Run the graph
-    messages = [HumanMessage(content=question)]
-    messages = graph.invoke({"messages": messages})
-    for m in messages["messages"]:
-        m.pretty_print()

+import cmath
+import json
 import os
+import re
+import tempfile
+import uuid
+from typing import Any, Dict, List, Optional
+from urllib.parse import urlparse
+import numpy as np
+import pandas as pd
+import pytesseract
+import requests
+from code_interpreter import CodeInterpreter
+from dotenv import load_dotenv
+from PIL import Image, ImageDraw, ImageEnhance, ImageFilter, ImageFont
+interpreter_instance = CodeInterpreter()
+from image_processing import *
+"""Langraph"""
 from datasets import load_dataset
 from dotenv import load_dotenv
+from huggingface_hub import login
 from langchain.schema import Document
 from langchain.tools.retriever import create_retriever_tool
 from langchain.vectorstores import Chroma
 from langchain_community.document_loaders import ArxivLoader, WikipediaLoader
+from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.tools.tavily_search import TavilySearchResults
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
 from langchain_core.tools import tool
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_groq import ChatGroq
+from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
 from langgraph.graph import START, MessagesState, StateGraph
 from langgraph.prebuilt import ToolNode, tools_condition
+from supabase.client import Client, create_client
 login(token=os.environ["HUGGINGFACE_TOKEN"])
 load_dotenv()
+@tool
+def multiply(a: float, b: float) -> float:
+    """
+    Multiplies two numbers.
+    Args:
+        a (float): the first number
+        b (float): the second number
+    """
+    return a * b
+@tool
+def add(a: float, b: float) -> float:
+    """
+    Adds two numbers.
+    Args:
+        a (float): the first number
+        b (float): the second number
+    """
+    return a + b
+@tool
+def subtract(a: float, b: float) -> int:
+    """
+    Subtracts two numbers.
+    Args:
+        a (float): the first number
+        b (float): the second number
+    """
+    return a - b
+@tool
+def divide(a: float, b: float) -> float:
+    """
+    Divides two numbers.
+    Args:
+        a (float): the first float number
+        b (float): the second float number
+    """
+    if b == 0:
+        raise ValueError("Cannot divided by zero.")
+    return a / b
 @tool
+def modulus(a: int, b: int) -> int:
+    """
+    Get the modulus of two numbers.
+    Args:
+        a (int): the first number
+        b (int): the second number
+    """
+    return a % b
+@tool
+def power(a: float, b: float) -> float:
+    """
+    Get the power of two numbers.
+    Args:
+        a (float): the first number
+        b (float): the second number
+    """
+    return a**b
+@tool
+def square_root(a: float) -> float | complex:
+    """
+    Get the square root of a number.
+    Args:
+        a (float): the number to get the square root of
+    """
+    if a >= 0:
+        return a**0.5
+    return cmath.sqrt(a)
+### =============== DOCUMENT PROCESSING TOOLS =============== ###
+@tool
+def save_and_read_file(content: str, filename: Optional[str] = None) -> str:
+    """
+    Save content to a file and return the path.
     Args:
+        content (str): the content to save to the file
+        filename (str, optional): the name of the file. If not provided, a random name file will be created.
+    """
+    temp_dir = tempfile.gettempdir()
+    if filename is None:
+        temp_file = tempfile.NamedTemporaryFile(delete=False, dir=temp_dir)
+        filepath = temp_file.name
+    else:
+        filepath = os.path.join(temp_dir, filename)
+    with open(filepath, "w") as f:
+        f.write(content)
+    return f"File saved to {filepath}. You can read this file to process its contents."
+@tool
+def download_file_from_url(url: str, filename: Optional[str] = None) -> str:
+    """
+    Download a file from a URL and save it to a temporary location.
+    Args:
+        url (str): the URL of the file to download.
+        filename (str, optional): the name of the file. If not provided, a random name file will be created.
+    """
     try:
+        # Parse URL to get filename if not provided
+        if not filename:
+            path = urlparse(url).path
+            filename = os.path.basename(path)
+            if not filename:
+                filename = f"downloaded_{uuid.uuid4().hex[:8]}"
+        # Create temporary file
+        temp_dir = tempfile.gettempdir()
+        filepath = os.path.join(temp_dir, filename)
+        # Download the file
+        response = requests.get(url, stream=True)
+        response.raise_for_status()
+        # Save the file
+        with open(filepath, "wb") as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+        return f"File downloaded to {filepath}. You can read this file to process its contents."
     except Exception as e:
+        return f"Error downloading file: {str(e)}"
+@tool
+def extract_text_from_image(image_path: str) -> str:
+    """
+    Extract text from an image using OCR library pytesseract (if available).
+    Args:
+        image_path (str): the path to the image file.
+    """
+    try:
+        # Open the image
+        image = Image.open(image_path)
+        # Extract text from the image
+        text = pytesseract.image_to_string(image)
+        return f"Extracted text from image:\n\n{text}"
+    except Exception as e:
+        return f"Error extracting text from image: {str(e)}"
+@tool
+def analyze_csv_file(file_path: str, query: str) -> str:
+    """
+    Analyze a CSV file using pandas and answer a question about it.
+    Args:
+        file_path (str): the path to the CSV file.
+        query (str): Question about the data
+    """
+    try:
+        # Read the CSV file
+        df = pd.read_csv(file_path)
+        # Run various analyses based on the query
+        result = f"CSV file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
+        result += f"Columns: {', '.join(df.columns)}\n\n"
+        # Add summary statistics
+        result += "Summary statistics:\n"
+        result += str(df.describe())
+        return result
+    except Exception as e:
+        return f"Error analyzing CSV file: {str(e)}"
+@tool
+def analyze_excel_file(file_path: str, query: str) -> str:
+    """
+    Analyze an Excel file using pandas and answer a question about it.
+    Args:
+        file_path (str): the path to the Excel file.
+        query (str): Question about the data
+    """
+    try:
+        # Read the Excel file
+        df = pd.read_excel(file_path)
+        # Run various analyses based on the query
+        result = (
+            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
+        )
+        result += f"Columns: {', '.join(df.columns)}\n\n"
+        # Add summary statistics
+        result += "Summary statistics:\n"
+        result += str(df.describe())
+        return result
+    except Exception as e:
+        return f"Error analyzing Excel file: {str(e)}"
+### ============== IMAGE PROCESSING AND GENERATION TOOLS =============== ###
 @tool
 def wiki_search(query: str) -> str:
     """Search Wikipedia for a query and return maximum 2 results.
     Args:
         query: The search query."""
     search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
 @tool
 def web_search(query: str) -> str:
     """Search Tavily for a query and return maximum 3 results.
     Args:
         query: The search query."""
     search_docs = TavilySearchResults(max_results=3).invoke(query=query)
 @tool
+def arxiv_search(query: str) -> str:
     """Search Arxiv for a query and return maximum 3 result.
     Args:
         query: The search query."""
     search_docs = ArxivLoader(query=query, load_max_docs=3).load()
             for doc in search_docs
         ]
     )
+    return {"arxiv_results": formatted_search_docs}
+### =============== CODE INTERPRETER TOOLS =============== ###
+@tool
+def execute_code_multilang(code: str, language: str = "python") -> str:
+    """Execute code in multiple languages (Python, Bash, SQL, C, Java) and return results.
+    Args:
+        code (str): The source code to execute.
+        language (str): The language of the code. Supported: "python", "bash", "sql", "c", "java".
+    Returns:
+        A string summarizing the execution results (stdout, stderr, errors, plots, dataframes if any).
+    """
+    supported_languages = ["python", "bash", "sql", "c", "java"]
+    language = language.lower()
+    if language not in supported_languages:
+        return f"❌ Unsupported language: {language}. Supported languages are: {', '.join(supported_languages)}"
+    result = interpreter_instance.execute_code(code, language=language)
+    response = []
+    if result["status"] == "success":
+        response.append(f"✅ Code executed successfully in **{language.upper()}**")
+        if result.get("stdout"):
+            response.append(
+                "\n**Standard Output:**\n```\n" + result["stdout"].strip() + "\n```"
+            )
+        if result.get("stderr"):
+            response.append(
+                "\n**Standard Error (if any):**\n```\n"
+                + result["stderr"].strip()
+                + "\n```"
+            )
+        if result.get("result") is not None:
+            response.append(
+                "\n**Execution Result:**\n```\n"
+                + str(result["result"]).strip()
+                + "\n```"
+            )
+        if result.get("dataframes"):
+            for df_info in result["dataframes"]:
+                response.append(
+                    f"\n**DataFrame `{df_info['name']}` (Shape: {df_info['shape']})**"
+                )
+                df_preview = pd.DataFrame(df_info["head"])
+                response.append("First 5 rows:\n```\n" + str(df_preview) + "\n```")
+        if result.get("plots"):
+            response.append(
+                f"\n**Generated {len(result['plots'])} plot(s)** (Image data returned separately)"
+            )
+    else:
+        response.append(f"❌ Code execution failed in **{language.upper()}**")
+        if result.get("stderr"):
+            response.append(
+                "\n**Error Log:**\n```\n" + result["stderr"].strip() + "\n```"
+            )
+    return "\n".join(response)
 system_prompt = """You are a helpful assistant tasked with answering questions using a set of tools.
 )  #  dim=768
 # Load the GAIA validation dataset
+dataset = load_dataset(
+    "gaia-benchmark/GAIA",
+    name="2023_level1",
+    split="validation",
+    trust_remote_code=True,
+    cache_dir="ragdata",
+)
 # Extract questions and their answers
 documents = []
     documents.append(Document(page_content=question, metadata=metadata))
 # Insert the documents into Chroma
+vector_store = Chroma.from_documents(
     documents=documents,
     embedding=embeddings,
     collection_name="gaia_validation",
 )
 create_retriever_tool = create_retriever_tool(
+    retriever=vector_store.as_retriever(),
     name="Question Search",
     description="A tool to retrieve similar questions from a vector store.",
 )
 tools = [
     web_search,
+    wiki_search,
+    arxiv_search,
+    multiply,
+    add,
+    subtract,
+    divide,
+    modulus,
+    power,
+    square_root,
+    save_and_read_file,
+    download_file_from_url,
+    extract_text_from_image,
+    analyze_csv_file,
+    analyze_excel_file,
+    execute_code_multilang,
 ]
         )  # optional : qwen-qwq-32b gemma2-9b-it
     elif provider == "huggingface":
         # TODO: Add huggingface endpoint
+        llm = HuggingFaceEndpoint(
+            repo_id="Meta-DeepLearning/llama-2-7b-chat-hf",
+            temperature=0,
+        )
     else:
         raise ValueError("Invalid provider. Choose 'google', 'groq' or 'huggingface'.")
     # Bind tools to LLM
     def retriever(state: MessagesState):
         """Retriever node"""
+        similar_question = vector_store.similarity_search(state["messages"][0].content)
         example_msg = HumanMessage(
             content=f"Here I provide a similar question and answer for reference: \n\n{similar_question[0].page_content}",
         )
         return {"messages": [sys_msg] + state["messages"] + [example_msg]}
     builder = StateGraph(MessagesState)
+    builder.add_node('retriever', retriever)
+    builder.add_node('assistant', assistant)
+    builder.add_node('tools', ToolNode(tools))
+    builder.add_edge(START, 'retriever')
+    builder.add_edge('retriever', 'assistant')
+    builder.add_conditional_edges('assistant', tools_condition)
+    builder.add_edge('tools', 'assistant')
+    graph = builder.compile()
+    return graph

app.py CHANGED Viewed

@@ -17,15 +17,17 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
-        self.graph = build_graph(provider="groq")
-        print("✅ Agent initialized.")
     def __call__(self, question: str) -> str:
-        print(f"📨 Received question: {question[:60]}...")
         messages = [HumanMessage(content=question)]
-        result = self.graph.invoke({"messages": messages})
-        return result["messages"][-1].content  # Simplify if needed
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """

 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
+    """Callable agent that answers a question by running the graph returned by build_graph."""
     def __init__(self):
+        # The graph is built once here and reused by every __call__.
+        print("BasicAgent initialized.")
+        self.graph = build_graph(provider = "groq")
     def __call__(self, question: str) -> str:
+        """Run the graph on `question` and return the last message's content without its first 14 characters."""
+        print(f"Agent received question (first 50 chars): {question[:50]}...")
+        # fixed_answer = "This is a default answer."
+        # print(f"Agent returning fixed answer: {fixed_answer}")
+        # return fixed_answer
         messages = [HumanMessage(content=question)]
+        # invoke() returns a mapping; its 'messages' entry is indexed below and
+        # the last element is taken as the answer.
+        messages = self.graph.invoke({'messages': messages})
+        ans = messages['messages'][-1].content
+        # NOTE(review): the first 14 characters are dropped unconditionally —
+        # presumably a fixed answer prefix requested by the system prompt (which
+        # is not visible here); a reply without that prefix would lose real
+        # content. TODO confirm against the prompt.
+        return ans[14:]
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """