Tingusto committed on
Commit
faf691a
·
1 Parent(s): 57dc57d

Refactor agent.py to implement basic arithmetic operations (multiply, add, subtract, divide, modulus) and restore wiki and arxiv search functionalities. Remove SerpAPI, audio transcription, Excel analysis, and OCR tools. Update requirements.txt to include Gradio for OAuth support.

Browse files
Files changed (2) hide show
  1. agent.py +65 -126
  2. requirements.txt +1 -0
agent.py CHANGED
@@ -8,180 +8,119 @@ from langchain_community.document_loaders import WikipediaLoader
8
  from langchain_community.document_loaders import ArxivLoader
9
  from langchain_core.messages import SystemMessage, HumanMessage
10
  from langchain_core.tools import tool
11
- import requests
12
- from bs4 import BeautifulSoup
13
- import urllib.parse
14
- import re
15
- import pandas as pd
16
- import pytesseract
17
- from PIL import Image
18
- import whisper
19
- import yt_dlp
20
- import tempfile
21
- import subprocess
22
 
23
  load_dotenv()
24
 
25
- SERPAPI_API_KEY = os.getenv("SERPAPI_API_KEY")
26
-
27
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for information.

    Args:
        query: The search query."""
    try:
        docs = WikipediaLoader(query=query, load_max_docs=2).load()
        # Render each page inside a pseudo-XML <Document> wrapper so the
        # LLM can tell individual sources apart.
        rendered = [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
            for doc in docs
        ]
        return {"wiki_results": "\n\n---\n\n".join(rendered)}
    except Exception as e:
        return f"Error searching Wikipedia: {str(e)}"
43
 
44
@tool
def serpapi_search(query: str) -> str:
    """Search the web using SerpAPI (Google Custom Search).

    Args:
        query: The search query."""
    try:
        if not SERPAPI_API_KEY:
            return "SerpAPI key not set"
        params = {
            "q": query,
            "api_key": SERPAPI_API_KEY,
            "engine": "google",
            "num": 3,
            "hl": "en"
        }
        # A timeout stops the agent from hanging indefinitely when SerpAPI
        # is unreachable — the original call had no timeout at all.
        response = requests.get("https://serpapi.com/search", params=params, timeout=30)
        response.raise_for_status()
        data = response.json()
        results = []
        for r in data.get("organic_results", [])[:3]:
            title = r.get("title", "")
            snippet = r.get("snippet", "")
            results.append(f"Title: {title}\nSnippet: {snippet}")
        # NOTE(review): success path returns a dict while the error paths and
        # annotation say str; kept as-is since downstream consumers may rely
        # on the "web_results" key — confirm before changing.
        return {"web_results": "\n\n".join(results) if results else "No results found"}
    except Exception as e:
        return f"Error searching web: {str(e)}"
71
 
72
@tool
def arxiv_search(query: str) -> str:
    """Search Arxiv for scientific papers.

    Args:
        query: The search query."""
    try:
        docs = ArxivLoader(query=query, load_max_docs=2).load()
        # Wrap each paper in a pseudo-XML <Document> tag, keeping only the
        # first 1000 characters of the body to limit context size.
        rendered = [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
            for doc in docs
        ]
        return {"arxiv_results": "\n\n---\n\n".join(rendered)}
    except Exception as e:
        return f"Error searching Arxiv: {str(e)}"
88
 
89
@tool
def reverse_text(text: str) -> str:
    """Reverse the given text.

    Args:
        text: The text to reverse."""
    # Equivalent to text[::-1]: walk the characters back-to-front.
    return "".join(reversed(text))
 
 
 
 
96
 
97
@tool
def transcribe_audio(file_path: str) -> str:
    """Transcribe an audio file using Whisper.

    Args:
        file_path: Path to the audio file."""
    try:
        # Load the small "base" Whisper model and transcribe in one pass.
        transcription = whisper.load_model("base").transcribe(file_path)
        return transcription["text"]
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"
109
 
110
@tool
def analyze_excel(file_path: str, column: str = None) -> str:
    """Analyze an Excel file and return the sum of a column or all data.

    Args:
        file_path: Path to the Excel file.
        column: Optional column to sum."""
    try:
        df = pd.read_excel(file_path)
        if column:
            # The original silently fell through to dumping the whole sheet
            # when the requested column did not exist; report the miss
            # explicitly instead so the caller gets a meaningful answer.
            if column not in df.columns:
                return f"Error analyzing Excel: column '{column}' not found"
            return str(df[column].sum())
        return df.to_csv(index=False)
    except Exception as e:
        return f"Error analyzing Excel: {str(e)}"
124
-
125
@tool
def ocr_image(file_path: str) -> str:
    """Extract text from an image using OCR.

    Args:
        file_path: Path to the image file."""
    try:
        # Open the image and run Tesseract over it in a single expression.
        return pytesseract.image_to_string(Image.open(file_path))
    except Exception as e:
        return f"Error extracting text from image: {str(e)}"
 
137
 
138
@tool
def analyze_youtube_video(video_url: str) -> str:
    """Download and transcribe a YouTube video using yt-dlp and Whisper.

    Args:
        video_url: The URL of the YouTube video."""
    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            # Fetch the best available audio stream into the temp directory.
            downloader_opts = {
                'format': 'bestaudio/best',
                'outtmpl': f'{tmpdir}/%(id)s.%(ext)s',
                'quiet': True,
            }
            with yt_dlp.YoutubeDL(downloader_opts) as downloader:
                video_info = downloader.extract_info(video_url, download=True)
                audio_path = downloader.prepare_filename(video_info)
            transcription = whisper.load_model("base").transcribe(audio_path)
            return transcription["text"][:2000]  # Limit the output size
    except Exception as e:
        return f"Error analyzing YouTube video: {str(e)}"
159
 
160
# System prompt: strict answer-formatting instructions for the QA assistant.
system_prompt = (
    "You are a highly accurate question-answering assistant. Your answers must be:\n"
    "- Direct, with no extra words or explanations.\n"
    "- Formatted exactly as requested (numbers only, comma-separated lists, etc.).\n"
    "- If the question involves a file, extract only the requested information.\n"
    "- If the question is about a video, audio, or image, use the appropriate tool to extract the answer.\n"
    "- If you are unsure, provide the most likely answer in the correct format.\n"
    "- Never add units, explanations, or formatting unless explicitly requested.\n"
)
 
 
 
 
 
162
 
163
# System message
sys_msg = SystemMessage(content=system_prompt)

# Tools exposed to the agent, grouped by capability (order preserved).
tools = [
    # Search
    wiki_search,
    serpapi_search,
    arxiv_search,
    # Text utilities
    reverse_text,
    # Media / file analysis
    transcribe_audio,
    analyze_excel,
    ocr_image,
    analyze_youtube_video,
]
177
 
178
  def build_graph():
179
  """Build the graph"""
 
180
  llm = ChatGroq(
181
  model="llama3-70b-8192",
182
  temperature=0.1
183
  )
184
 
 
185
  llm_with_tools = llm.bind_tools(tools)
186
 
187
  # Node
 
8
  from langchain_community.document_loaders import ArxivLoader
9
  from langchain_core.messages import SystemMessage, HumanMessage
10
  from langchain_core.tools import tool
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  load_dotenv()
13
 
 
 
14
@tool
def multiply(a: int, b: int) -> int:
    """Multiply two numbers.

    Args:
        a: first int
        b: second int
    """
    # Plain integer product.
    product = a * b
    return product
 
 
 
 
 
 
 
22
 
23
@tool
def add(a: int, b: int) -> int:
    """Add two numbers.

    Args:
        a: first int
        b: second int
    """
    # Sum of the two operands.
    total = a + b
    return total
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
@tool
def subtract(a: int, b: int) -> int:
    """Subtract two numbers.

    Args:
        a: first int
        b: second int
    """
    # Difference a - b.
    difference = a - b
    return difference
 
 
 
 
 
 
 
42
 
43
@tool
def divide(a: int, b: int) -> float:
    """Divide two numbers.

    Args:
        a: first int
        b: second int
    """
    # Guard against division by zero with an explicit, catchable error.
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    # True division of ints always yields a float, so the return annotation
    # is float (the original ``-> int`` annotation was inaccurate).
    return a / b
54
 
55
@tool
def modulus(a: int, b: int) -> int:
    """Get the modulus of two numbers.

    Args:
        a: first int
        b: second int
    """
    # Remainder of a divided by b (Python floor-mod semantics).
    remainder = a % b
    return remainder
 
 
 
64
 
65
@tool
def wiki_search(query: str) -> dict:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query."""
    # The original signature said ``-> str`` but the function returns a
    # dict; the annotation now matches the actual return value.
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # Wrap each page in a pseudo-XML <Document> tag so sources stay distinct.
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
            for doc in search_docs
        ])
    return {"wiki_results": formatted_search_docs}
78
 
79
@tool
def arxiv_search(query: str) -> dict:
    """Search Arxiv for a query and return maximum 3 results.

    Args:
        query: The search query."""
    # The original signature said ``-> str`` but the function returns a
    # dict; the annotation now matches the actual return value. Also fixed
    # the docstring typo "3 result" -> "3 results".
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    # Keep only the first 1000 characters of each paper to limit context size.
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
            for doc in search_docs
        ])
    return {"arxiv_results": formatted_search_docs}
 
 
 
 
 
 
 
 
92
 
93
# System prompt: strict answer-formatting instructions for the QA assistant.
system_prompt = (
    "You are a highly accurate question-answering assistant. Your answers must be:\n"
    "- Direct, with no extra words or explanations.\n"
    "- Formatted exactly as requested (numbers only, comma-separated lists, etc.).\n"
    "- If the question involves a file, extract only the requested information.\n"
    "- If you are unsure, provide the most likely answer in the correct format.\n"
    "- Never add units, explanations, or formatting unless explicitly requested."
)
100
 
101
# System message
sys_msg = SystemMessage(content=system_prompt)

# Tools the agent can call, grouped by capability (order preserved).
tools = [
    # Arithmetic
    multiply,
    add,
    subtract,
    divide,
    modulus,
    # Search
    wiki_search,
    arxiv_search,
]
114
 
115
  def build_graph():
116
  """Build the graph"""
117
+ # Initialize Groq LLM
118
  llm = ChatGroq(
119
  model="llama3-70b-8192",
120
  temperature=0.1
121
  )
122
 
123
+ # Bind tools to LLM
124
  llm_with_tools = llm.bind_tools(tools)
125
 
126
  # Node
requirements.txt CHANGED
@@ -19,3 +19,4 @@ whisper
19
  pytesseract
20
  pillow
21
  yt-dlp
 
 
19
  pytesseract
20
  pillow
21
  yt-dlp
22
+ gradio[oauth]