Didrik Nathaniel LLoyd Aasland Skjelbred committed on
Commit
07c33f2
·
1 Parent(s): 17d149c
Files changed (5) hide show
  1. agent.py +54 -1
  2. agent_smolTool.py +139 -0
  3. app.py +1 -1
  4. prompt_template.txt +1 -4
  5. requirements.txt +3 -1
agent.py CHANGED
@@ -235,8 +235,62 @@ def save_and_read_file(content: str, filename: Optional[str] = None) -> str:
235
 
236
  return f"File saved to {filepath}. You can read this file to process its contents."
237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
 
 
240
 
241
 
242
  @tool
@@ -548,7 +602,6 @@ def build_graph():
548
  llm = ChatOpenAI(
549
  model="gpt-4o",
550
  api_key=OPENAI_API_KEY,
551
- temperature=0.3,
552
  max_retries=5,
553
  verbose=True,
554
  timeout=10
 
235
 
236
  return f"File saved to {filepath}. You can read this file to process its contents."
237
 
238
import speech_recognition as sr
import os
import requests
import tempfile
import uuid
from pydub import AudioSegment


def transcribe_audio_from_path(local_audio_path: str, language: str = "en-US") -> str:
    """
    Transcribe audio content from a local file path to a text string.

    Converts spoken content from a locally saved audio file into written
    text. It expects a path to an audio file that has already been
    downloaded and saved to the local environment (e.g. using
    'file_saver'). Any format pydub can decode (MP3, WAV, ...) is
    supported; the audio is converted to WAV internally because the
    SpeechRecognition reader consumes WAV.

    Args:
        local_audio_path (str): Path to an existing local audio file
            (e.g. "my_recording.mp3"). URLs are rejected.
        language (str, optional): Spoken language of the audio, as a
            Google Speech Recognition language code (e.g. "en-US",
            "es-ES"). Defaults to "en-US".

    Returns:
        str: The transcribed text, or an informative error message if
        transcription fails.
    """
    recognizer = sr.Recognizer()
    # Unique intermediate path in the system temp dir: a fixed name in
    # the CWD would race between concurrent tool calls and leave litter.
    temp_wav_path = os.path.join(
        tempfile.gettempdir(), f"transcribe_{uuid.uuid4().hex}.wav"
    )
    transcribed_text = ""

    try:
        # This tool only works on local files; remote content must be
        # downloaded first.
        if local_audio_path.startswith(("http://", "https://")):
            return "Error: This tool only accepts local file paths, not URLs. Please use 'file_saver' first."

        if not os.path.exists(local_audio_path):
            return f"Error: Local audio file not found at '{local_audio_path}'."

        # Convert to WAV (SpeechRecognition expects WAV input).
        audio = AudioSegment.from_file(local_audio_path)
        audio.export(temp_wav_path, format="wav")

        # Transcribe via the Google Speech Recognition web API.
        with sr.AudioFile(temp_wav_path) as source:
            audio_listened = recognizer.record(source)
            try:
                transcribed_text = recognizer.recognize_google(
                    audio_listened, language=language
                )
            except sr.UnknownValueError:
                return "Could not understand audio (speech not clear or too short)."
            except sr.RequestError as e:
                return f"Could not request results from Google Speech Recognition service; {e}"

    except FileNotFoundError:
        # Defensive: the os.path.exists check above should already
        # have caught a missing file.
        return f"Error: Audio file not found at '{local_audio_path}'."
    except Exception as e:
        return f"An unexpected error occurred during audio processing or transcription: {e}"
    finally:
        # Always remove the intermediate WAV, including on error paths.
        if os.path.exists(temp_wav_path):
            os.remove(temp_wav_path)

    return transcribed_text.strip()
294
 
295
 
296
  @tool
 
602
  llm = ChatOpenAI(
603
  model="gpt-4o",
604
  api_key=OPENAI_API_KEY,
 
605
  max_retries=5,
606
  verbose=True,
607
  timeout=10
agent_smolTool.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from langchain_openai import ChatOpenAI
3
+ from langchain_community.document_loaders import WikipediaLoader,ArxivLoader
4
+ from langchain_tavily import TavilySearch
5
+ from langchain.schema import HumanMessage
6
+ from openai import OpenAI
7
+ from langchain.tools import tool
8
+ import pandas as pd
9
+
10
+ from langchain_core.callbacks.manager import CallbackManager
11
+ from langchain_core.callbacks.stdout import StdOutCallbackHandler
12
+ from langgraph.types import Command
13
+ from langchain.docstore.document import Document
14
+ from typing import List, Dict, Any, Optional
15
+ import uuid
16
+ import tempfile
17
+ from langchain.agents import Tool
18
+ from urllib.parse import urlparse
19
+ import pytesseract
20
+ from langgraph.prebuilt import create_react_agent
21
+ from PIL import Image, ImageDraw, ImageFont, ImageEnhance, ImageFilter
22
+ import requests
23
+ from dotenv import load_dotenv
24
+ import os
25
+ import cmath
26
+ import httpx
27
+ from pathlib import Path
28
+ import base64
29
+ from langchain_community.tools import DuckDuckGoSearchResults
30
+
31
+
32
+ from smolagents import DuckDuckGoSearchTool,PythonInterpreterTool,WikipediaSearchTool,VisitWebpageTool,GoogleSearchTool
33
+ import numpy as np
34
+
35
+
36
+
37
+
38
+ load_dotenv()
39
+ ChatGroq_key=os.getenv("ChatGroq")
40
+ HF_TOKEN=os.getenv("HF_TOKEN")
41
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
42
+ SERPAPI_API_KEY=os.getenv("SERPAPI_API_KEY")
43
+ TAVILY_API_KEY=os.getenv("TAVILY_API_KEY")
44
+
45
+
46
+
47
+
48
+
49
def build_graph():
    """Build and return a ReAct agent graph backed by GPT-4o.

    Reads the system prompt from 'prompt_template.txt', wires up the
    smolagents search/interpreter tools plus a local file-download tool,
    and returns the compiled agent.

    Returns:
        The runnable agent produced by langgraph's create_react_agent.

    Raises:
        FileNotFoundError: if 'prompt_template.txt' is missing from the
            working directory.
    """
    llm = ChatOpenAI(
        model="gpt-4o",
        api_key=OPENAI_API_KEY,
        temperature=0.3,
        max_retries=5,
        verbose=True,
        timeout=10,
    )

    @tool
    def download_file_from_url(url: str, filename: Optional[str] = None) -> str:
        """
        Download a file from a URL and save it to a temporary location.
        Args:
            url (str): the URL of the file to download.
            filename (str, optional): the name of the file. If not provided, a random name file will be created.
        """
        try:
            # Derive a filename from the URL path when none was supplied.
            if not filename:
                url_path = urlparse(url).path
                filename = os.path.basename(url_path)
                if not filename:
                    filename = f"downloaded_{uuid.uuid4().hex[:8]}"

            filepath = os.path.join(tempfile.gettempdir(), filename)

            # Stream the body in chunks; the timeout prevents the agent
            # from hanging indefinitely on an unresponsive host.
            response = requests.get(url, stream=True, timeout=30)
            response.raise_for_status()

            with open(filepath, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

            return f"File downloaded to {filepath}. You can read this file to process its contents."
        except Exception as e:
            return f"Error downloading file: {str(e)}"

    agent_tools = [
        DuckDuckGoSearchTool(),
        PythonInterpreterTool(),
        WikipediaSearchTool(),
        VisitWebpageTool(),
        GoogleSearchTool(),
        download_file_from_url,
    ]

    from langchain_core.messages import SystemMessage

    # The system prompt lives in a sibling text file so it can be tuned
    # without touching code.
    prompt_template = "prompt_template.txt"
    with open(prompt_template, "r", encoding="utf-8") as file:
        prompt_content = file.read()

    system_message = SystemMessage(content=prompt_content)

    # create_react_agent is langgraph.prebuilt's ReAct-loop factory.
    ai_agent = create_react_agent(
        model=llm,
        tools=agent_tools,
        prompt=system_message,
    )

    return ai_agent


# if __name__ == "__main__":
#     graph = build_graph(provider="openAi")
#     img_bytes = graph.get_graph().draw_mermaid_png()
#     with open("dav.png", "wb") as f:
#         f.write(img_bytes)
app.py CHANGED
@@ -3,7 +3,7 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
6
- from agent import build_graph
7
  #from langchain.schema import HumanMessage
8
  from langchain_core.messages import HumanMessage
9
  import time
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from agent_smolTool import build_graph
7
  #from langchain.schema import HumanMessage
8
  from langchain_core.messages import HumanMessage
9
  import time
prompt_template.txt CHANGED
@@ -5,7 +5,4 @@ If you are asked for a number, don't use comma to write your number neither use
5
  If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
6
  If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
7
 
8
- You will be provided with a question from the GAIA benchmark level 1. the question can provide a file_path that is important for you to use in order to provide correct answer
9
- Always provide a short correct answer. do not provide a question back. you must always try too provide a correct answer, you can use tools for this
10
-
11
- Remember if any websearch tool fails. try to use any of the other web_search tools, to provide you with information, so you can give a FINAL ANSWER: correctly
 
5
  If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
6
  If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
7
 
8
+ Please use your available tools to gain more up-to-date knowledge for providing the correct answer.
 
 
 
requirements.txt CHANGED
@@ -33,4 +33,6 @@ gradio[oauth]
33
  wikipedia
34
  openai
35
  smolagents
36
- langchain-tavily
 
 
 
33
  wikipedia
34
  openai
35
  smolagents
36
+ langchain-tavily
37
+ SpeechRecognition
38
+ pydub