Didrik Nathaniel LLoyd Aasland Skjelbred committed on
Commit
07c33f2
·
1 Parent(s): 17d149c
Files changed (5) hide show
  1. agent.py +54 -1
  2. agent_smolTool.py +139 -0
  3. app.py +1 -1
  4. prompt_template.txt +1 -4
  5. requirements.txt +3 -1
agent.py CHANGED
@@ -235,8 +235,62 @@ def save_and_read_file(content: str, filename: Optional[str] = None) -> str:
235
 
236
  return f"File saved to {filepath}. You can read this file to process its contents."
237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
 
 
240
 
241
 
242
  @tool
@@ -548,7 +602,6 @@ def build_graph():
548
  llm = ChatOpenAI(
549
  model="gpt-4o",
550
  api_key=OPENAI_API_KEY,
551
- temperature=0.3,
552
  max_retries=5,
553
  verbose=True,
554
  timeout=10
 
235
 
236
  return f"File saved to {filepath}. You can read this file to process its contents."
237
 
238
import speech_recognition as sr
import os
import requests
import tempfile
import uuid
from pydub import AudioSegment


def transcribe_audio_from_path(local_audio_path: str, language: str = "en-US") -> str:
    """
    Transcribe audio content from a local file path to a text string.

    Converts spoken content from a locally saved audio file into written
    text. It expects a path to an audio file that has already been
    downloaded and saved to the local environment (e.g. using
    'file_saver'). Any format pydub can decode (MP3, WAV, ...) is
    supported; the audio is converted to WAV internally because the
    SpeechRecognition reader consumes WAV.

    Args:
        local_audio_path (str): Path to an existing local audio file
            (e.g. "my_recording.mp3"). URLs are rejected.
        language (str, optional): Spoken language of the audio, as a
            Google Speech Recognition language code (e.g. "en-US",
            "es-ES"). Defaults to "en-US".

    Returns:
        str: The transcribed text, or an informative error message if
        transcription fails.
    """
    recognizer = sr.Recognizer()
    # Unique intermediate path in the system temp dir: a fixed name in
    # the CWD would race between concurrent tool calls and leave litter.
    temp_wav_path = os.path.join(
        tempfile.gettempdir(), f"transcribe_{uuid.uuid4().hex}.wav"
    )
    transcribed_text = ""

    try:
        # This tool only works on local files; remote content must be
        # downloaded first.
        if local_audio_path.startswith(("http://", "https://")):
            return "Error: This tool only accepts local file paths, not URLs. Please use 'file_saver' first."

        if not os.path.exists(local_audio_path):
            return f"Error: Local audio file not found at '{local_audio_path}'."

        # Convert to WAV (SpeechRecognition expects WAV input).
        audio = AudioSegment.from_file(local_audio_path)
        audio.export(temp_wav_path, format="wav")

        # Transcribe via the Google Speech Recognition web API.
        with sr.AudioFile(temp_wav_path) as source:
            audio_listened = recognizer.record(source)
            try:
                transcribed_text = recognizer.recognize_google(
                    audio_listened, language=language
                )
            except sr.UnknownValueError:
                return "Could not understand audio (speech not clear or too short)."
            except sr.RequestError as e:
                return f"Could not request results from Google Speech Recognition service; {e}"

    except FileNotFoundError:
        # Defensive: the os.path.exists check above should already
        # have caught a missing file.
        return f"Error: Audio file not found at '{local_audio_path}'."
    except Exception as e:
        return f"An unexpected error occurred during audio processing or transcription: {e}"
    finally:
        # Always remove the intermediate WAV, including on error paths.
        if os.path.exists(temp_wav_path):
            os.remove(temp_wav_path)

    return transcribed_text.strip()
294
 
295
 
296
  @tool
 
602
  llm = ChatOpenAI(
603
  model="gpt-4o",
604
  api_key=OPENAI_API_KEY,
 
605
  max_retries=5,
606
  verbose=True,
607
  timeout=10
agent_smolTool.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from langchain_openai import ChatOpenAI
3
+ from langchain_community.document_loaders import WikipediaLoader,ArxivLoader
4
+ from langchain_tavily import TavilySearch
5
+ from langchain.schema import HumanMessage
6
+ from openai import OpenAI
7
+ from langchain.tools import tool
8
+ import pandas as pd
9
+
10
+ from langchain_core.callbacks.manager import CallbackManager
11
+ from langchain_core.callbacks.stdout import StdOutCallbackHandler
12
+ from langgraph.types import Command
13
+ from langchain.docstore.document import Document
14
+ from typing import List, Dict, Any, Optional
15
+ import uuid
16
+ import tempfile
17
+ from langchain.agents import Tool
18
+ from urllib.parse import urlparse
19
+ import pytesseract
20
+ from langgraph.prebuilt import create_react_agent
21
+ from PIL import Image, ImageDraw, ImageFont, ImageEnhance, ImageFilter
22
+ import requests
23
+ from dotenv import load_dotenv
24
+ import os
25
+ import cmath
26
+ import httpx
27
+ from pathlib import Path
28
+ import base64
29
+ from langchain_community.tools import DuckDuckGoSearchResults
30
+
31
+
32
+ from smolagents import DuckDuckGoSearchTool,PythonInterpreterTool,WikipediaSearchTool,VisitWebpageTool,GoogleSearchTool
33
+ import numpy as np
34
+
35
+
36
+
37
+
38
+ load_dotenv()
39
+ ChatGroq_key=os.getenv("ChatGroq")
40
+ HF_TOKEN=os.getenv("HF_TOKEN")
41
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
42
+ SERPAPI_API_KEY=os.getenv("SERPAPI_API_KEY")
43
+ TAVILY_API_KEY=os.getenv("TAVILY_API_KEY")
44
+
45
+
46
+
47
+
48
+
49
def build_graph():
    """Build and return a ReAct agent graph backed by GPT-4o.

    Reads the system prompt from 'prompt_template.txt', wires up the
    smolagents search/interpreter tools plus a local file-download tool,
    and returns the compiled agent.

    Returns:
        The runnable agent produced by langgraph's create_react_agent.

    Raises:
        FileNotFoundError: if 'prompt_template.txt' is missing from the
            working directory.
    """
    llm = ChatOpenAI(
        model="gpt-4o",
        api_key=OPENAI_API_KEY,
        temperature=0.3,
        max_retries=5,
        verbose=True,
        timeout=10,
    )

    @tool
    def download_file_from_url(url: str, filename: Optional[str] = None) -> str:
        """
        Download a file from a URL and save it to a temporary location.
        Args:
            url (str): the URL of the file to download.
            filename (str, optional): the name of the file. If not provided, a random name file will be created.
        """
        try:
            # Derive a filename from the URL path when none was supplied.
            if not filename:
                url_path = urlparse(url).path
                filename = os.path.basename(url_path)
                if not filename:
                    filename = f"downloaded_{uuid.uuid4().hex[:8]}"

            filepath = os.path.join(tempfile.gettempdir(), filename)

            # Stream the body in chunks; the timeout prevents the agent
            # from hanging indefinitely on an unresponsive host.
            response = requests.get(url, stream=True, timeout=30)
            response.raise_for_status()

            with open(filepath, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

            return f"File downloaded to {filepath}. You can read this file to process its contents."
        except Exception as e:
            return f"Error downloading file: {str(e)}"

    agent_tools = [
        DuckDuckGoSearchTool(),
        PythonInterpreterTool(),
        WikipediaSearchTool(),
        VisitWebpageTool(),
        GoogleSearchTool(),
        download_file_from_url,
    ]

    from langchain_core.messages import SystemMessage

    # The system prompt lives in a sibling text file so it can be tuned
    # without touching code.
    prompt_template = "prompt_template.txt"
    with open(prompt_template, "r", encoding="utf-8") as file:
        prompt_content = file.read()

    system_message = SystemMessage(content=prompt_content)

    # create_react_agent is langgraph.prebuilt's ReAct-loop factory.
    ai_agent = create_react_agent(
        model=llm,
        tools=agent_tools,
        prompt=system_message,
    )

    return ai_agent


# if __name__ == "__main__":
#     graph = build_graph(provider="openAi")
#     img_bytes = graph.get_graph().draw_mermaid_png()
#     with open("dav.png", "wb") as f:
#         f.write(img_bytes)
app.py CHANGED
@@ -3,7 +3,7 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
6
- from agent import build_graph
7
  #from langchain.schema import HumanMessage
8
  from langchain_core.messages import HumanMessage
9
  import time
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from agent_smolTool import build_graph
7
  #from langchain.schema import HumanMessage
8
  from langchain_core.messages import HumanMessage
9
  import time
prompt_template.txt CHANGED
@@ -5,7 +5,4 @@ If you are asked for a number, don't use comma to write your number neither use
5
  If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
6
  If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
7
 
8
- You will be provided with a question from the GAIA benchmark level 1. the question can provide a file_path that is important for you to use in order to provide correct answer
9
- Always provide a short correct answer. do not provide a question back. you must always try too provide a correct answer, you can use tools for this
10
-
11
- Remember if any websearch tool fails. try to use any of the other web_search tools, to provide you with information, so you can give a FINAL ANSWER: correctly
 
5
  If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
6
  If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
7
 
8
+ Please use your available tools to gain more up-to-date knowledge for providing the correct answer.
 
 
 
requirements.txt CHANGED
@@ -33,4 +33,6 @@ gradio[oauth]
33
  wikipedia
34
  openai
35
  smolagents
36
- langchain-tavily
 
 
 
33
  wikipedia
34
  openai
35
  smolagents
36
+ langchain-tavily
37
+ SpeechRecognition
38
+ pydub