Luigi D'Addona commited on
Commit
1ae2c4e
·
1 Parent(s): 15c740f

aggiunto tool analyze_mp3_file

Browse files
Files changed (2) hide show
  1. agent.py +3 -2
  2. tools.py +53 -1
agent.py CHANGED
@@ -14,7 +14,7 @@ from langchain_google_genai import ChatGoogleGenerativeAI
14
 
15
  # Local imports
16
  from tools import get_search_tool, get_tavily_search_tool, get_wikipedia_tool, wikipedia_search, wikipedia_search_3,\
17
- execute_python_code_from_file, download_taskid_file, analyze_excel_file
18
 
19
  # Nota: per i test in locale si usa il .env
20
  # su HuggingFace invece si usano le variabili definite in Settings/"Variables and secrets"
@@ -58,8 +58,9 @@ chat = ChatGoogleGenerativeAI(
58
  #search_tool = get_search_tool()
59
  search_tool = get_tavily_search_tool()
60
  #wikipedia_tool = get_wikipedia_tool()
 
61
 
62
- tools = [search_tool, wikipedia_search_3, execute_python_code_from_file, download_taskid_file, analyze_excel_file]
63
 
64
  # Bind tools to the model
65
  chat_with_tools = chat.bind_tools(tools)
 
14
 
15
  # Local imports
16
  from tools import get_search_tool, get_tavily_search_tool, get_wikipedia_tool, wikipedia_search, wikipedia_search_3,\
17
+ execute_python_code_from_file, download_taskid_file, analyze_excel_file, get_analyze_mp3_tool
18
 
19
  # Nota: per i test in locale si usa il .env
20
  # su HuggingFace invece si usano le variabili definite in Settings/"Variables and secrets"
 
58
  #search_tool = get_search_tool()
59
  search_tool = get_tavily_search_tool()
60
  #wikipedia_tool = get_wikipedia_tool()
61
+ analyze_mp3_tool = get_analyze_mp3_tool(chat)
62
 
63
+ tools = [search_tool, wikipedia_search_3, execute_python_code_from_file, download_taskid_file, analyze_excel_file, analyze_mp3_tool]
64
 
65
  # Bind tools to the model
66
  chat_with_tools = chat.bind_tools(tools)
tools.py CHANGED
@@ -2,6 +2,7 @@ import os, sys
2
  from dotenv import load_dotenv
3
  import requests
4
  import pandas as pd
 
5
 
6
  from langchain_community.tools import DuckDuckGoSearchRun
7
  from langchain_community.utilities import WikipediaAPIWrapper
@@ -11,6 +12,7 @@ import wikipedia
11
  from langchain_tavily import TavilySearch
12
  from langchain_core.tools import tool
13
  from langchain.tools import Tool
 
14
 
15
  # per gestire esecuzione di codice python
16
  import subprocess
@@ -198,4 +200,54 @@ def analyze_excel_file(file_path: str, query: str) -> str:
198
 
199
  return str(result) # Convert result to string for the LLM
200
  except Exception as e:
201
- return f"Error analyzing Excel file: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from dotenv import load_dotenv
3
  import requests
4
  import pandas as pd
5
+ import base64
6
 
7
  from langchain_community.tools import DuckDuckGoSearchRun
8
  from langchain_community.utilities import WikipediaAPIWrapper
 
12
  from langchain_tavily import TavilySearch
13
  from langchain_core.tools import tool
14
  from langchain.tools import Tool
15
+ from langchain_core.messages import HumanMessage
16
 
17
  # per gestire esecuzione di codice python
18
  import subprocess
 
200
 
201
  return str(result) # Convert result to string for the LLM
202
  except Exception as e:
203
+ return f"Error analyzing Excel file: {e}"
204
+
205
+
206
def get_analyze_mp3_tool(llm):
    """Build a LangChain tool that transcribes an MP3 file with the given LLM.

    Args:
        llm: A multimodal chat model (e.g. ChatGoogleGenerativeAI) capable of
            accepting inline base64-encoded audio content.

    Returns:
        A ``@tool``-decorated function ``analyze_mp3_file(audio_path)`` bound
        to ``llm``, suitable for inclusion in an agent's tool list.
    """

    @tool
    def analyze_mp3_file(audio_path: str) -> str:
        """
        Extract text from an mp3 audio file.
        """
        try:
            # Read the audio file and encode it as base64.
            with open(audio_path, "rb") as audio_file:
                audio_bytes = audio_file.read()
            audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")

            # Prepare the prompt including the base64 audio data.
            # NOTE: the payload must be a data URI — a bare base64 string is
            # not a valid URL and is rejected by the model.
            message = [
                HumanMessage(
                    content=[
                        {
                            "type": "text",
                            "text": (
                                "Extract all the text from this audio. "
                                "Return only the extracted text, no explanations."
                            ),
                        },
                        {
                            "type": "audio_url",
                            "audio_url": {
                                "url": f"data:audio/mpeg;base64,{audio_base64}"
                            },
                        },
                    ]
                )
            ]

            # Call the multimodal model and return the transcription.
            response = llm.invoke(message)
            return response.content.strip()
        except Exception as e:
            # Surface the error to the calling agent instead of silently
            # returning "" (consistent with analyze_excel_file's error style),
            # so the LLM can see what went wrong and react.
            return f"Error extracting text from audio file {audio_path}: {e}"

    return analyze_mp3_file
252
+
253
+