Luigi D'Addona commited on
Commit
1ae2c4e
·
1 Parent(s): 15c740f

aggiunto tool analyze_mp3_file

Browse files
Files changed (2) hide show
  1. agent.py +3 -2
  2. tools.py +53 -1
agent.py CHANGED
@@ -14,7 +14,7 @@ from langchain_google_genai import ChatGoogleGenerativeAI
14
 
15
  # Local imports
16
  from tools import get_search_tool, get_tavily_search_tool, get_wikipedia_tool, wikipedia_search, wikipedia_search_3,\
17
- execute_python_code_from_file, download_taskid_file, analyze_excel_file
18
 
19
  # Nota: per i test in locale si usa il .env
20
  # su HuggingFace invece si usano le variabili definite in Settings/"Variables and secrets"
@@ -58,8 +58,9 @@ chat = ChatGoogleGenerativeAI(
58
  #search_tool = get_search_tool()
59
  search_tool = get_tavily_search_tool()
60
  #wikipedia_tool = get_wikipedia_tool()
 
61
 
62
- tools = [search_tool, wikipedia_search_3, execute_python_code_from_file, download_taskid_file, analyze_excel_file]
63
 
64
  # Bind tools to the model
65
  chat_with_tools = chat.bind_tools(tools)
 
14
 
15
  # Local imports
16
  from tools import get_search_tool, get_tavily_search_tool, get_wikipedia_tool, wikipedia_search, wikipedia_search_3,\
17
+ execute_python_code_from_file, download_taskid_file, analyze_excel_file, get_analyze_mp3_tool
18
 
19
  # Nota: per i test in locale si usa il .env
20
  # su HuggingFace invece si usano le variabili definite in Settings/"Variables and secrets"
 
58
  #search_tool = get_search_tool()
59
  search_tool = get_tavily_search_tool()
60
  #wikipedia_tool = get_wikipedia_tool()
61
+ analyze_mp3_tool = get_analyze_mp3_tool(chat)
62
 
63
+ tools = [search_tool, wikipedia_search_3, execute_python_code_from_file, download_taskid_file, analyze_excel_file, analyze_mp3_tool]
64
 
65
  # Bind tools to the model
66
  chat_with_tools = chat.bind_tools(tools)
tools.py CHANGED
@@ -2,6 +2,7 @@ import os, sys
2
  from dotenv import load_dotenv
3
  import requests
4
  import pandas as pd
 
5
 
6
  from langchain_community.tools import DuckDuckGoSearchRun
7
  from langchain_community.utilities import WikipediaAPIWrapper
@@ -11,6 +12,7 @@ import wikipedia
11
  from langchain_tavily import TavilySearch
12
  from langchain_core.tools import tool
13
  from langchain.tools import Tool
 
14
 
15
  # per gestire esecuzione di codice python
16
  import subprocess
@@ -198,4 +200,54 @@ def analyze_excel_file(file_path: str, query: str) -> str:
198
 
199
  return str(result) # Convert result to string for the LLM
200
  except Exception as e:
201
- return f"Error analyzing Excel file: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from dotenv import load_dotenv
3
  import requests
4
  import pandas as pd
5
+ import base64
6
 
7
  from langchain_community.tools import DuckDuckGoSearchRun
8
  from langchain_community.utilities import WikipediaAPIWrapper
 
12
  from langchain_tavily import TavilySearch
13
  from langchain_core.tools import tool
14
  from langchain.tools import Tool
15
+ from langchain_core.messages import HumanMessage
16
 
17
  # per gestire esecuzione di codice python
18
  import subprocess
 
200
 
201
  return str(result) # Convert result to string for the LLM
202
  except Exception as e:
203
+ return f"Error analyzing Excel file: {e}"
204
+
205
+
206
def get_analyze_mp3_tool(llm):
    """Build a LangChain tool that transcribes an MP3 file with the given LLM.

    Args:
        llm: A multimodal chat model (e.g. ChatGoogleGenerativeAI) capable of
            accepting inline base64-encoded audio content.

    Returns:
        A ``@tool``-decorated function ``analyze_mp3_file(audio_path)`` bound
        to ``llm``, suitable for inclusion in an agent's tool list.
    """

    @tool
    def analyze_mp3_file(audio_path: str) -> str:
        """
        Extract text from an mp3 audio file.
        """
        try:
            # Read the audio file and encode it as base64.
            with open(audio_path, "rb") as audio_file:
                audio_bytes = audio_file.read()
            audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")

            # Prepare the prompt including the base64 audio data.
            # NOTE: the payload must be a data URI — a bare base64 string is
            # not a valid URL and is rejected by the model.
            message = [
                HumanMessage(
                    content=[
                        {
                            "type": "text",
                            "text": (
                                "Extract all the text from this audio. "
                                "Return only the extracted text, no explanations."
                            ),
                        },
                        {
                            "type": "audio_url",
                            "audio_url": {
                                "url": f"data:audio/mpeg;base64,{audio_base64}"
                            },
                        },
                    ]
                )
            ]

            # Call the multimodal model and return the transcription.
            response = llm.invoke(message)
            return response.content.strip()
        except Exception as e:
            # Surface the error to the calling agent instead of silently
            # returning "" (consistent with analyze_excel_file's error style),
            # so the LLM can see what went wrong and react.
            return f"Error extracting text from audio file {audio_path}: {e}"

    return analyze_mp3_file
252
+
253
+