akrstova committed on
Commit
57a3c14
·
1 Parent(s): bb4ec09

Replace whisper with transformers pipeline

Browse files
Files changed (5) hide show
  1. agent.py +1 -1
  2. pyproject.toml +1 -0
  3. requirements.txt +1 -0
  4. tools/file_tools.py +12 -28
  5. uv.lock +2 -0
agent.py CHANGED
@@ -91,7 +91,7 @@ def build_graph():
91
 
92
 
93
  if __name__ == "__main__":
94
- question = "On June 6, 2023, an article by Carolyn Collins Petersen was published in Universe Today. This article mentions a team that produced a paper about their observations, linked at the bottom of the article. Find this paper. Under what NASA award number was the work performed by R. G. Arendt supported by?"
95
  # Build the graph
96
  graph = build_graph()
97
  # Run the graph
 
91
 
92
 
93
  if __name__ == "__main__":
94
+ question = "Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name."
95
  # Build the graph
96
  graph = build_graph()
97
  # Run the graph
pyproject.toml CHANGED
@@ -13,6 +13,7 @@ dependencies = [
13
  "langchain-google-genai>=2.1.4",
14
  "langchain-huggingface>=0.2.0",
15
  "langgraph>=0.4.3",
 
16
  "openai-whisper>=20240930",
17
  "pandas>=2.2.3",
18
  "pytesseract>=0.3.13",
 
13
  "langchain-google-genai>=2.1.4",
14
  "langchain-huggingface>=0.2.0",
15
  "langgraph>=0.4.3",
16
+ "numpy>=2.2.5",
17
  "openai-whisper>=20240930",
18
  "pandas>=2.2.3",
19
  "pytesseract>=0.3.13",
requirements.txt CHANGED
@@ -20,3 +20,4 @@ pgvector
20
  python-dotenv
21
  openai-whisper
22
  pytesseract
 
 
20
  python-dotenv
21
  openai-whisper
22
  pytesseract
23
+ transformers
tools/file_tools.py CHANGED
@@ -9,14 +9,11 @@ import contextlib
9
  from langchain_core.tools import tool
10
  from langchain_google_genai import ChatGoogleGenerativeAI
11
  import requests
12
- import whisper
13
  from PIL import Image
14
  import pytesseract
 
15
 
16
 
17
- # Load Whisper model once
18
- whisper_model = whisper.load_model("base") # or "small", "medium", "large"
19
-
20
  @tool
21
  def analyze_excel_file(file_path: str, query: str) -> str:
22
  """
@@ -46,41 +43,28 @@ def analyze_excel_file(file_path: str, query: str) -> str:
46
 
47
 
48
 
 
 
 
49
  @tool
50
- def process_mp3_file(file_path: str, query: str) -> str:
51
  """
52
- Transcribes an mp3 file and answers a question about its content.
53
 
54
  Args:
55
- file_path (str): The path to the .mp3 file
56
- query (str): The question to ask about the transcript
57
 
58
  Returns:
59
- str: The answer to the query based on audio content
60
  """
61
  try:
62
  print(f"Transcribing: {file_path}")
63
- # Whisper automatically handles MP3 input
64
- result = whisper_model.transcribe(file_path)
65
  transcript = result["text"]
66
-
67
- if not transcript.strip():
68
- return "Could not extract any meaningful text from the audio."
69
-
70
- # Ask question about transcript using Gemini
71
- llm = ChatGoogleGenerativeAI(
72
- model="gemini-2.0-flash-001",
73
- temperature=0.7,
74
- max_tokens=None,
75
- google_api_key=os.getenv("GOOGLE_API_KEY"),
76
- )
77
-
78
- prompt = f"Transcript:\n{transcript}\n\nQuestion: {query}\nAnswer only based on the transcript above."
79
- response = llm.invoke(prompt)
80
- return response.content
81
-
82
  except Exception as e:
83
- return f"Error processing mp3 file: {str(e)}"
84
 
85
 
86
 
 
9
  from langchain_core.tools import tool
10
  from langchain_google_genai import ChatGoogleGenerativeAI
11
  import requests
 
12
  from PIL import Image
13
  import pytesseract
14
+ from transformers import pipeline
15
 
16
 
 
 
 
17
  @tool
18
  def analyze_excel_file(file_path: str, query: str) -> str:
19
  """
 
43
 
44
 
45
 
46
+ # Load ASR pipeline once at module level (for efficiency)
47
+ asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=-1)
48
+
49
  @tool
50
+ def transcribe_audio(file_path: str, query: str = "") -> str:
51
  """
52
+ Transcribes speech from an audio file (e.g., .mp3 or .wav).
53
 
54
  Args:
55
+ file_path (str): Path to the audio file.
56
+ query (str): (Optional) Ignored; present to support LangChain tool schema.
57
 
58
  Returns:
59
+ str: Transcribed text from the audio.
60
  """
61
  try:
62
  print(f"Transcribing: {file_path}")
63
+ result = asr_pipeline(file_path)
 
64
  transcript = result["text"]
65
+ return transcript.strip() if transcript.strip() else "No speech detected."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  except Exception as e:
67
+ return f"Error transcribing audio: {str(e)}"
68
 
69
 
70
 
uv.lock CHANGED
@@ -368,6 +368,7 @@ dependencies = [
368
  { name = "langchain-google-genai" },
369
  { name = "langchain-huggingface" },
370
  { name = "langgraph" },
 
371
  { name = "openai-whisper" },
372
  { name = "pandas" },
373
  { name = "pytesseract" },
@@ -387,6 +388,7 @@ requires-dist = [
387
  { name = "langchain-google-genai", specifier = ">=2.1.4" },
388
  { name = "langchain-huggingface", specifier = ">=0.2.0" },
389
  { name = "langgraph", specifier = ">=0.4.3" },
 
390
  { name = "openai-whisper", specifier = ">=20240930" },
391
  { name = "pandas", specifier = ">=2.2.3" },
392
  { name = "pytesseract", specifier = ">=0.3.13" },
 
368
  { name = "langchain-google-genai" },
369
  { name = "langchain-huggingface" },
370
  { name = "langgraph" },
371
+ { name = "numpy" },
372
  { name = "openai-whisper" },
373
  { name = "pandas" },
374
  { name = "pytesseract" },
 
388
  { name = "langchain-google-genai", specifier = ">=2.1.4" },
389
  { name = "langchain-huggingface", specifier = ">=0.2.0" },
390
  { name = "langgraph", specifier = ">=0.4.3" },
391
+ { name = "numpy", specifier = ">=2.2.5" },
392
  { name = "openai-whisper", specifier = ">=20240930" },
393
  { name = "pandas", specifier = ">=2.2.3" },
394
  { name = "pytesseract", specifier = ">=0.3.13" },