New_Final_Assignment

Sleeping

App Files Community

naman1102 committed on Jun 2, 2025

Commit

09b1a3d

1 Parent(s): 7fb0070

whis

Browse files

Files changed (2) hide show

requirements.txt +1 -2
tools.py +23 -34

requirements.txt CHANGED Viewed

@@ -8,5 +8,4 @@ openai
 pandas
 langchain_openai
 langchain_community
-pydub
-whisper

 pandas
 langchain_openai
 langchain_community
+openai

tools.py CHANGED Viewed

@@ -86,55 +86,44 @@ import os
 from pydub import AudioSegment
 from pydub.utils import make_chunks
-_whisper_model = whisper.load_model("base")
 def audio_transcriber_tool(state: AgentState) -> AgentState:
     """
-    LangGraph tool for transcribing audio via Whisper.
-    Expects: state["audio_path"] to be a path to a .wav/.mp3/.m4a file.
     Returns:
-        {
-            "audio_path": None,
-            "transcript": "<full transcribed text>"
-        }
-    If no valid audio_path is found, returns {} to signal "no-op."
     """
     path = state.get("audio_path", "")
     if not path or not os.path.exists(path):
         return {}
     try:
-        # Whisper API has a ~25 MB limit per request. If file is small, transcribe directly.
-        max_bytes = 25 * 1024 * 1024
-        if os.path.getsize(path) <= max_bytes:
-            result = _whisper_model.transcribe(path)
-            text = result["text"].strip()
-        else:
-            # For large files, split into 2-minute (120 s) chunks
-            audio = AudioSegment.from_file(path)
-            chunk_length_ms = 120 * 1000
-            chunks = make_chunks(audio, chunk_length_ms)
-            transcripts = []
-            for i, chunk in enumerate(chunks):
-                chunk_name = f"temp_chunk_{i}.wav"
-                chunk.export(chunk_name, format="wav")
-                res = _whisper_model.transcribe(chunk_name)
-                transcripts.append(res["text"].strip())
-                os.remove(chunk_name)
-            text = "\n".join(transcripts)
     except Exception as e:
         text = f"Error during transcription: {e}"
     return {
         "audio_path": None,
         "transcript": text
-    }

 from pydub import AudioSegment
 from pydub.utils import make_chunks
+import os
+import openai
+from state import AgentState
 def audio_transcriber_tool(state: AgentState) -> AgentState:
     """
+    LangGraph tool for transcribing audio via OpenAI’s hosted Whisper API.
+    Expects: state["audio_path"] to be a valid path to a .wav/.mp3/.m4a file.
     Returns:
+      {
+        "audio_path": None,
+        "transcript": "<transcribed text or error message>"
+      }
+    If no valid audio_path is provided, returns {}.
     """
     path = state.get("audio_path", "")
     if not path or not os.path.exists(path):
         return {}
     try:
+        openai.api_key = os.getenv("OPENAI_API_KEY")
+        if not openai.api_key:
+            raise RuntimeError("OPENAI_API_KEY is not set in environment.")
+        with open(path, "rb") as audio_file:
+            # For OpenAI Python library v0.27.0+:
+            response = openai.Audio.transcribe("whisper-1", audio_file)
+            # If using an older OpenAI library, use:
+            # response = openai.Audio.create_transcription(file=audio_file, model="whisper-1")
+        text = response["text"].strip()
     except Exception as e:
         text = f"Error during transcription: {e}"
     return {
         "audio_path": None,
         "transcript": text
+    }