Spaces:

ShoaibSSM
/

LLM-Analysis-TDS-Project-2

Sleeping

App Files Files Community

ShoaibSSM committed on Nov 28, 2025

Commit

fab790b

verified ·

1 Parent(s): c780dca

Update tools/audio_transcribing.py

Browse files

Files changed (1) hide show

tools/audio_transcribing.py +17 -65

tools/audio_transcribing.py CHANGED Viewed

@@ -2,80 +2,32 @@ from langchain.tools import tool
 import whisper
 import os
 import re
-from pydub import AudioSegment
-# Load Whisper once globally (fast)
-model = whisper.load_model("tiny.en")  # fastest + accurate for numbers
 @tool
 def transcribe_audio(file_path: str) -> str:
     """
-    Transcribe an MP3 or WAV audio file using Whisper (offline, reliable, deterministic).
-    Automatically converts MP3 → WAV before transcription.
     """
     try:
-        # Build absolute path
-        path = os.path.join("LLMFiles", file_path)
-        # Convert mp3 → wav for Whisper consistency
-        if path.lower().endswith(".mp3"):
-            sound = AudioSegment.from_mp3(path)
-            wav_path = path.replace(".mp3", ".wav")
-            sound.export(wav_path, format="wav")
-            path = wav_path  # update final path
-        # Run Whisper
-        result = model.transcribe(path)
-        text = result["text"].strip()
-        return text
-    except Exception as e:
-        return f"Audio transcription error: {e}"
-# from langchain.tools import tool
-# import speech_recognition as sr
-# from pydub import AudioSegment
-# import os
-# @tool
-# def transcribe_audio(file_path: str) -> str:
-#     """
-#     Transcribe an MP3 or WAV audio file into text using Google's Web Speech API.
-#     Args:
-#         file_path (str): Path to the input audio file (.mp3 or .wav).
-#     Returns:
-#         str: The transcribed text from the audio.
-#     Notes:
-#         - MP3 files are automatically converted to WAV.
-#         - Requires `pydub` and `speech_recognition` packages.
-#         - Uses Google's free recognize_google() API (requires internet).
-#     """
-#     try:
-#         # Convert MP3 → WAV if needed
-#         file_path = os.path.join("LLMFiles", file_path)
-#         final_path = file_path
-#         if file_path.lower().endswith(".mp3"):
-#             sound = AudioSegment.from_mp3(file_path)
-#             final_path = file_path.replace(".mp3", ".wav")
-#             sound.export(final_path, format="wav")
-#         # Speech recognition
-#         recognizer = sr.Recognizer()
-#         with sr.AudioFile(final_path) as source:
-#             audio_data = recognizer.record(source)
-#             text = recognizer.recognize_google(audio_data)
-#         # If we converted the file, remove temp wav
-#         if final_path != file_path and os.path.exists(final_path):
-#             os.remove(final_path)
-#         return text
-#     except Exception as e:
-#         return f"Error occurred: {e}"

 import whisper
 import os
 import re
+model = whisper.load_model("base")  # Loaded once at import time; transcribe_audio reuses this instance on every call.
 @tool
 def transcribe_audio(file_path: str) -> str:
     """
     Transcribe an audio file and return the SUM of all numbers spoken in it.

     Args:
         file_path: Name of the audio file, relative to the ``LLMFiles``
             directory.

     Returns:
         The sum of every number found in the transcript, as a stringified
         integer ready for submission. A string starting with ``"Error:"`` is
         returned instead when the file is missing, no numbers are detected,
         or transcription fails.
     """
     try:
         fp = os.path.join("LLMFiles", file_path)
         # Fail early with a readable message rather than whatever exception
         # text the transcription backend produces for a missing file.
         if not os.path.isfile(fp):
             return f"Error: Audio file not found: {fp}"
         # run whisper
         text = model.transcribe(fp)["text"]
         # Extract all numbers. Comma-grouped figures such as "1,250" are kept
         # as one number; a plain r"\d+" would split them into 1 and 250 and
         # produce a wrong sum. The (?!\d) guard rejects malformed groupings
         # like "1,0000", which then fall back to plain digit runs.
         nums = re.findall(r"\d{1,3}(?:,\d{3})+(?!\d)|\d+", text)
         if not nums:
             return "Error: No numbers detected"
         # sum them, dropping the thousands separators before conversion
         return str(sum(int(n.replace(",", "")) for n in nums))
     except Exception as e:
         # Tool boundary: report any failure as text so the calling agent
         # receives a result string instead of an unhandled exception.
         return f"Error: {e}"