ShoaibSSM commited on
Commit
fab790b
·
verified ·
1 Parent(s): c780dca

Update tools/audio_transcribing.py

Browse files
Files changed (1) hide show
  1. tools/audio_transcribing.py +17 -65
tools/audio_transcribing.py CHANGED
@@ -2,80 +2,32 @@ from langchain.tools import tool
2
  import whisper
3
  import os
4
  import re
5
- from pydub import AudioSegment
6
 
7
- # Load Whisper once globally (fast)
8
- model = whisper.load_model("tiny.en") # fastest + accurate for numbers
9
 
10
  @tool
11
  def transcribe_audio(file_path: str) -> str:
12
  """
13
- Transcribe an MP3 or WAV audio file using Whisper (offline, reliable, deterministic).
14
- Automatically converts MP3 to WAV before transcription.
15
  """
16
  try:
17
- # Build absolute path
18
- path = os.path.join("LLMFiles", file_path)
19
 
20
- # Convert mp3 → wav for Whisper consistency
21
- if path.lower().endswith(".mp3"):
22
- sound = AudioSegment.from_mp3(path)
23
- wav_path = path.replace(".mp3", ".wav")
24
- sound.export(wav_path, format="wav")
25
- path = wav_path # update final path
26
 
27
- # Run Whisper
28
- result = model.transcribe(path)
29
- text = result["text"].strip()
30
 
31
- return text
 
32
 
33
- except Exception as e:
34
- return f"Audio transcription error: {e}"
35
-
36
-
37
-
38
- # from langchain.tools import tool
39
- # import speech_recognition as sr
40
- # from pydub import AudioSegment
41
- # import os
42
-
43
- # @tool
44
- # def transcribe_audio(file_path: str) -> str:
45
- # """
46
- # Transcribe an MP3 or WAV audio file into text using Google's Web Speech API.
47
-
48
- # Args:
49
- # file_path (str): Path to the input audio file (.mp3 or .wav).
50
 
51
- # Returns:
52
- # str: The transcribed text from the audio.
53
-
54
- # Notes:
55
- # - MP3 files are automatically converted to WAV.
56
- # - Requires `pydub` and `speech_recognition` packages.
57
- # - Uses Google's free recognize_google() API (requires internet).
58
- # """
59
- # try:
60
- # # Convert MP3 → WAV if needed
61
- # file_path = os.path.join("LLMFiles", file_path)
62
- # final_path = file_path
63
- # if file_path.lower().endswith(".mp3"):
64
- # sound = AudioSegment.from_mp3(file_path)
65
- # final_path = file_path.replace(".mp3", ".wav")
66
- # sound.export(final_path, format="wav")
67
-
68
- # # Speech recognition
69
- # recognizer = sr.Recognizer()
70
- # with sr.AudioFile(final_path) as source:
71
- # audio_data = recognizer.record(source)
72
- # text = recognizer.recognize_google(audio_data)
73
-
74
- # # If we converted the file, remove temp wav
75
- # if final_path != file_path and os.path.exists(final_path):
76
- # os.remove(final_path)
77
-
78
- # return text
79
- # except Exception as e:
80
-
81
- # return f"Error occurred: {e}"
 
2
  import whisper
3
  import os
4
  import re
 
5
 
6
# Load the Whisper "base" speech-to-text model once at import time so every
# tool invocation reuses the same weights instead of reloading per call.
# NOTE(review): eager load means importing this module is slow and requires
# the model files to be available — confirm that is acceptable for callers.
model = whisper.load_model("base")
 
7
 
8
@tool
def transcribe_audio(file_path: str) -> str:
    """
    Transcribe an audio file with Whisper and return the SUM of all numbers
    spoken in it.

    Args:
        file_path: Path to the audio file, relative to the "LLMFiles"
            directory (the join below assumes a relative path).

    Returns:
        str: The total as a stringified integer, ready for submission, or an
        "Error: ..." message when no numbers were found or transcription
        failed. Errors are returned in-band (not raised) so the calling
        agent can surface them as tool output.
    """
    try:
        fp = os.path.join("LLMFiles", file_path)

        # Run Whisper; `result["text"]` is the full transcript. Strip it so
        # leading/trailing whitespace never confuses downstream handling.
        result = model.transcribe(fp)
        text = result["text"].strip()

        # Extract numbers. Whisper frequently renders large spoken numbers
        # with thousands separators (e.g. "1,250"); match those as a single
        # number FIRST so a bare \d+ does not split them into 1 and 250.
        matches = re.findall(r"\d{1,3}(?:,\d{3})+|\d+", text)
        values = [int(m.replace(",", "")) for m in matches]

        if not values:
            return "Error: No numbers detected"

        # Sum all detected numbers and return as a plain integer string.
        return str(sum(values))
    except Exception as e:
        return f"Error: {e}"