Vlad Bastina committed on
Commit
9d517ed
·
1 Parent(s): 8203f61
Files changed (1) hide show
  1. translation.py +16 -17
translation.py CHANGED
@@ -11,13 +11,6 @@ def get_audio_properties(file_path):
11
  channels = wav_file.getnchannels()
12
  return sample_rate, channels
13
 
14
- def convert_to_mono(input_path, output_path):
15
- """Convert video from 2+ channel audio to 1 channel audio for a
16
- single detection"""
17
- audio = AudioSegment.from_wav(input_path)
18
- mono_audio = audio.set_channels(1)
19
- mono_audio.export(output_path, format="wav")
20
-
21
  def transcribe_audio(file_path):
22
  """Gets the .wav file path and perform speech to text
23
  returns the string witch represents the words spoken"""
@@ -50,18 +43,24 @@ def transcribe_audio(file_path):
50
  # Return or print the concatenated transcript
51
  return concatenated_transcript
52
 
53
- def get_transcription_from_sound(file_path:str)->str:
54
- """Converts the audio to a single channel and calls the transcription function"""
55
- output_path = "audio_mono.wav"
56
- convert_to_mono(file_path,output_path)
57
-
 
 
 
 
 
 
 
 
58
  final_transcript = transcribe_audio(output_path)
59
-
60
  return final_transcript
61
 
62
  if __name__=="__main__":
63
  file_path = "jackhammer.wav"
64
- output_path = "audio_mono.wav"
65
- convert_to_mono(file_path,output_path)
66
- final_transcript = transcribe_audio(output_path)
67
- print(final_transcript)
 
11
  channels = wav_file.getnchannels()
12
  return sample_rate, channels
13
 
 
 
 
 
 
 
 
14
  def transcribe_audio(file_path):
15
  """Gets the .wav file path and perform speech to text
16
  returns the string witch represents the words spoken"""
 
43
  # Return or print the concatenated transcript
44
  return concatenated_transcript
45
 
46
+ def get_transcription_from_sound(file_path: str) -> str:
47
+ """Converts the audio to a single channel, 16-bit, 44100 Hz WAV and calls the transcription function"""
48
+ # Load the audio file (handles both WAV and MP3)
49
+ audio = AudioSegment.from_file(file_path)
50
+
51
+ # Convert to mono, 16-bit, 44100 Hz WAV
52
+ audio = audio.set_channels(1)
53
+ audio = audio.set_frame_rate(44100)
54
+ audio = audio.set_sample_width(2) # 2 bytes = 16-bit
55
+
56
+ output_path = "audio_processed.wav"
57
+ audio.export(output_path, format="wav")
58
+
59
  final_transcript = transcribe_audio(output_path)
60
+
61
  return final_transcript
62
 
63
  if __name__=="__main__":
64
  file_path = "jackhammer.wav"
65
+ final_transcript = get_transcription_from_sound(file_path)
66
+ print(final_transcript)