Spaces:

VladB46
/

VerbalSentimentAnalysis

Sleeping

Vlad Bastina committed on Oct 16, 2025

Commit

9d517ed

1 Parent(s): 8203f61

changes

Files changed (1) hide show

translation.py CHANGED Viewed

@@ -11,13 +11,6 @@ def get_audio_properties(file_path):
         channels = wav_file.getnchannels()
     return sample_rate, channels
-def convert_to_mono(input_path, output_path):
-    """Convert video from 2+ channel audio to 1 channel audio for a
-    single detection"""
-    audio = AudioSegment.from_wav(input_path)
-    mono_audio = audio.set_channels(1)
-    mono_audio.export(output_path, format="wav")
 def transcribe_audio(file_path):
     """Gets the .wav file path and perform speech to text
     returns the string witch represents the words spoken"""
@@ -50,18 +43,24 @@ def transcribe_audio(file_path):
     # Return or print the concatenated transcript
     return concatenated_transcript
-def get_transcription_from_sound(file_path:str)->str:
-    """Converts the audio to a single channel and calls the transcription function"""
-    output_path = "audio_mono.wav"
-    convert_to_mono(file_path,output_path)
     final_transcript = transcribe_audio(output_path)
     return final_transcript
 if __name__=="__main__":
     file_path = "jackhammer.wav"
-    output_path = "audio_mono.wav"
-    convert_to_mono(file_path,output_path)
-    final_transcript = transcribe_audio(output_path)
-    print(final_transcript)

         channels = wav_file.getnchannels()
     return sample_rate, channels
 def transcribe_audio(file_path):
     """Gets the .wav file path and perform speech to text
     returns the string witch represents the words spoken"""
     # Return or print the concatenated transcript
     return concatenated_transcript
+def get_transcription_from_sound(file_path: str) -> str:
+    """Converts the audio to a single channel, 16-bit, 44100 Hz WAV and calls the transcription function"""
+    # Load the audio file (handles both WAV and MP3)
+    audio = AudioSegment.from_file(file_path)
+    # Convert to mono, 16-bit, 44100 Hz WAV
+    audio = audio.set_channels(1)
+    audio = audio.set_frame_rate(44100)
+    audio = audio.set_sample_width(2)  # 2 bytes = 16-bit
+    output_path = "audio_processed.wav"
+    audio.export(output_path, format="wav")
     final_transcript = transcribe_audio(output_path)
     return final_transcript
 if __name__=="__main__":
     file_path = "jackhammer.wav"
+    final_transcript = get_transcription_from_sound(file_path)
+    print(final_transcript)