import os
import tempfile
import threading

import gradio as gr
import whisper
from pydub import AudioSegment
|
|
def convert_6ch_wav_to_stereo(input_file_path, output_file_path):
    """Downmix a 6-channel WAV file to stereo and write it as WAV.

    Input that does not have exactly 6 channels is not downmixed; it is
    simply re-exported to ``output_file_path`` with its channels untouched.

    For 6-channel input, each stereo side is its front channel overlaid with
    the centre channel and the matching back channel, where the centre and
    back channels are first reduced by 6 dB.

    Args:
        input_file_path: WAV source handed to ``AudioSegment.from_file``.
        output_file_path: Destination handed to ``AudioSegment.export``.
    """
    sound = AudioSegment.from_file(input_file_path, format="wav")
    if sound.channels != 6:
        sound.export(output_file_path, format="wav")
        return

    # Split once and unpack; every split_to_mono() call re-splits all six
    # channels. The channel at index 3 is deliberately left out of the mix
    # (presumably the LFE channel of a 5.1 layout -- confirm with the data).
    (
        front_left,
        front_right,
        center,
        _unused,
        back_left,
        back_right,
    ) = sound.split_to_mono()

    # pydub's `segment - n` lowers the gain by n dB.
    center = center - 6
    back_left = back_left - 6
    back_right = back_right - 6

    stereo_left = front_left.overlay(center).overlay(back_left)
    stereo_right = front_right.overlay(center).overlay(back_right)

    stereo_sound = AudioSegment.from_mono_audiosegments(stereo_left, stereo_right)
    stereo_sound.export(output_file_path, format="wav")
|
|
|
|
# Whisper model shared by all calls; loaded lazily on first use because
# loading it is far more expensive than a single transcription request
# should have to pay every time.
_WHISPER_MODEL = None
# Guards both the lazy load and every use of the shared model, which is not
# assumed to be safe for concurrent transcriptions.
_WHISPER_LOCK = threading.Lock()


def _get_whisper_model():
    """Return the shared Whisper model, loading it on first use.

    The caller must hold ``_WHISPER_LOCK``.
    """
    global _WHISPER_MODEL
    if _WHISPER_MODEL is None:
        _WHISPER_MODEL = whisper.load_model("medium", device="cpu")
    return _WHISPER_MODEL


def judge_command(file_path):
    """Transcribe the English speech in a WAV file and return the text.

    The input is first passed through ``convert_6ch_wav_to_stereo`` and the
    converted file is what Whisper transcribes. The transcript is also
    printed to stdout.

    Args:
        file_path: WAV source, forwarded unchanged to
            ``convert_6ch_wav_to_stereo``.

    Returns:
        The ``'text'`` entry of Whisper's transcription result.
    """
    # A private per-call directory keeps concurrent requests from overwriting
    # each other's intermediate file, and removes it when we are done.
    with tempfile.TemporaryDirectory() as work_dir:
        out_path = os.path.join(work_dir, "out.wav")
        convert_6ch_wav_to_stereo(file_path, out_path)
        with _WHISPER_LOCK:
            result = _get_whisper_model().transcribe(out_path, language="en")

    text_result = result['text']
    print(text_result)
    return text_result
|
|
|
|
def handle_audio_transcription(file_path):
    """Run a transcription for the UI and report the outcome as text.

    This is the boundary between the UI and the transcription code, so any
    failure is turned into a message instead of being raised.

    Args:
        file_path: The uploaded file as delivered by the UI, or a falsy value
            (e.g. ``None``) when nothing has been uploaded.

    Returns:
        A ``(message, text_result)`` tuple. ``message`` is a status line;
        ``text_result`` is the transcript, or ``""`` when there is none.
    """
    # Without this guard the user would see a low-level type error from the
    # audio loader when clicking the button before uploading anything.
    if not file_path:
        return "Please upload a '.wav' file first.", ""

    try:
        text_result = judge_command(file_path)
    except Exception as e:
        # Some exceptions have an empty str(); fall back to the type name so
        # the log never shows a blank message on failure.
        return str(e) or type(e).__name__, ""
    return "Transcription successful!", text_result
|
|
# Introductory text rendered at the top of the page.
_PAGE_INTRO = '''
This space transcribes the spoken words from an audio file to text.
## How to use this Space?
- Upload a '.wav' file.
- The transcription of the audio will be shown after you click the transcribe button.
## Examples
- You can get the test examples from our [Roop Dataset Repo.](https://huggingface.co/datasets/SJTU-TES/WAV2COM)
'''

with gr.Blocks() as audio_transcription_page:

    gr.Markdown(_PAGE_INTRO)

    with gr.Row():
        with gr.Column():
            audio_file = gr.File(
                file_types=[".wav"],
                label="Upload a '.wav' file",
            )
            # Status line: success notice or error text from the handler.
            info = gr.Textbox(
                value="",
                label="Log",
            )
            # The transcript itself, kept separate from the status line.
            transcription = gr.Textbox(
                value="",
                label="Transcription",
                placeholder="Transcription results will appear here...",
            )
            transcribe_button = gr.Button("Transcribe")

    # handle_audio_transcription returns (message, text_result), so it needs
    # one output component per value. With a single output wired, Gradio
    # shows the whole tuple's repr in that one box.
    transcribe_button.click(
        handle_audio_transcription,
        [audio_file],
        [info, transcription],
    )


if __name__ == "__main__":
    audio_transcription_page.launch(debug=True)
|
|