Spaces:
Runtime error
Runtime error
import os
import subprocess
import time
from pathlib import Path

import gradio as gr
import pandas as pd
import pysrt
# One-time bootstrap: when no ./whisper.cpp directory exists yet, fetch the
# whisper.cpp sources and the Finnish ggml model files, then build the binary.
if os.path.isdir(f'{os.getcwd() + os.sep}whisper.cpp'):
    print("Models already loaded")
else:
    os.system('git clone https://github.com/ggerganov/whisper.cpp.git')
    # Pin the clone to a fixed commit. `-C` makes git operate on the cloned
    # repository; without it the reset targets whatever repository the
    # current working directory belongs to and leaves whisper.cpp untouched.
    os.system("git -C ./whisper.cpp reset --hard 3163090d89c47933d7c2a080b224f0d2e842b468")
    os.system('git clone https://huggingface.co/Finnish-NLP/Finnish-finetuned-whisper-models-ggml-format')
    os.system('make -C ./whisper.cpp')
# Model size name -> path of the matching ggml checkpoint inside the cloned
# Finnish-finetuned-whisper-models-ggml-format directory.
whisper_modelpath_translator = dict(
    medium="./Finnish-finetuned-whisper-models-ggml-format/ggml-model-fi-medium.bin",
    large="./Finnish-finetuned-whisper-models-ggml-format/ggml-model-fi-large-v3.bin",
)

# Selectable model names, in the insertion order of the mapping above.
whisper_models = list(whisper_modelpath_translator)
def _format_subtitle_time(stamp):
    """Render a pysrt time object as ``HH:MM:SS.mmm`` with zero-padded fields."""
    return (
        f"{stamp.hours:02d}:{stamp.minutes:02d}:{stamp.seconds:02d}"
        f".{stamp.milliseconds:03d}"
    )


def speech_to_text(audio_path, whisper_model):
    """Transcribe an audio file with whisper.cpp and return the segments.

    The input is first converted with ffmpeg to a 16 kHz, mono, 16-bit PCM
    WAV file written next to the input as ``<name>_converted.wav``. The
    ``./whisper.cpp/main`` binary is then run on that file with ``-osrt``,
    and the resulting ``<wav>.srt`` file is parsed into a DataFrame.

    Args:
        audio_path: Path of the audio file to transcribe.
        whisper_model: Key of ``whisper_modelpath_translator`` selecting the
            model file.

    Returns:
        A pandas DataFrame with the columns ``start``, ``end`` and ``text``.
        Timestamps are strings formatted as ``HH:MM:SS.mmm``. If the SRT
        file cannot be read or parsed, the error is printed and an empty
        frame with the same columns is returned.

    Raises:
        ValueError: If ``audio_path`` is None or ``whisper_model`` is unknown.
        RuntimeError: If the ffmpeg conversion or the whisper.cpp run fails.
    """
    if audio_path is None:
        # The argument cannot change during the call, so waiting and
        # re-checking it would only delay this error.
        raise ValueError("Error no audio input")
    print(audio_path)

    model_path = whisper_modelpath_translator.get(whisper_model)
    if model_path is None:
        raise ValueError(f"Unknown whisper model: {whisper_model!r}")

    audio_path = str(audio_path)
    # Swap only the final extension. A plain str.replace would rewrite every
    # occurrence of the extension and mangle the whole path when the
    # extension is empty.
    stem, file_ending = os.path.splitext(audio_path)
    print(f'file enging is {file_ending}')
    new_path = f"{stem}_converted.wav"

    # Argument lists (no shell) keep spaces and shell metacharacters in the
    # file name from changing the command; check=True turns a failing exit
    # status into an exception so failures are not silently ignored.
    ffmpeg_cmd = [
        "ffmpeg", "-i", audio_path,
        "-ar", "16000", "-y", "-ac", "1", "-c:a", "pcm_s16le",
        new_path,
    ]
    print("starting conversion to wav")
    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            subprocess.run(ffmpeg_cmd, check=True)
            break
        except (OSError, subprocess.CalledProcessError) as e:
            if attempt == max_attempts:
                raise RuntimeError(f'Error converting audio to wav: {e}') from e
            time.sleep(0.5)
    print("conversion to wav ready")

    print("starting whisper c++")
    srt_path = new_path + ".srt"
    # Remove a stale transcript so an old result is never parsed below.
    try:
        os.remove(srt_path)
    except FileNotFoundError:
        pass
    whisper_cmd = [
        "./whisper.cpp/main", new_path,
        "-t", "4", "-m", model_path, "-osrt", "-l", "fi",
    ]
    try:
        subprocess.run(whisper_cmd, check=True)
    except (OSError, subprocess.CalledProcessError) as e:
        raise RuntimeError(f'Error running Whisper cpp model: {e}') from e
    print("starting whisper done with whisper")

    columns = ['start', 'end', 'text']
    try:
        rows = [
            {
                'start': _format_subtitle_time(sub.start),
                'end': _format_subtitle_time(sub.end),
                'text': sub.text,
            }
            for sub in pysrt.open(srt_path)
        ]
    except Exception as e:
        # Best effort: report the problem and fall back to an empty table.
        print(f"Error creating srt df with error: {e}")
        rows = []
    # Build the frame in one step instead of concatenating row by row.
    return pd.DataFrame(rows, columns=columns)
def _render_cues(rows, millis_separator):
    """Return numbered subtitle cues separated by blank lines.

    ``rows`` is a sequence of ``(start, end, text)`` values; the decimal mark
    in each timestamp is normalised to ``millis_separator``.
    """
    cues = []
    for number, (start, end, text) in enumerate(rows, start=1):
        # str() keeps non-string cells from crashing on .strip().
        start = str(start).strip().replace(',', '.').replace('.', millis_separator)
        end = str(end).strip().replace(',', '.').replace('.', millis_separator)
        cues.append(f"{number}\n{start} --> {end}\n{text}")
    return "\n\n".join(cues) + "\n" if cues else ""


def output_to_files(df):
    """Write the transcription rows to ``subtitles.vtt`` and ``subtitles.srt``.

    Both files are written to the current working directory in UTF-8. Every
    row of ``df`` becomes one numbered cue. The WebVTT file starts with the
    ``WEBVTT`` header followed by a blank line and uses ``.`` before the
    milliseconds; the SRT file uses ``,``. ``df`` itself is not modified.

    Args:
        df: DataFrame with the columns ``start``, ``end`` and ``text``.

    Returns:
        The list ``['subtitles.vtt', 'subtitles.srt']``.
    """
    print("Starting SRT-file creation")
    print(df.head())
    rows = list(zip(df['start'], df['end'], df['text']))

    with open('subtitles.vtt', 'w', encoding="utf-8") as file:
        print("Starting WEBVTT-file creation")
        # Header, then a blank line, then the cues; the first row is a
        # regular cue like every other row.
        file.write('WEBVTT\n\n')
        file.write(_render_cues(rows, '.'))
        print("WEBVTT DONE")

    with open('subtitles.srt', 'w', encoding="utf-8") as file:
        print("Starting SRT-file creation")
        file.write(_render_cues(rows, ','))
        print("SRT DONE")

    subtitle_files_out = ['subtitles.vtt', 'subtitles.srt']
    return subtitle_files_out
# ---- Gradio Layout -----
# Custom stylesheet handed to the Blocks container.
_PAGE_CSS = '''
#cut_btn, #reset_btn { align-self:stretch; }
#\\31 3 { max-width: 540px; }
.output-markdown {max-width: 65ch !important;}
'''

# Introductory text rendered at the top of the page.
_INTRO_MARKDOWN = '''
# Simple Finnish Audio --> Text app
### This space allows you to:
1. Insert audio file or record with microphone
2. Run audio through transcription process using speech recognition models
3. Download generated transcriptions in .vtt and .srt formats
'''

demo = gr.Blocks(css=_PAGE_CSS)
demo.encrypt = False

with demo:
    # Page introduction.
    with gr.Row(), gr.Column():
        gr.Markdown(_INTRO_MARKDOWN)

    # Step 1 inputs: audio source, trigger button and model selection.
    with gr.Row(), gr.Column():
        audio_in = gr.Audio(label="Audio file", type='filepath')
        transcribe_btn = gr.Button("Step 1. Transcribe audio")
        selected_whisper_model = gr.Dropdown(
            choices=whisper_models,
            type="value",
            value="large",
            label="Selected Whisper model",
            interactive=True,
        )

    # Table that receives the transcription rows.
    with gr.Row(), gr.Column():
        transcription_df = gr.DataFrame(
            headers=['start', 'end', 'text'],
            label="Transcription dataframe",
        )

    # Step 2 trigger.
    with gr.Row(), gr.Column():
        translate_transcriptions_button = gr.Button("Step 2. Create subtitle files")

    # Download area for the generated subtitle files.
    with gr.Row(), gr.Column():
        gr.Markdown('''##### From here you can download subtitles in .srt or .vtt format''')
        subtitle_files = gr.File(
            label="Download files",
            file_count="multiple",
            type="filepath",
            interactive=False,
        )

    # Functionalities
    # Step 1: audio + model choice -> transcription table.
    transcribe_btn.click(
        speech_to_text,
        [audio_in, selected_whisper_model],
        [transcription_df],
    )
    # Step 2: transcription table -> downloadable subtitle files.
    translate_transcriptions_button.click(
        output_to_files,
        transcription_df,
        [subtitle_files],
    )

demo.launch()