Spaces:
Build error
Build error
| import gradio as gr | |
| import subprocess | |
| import os | |
| import whisper | |
| from whisper.utils import write_vtt | |
# Load the Whisper 'tiny' checkpoint once at import time; transcribe() reuses
# this module-level instance on every call instead of reloading it.
model = whisper.load_model('tiny')
# Title text for the app. NOTE(review): not referenced anywhere in the visible
# code -- confirm whether it was meant to be shown in the UI.
title = 'Add Captions(CC) to your videos'
def convert_mp4_mp3(file, output="mp3"):
    """
    Convert the input video file to an audio file (MP4 -> MP3) using FFMPEG.

    Args:
        file: Path of the media file to convert.
        output: Extension (without the leading dot) of the file to produce.

    Returns:
        Path of the converted file: ``file`` with its extension replaced by
        ``output``. If ``file`` already has that exact path, it is returned
        unchanged and ffmpeg is not invoked.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status. The exception's ``stderr`` holds ffmpeg's diagnostics.
    """
    stem, _ = os.path.splitext(file)
    target = f"{stem}.{output}"

    # Input is already in the requested format: converting a file onto itself
    # cannot succeed, so hand the input back as-is.
    if target == file:
        return file

    # check=True surfaces a failed conversion here instead of returning a
    # path to a file that was never written; stderr is kept for diagnostics.
    subprocess.run(
        ['ffmpeg', '-y', '-i', file, target],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        check=True,
    )
    return target
def _escape_ffmpeg_filter_value(value):
    """Escape a string for use as an option value inside an ffmpeg ``-vf`` graph."""
    # ffmpeg parses the text twice: first as a filter option value (special
    # characters: \ ' :) and then as part of the filtergraph description
    # (special characters: \ ' [ ] , ;). Apply both levels in that order.
    for special in ("\\':", "\\'[],;"):
        value = "".join("\\" + ch if ch in special else ch for ch in value)
    return value


def transcribe(video):
    """
    Transcribe the speech in the video file using the Whisper model
    and render the resulting captions onto the video.

    The audio track is extracted with ``convert_mp4_mp3``, passed to the
    module-level ``model`` with ``task='translate'``, and the returned
    segments are written to ``<name>.vtt`` in the current working directory.
    ffmpeg then renders that subtitle file onto the input video.

    Args:
        video: Path of the input video file.

    Returns:
        Path of the subtitled video, ``<name>_subtitled.mp4``, where
        ``<name>`` is the base name of the extracted audio file.

    Raises:
        subprocess.CalledProcessError: If the ffmpeg rendering step exits
            with a non-zero status.
    """
    audio_file = convert_mp4_mp3(video)

    # Decoding configuration passed straight through to model.transcribe().
    options = dict(beam_size=5, best_of=5, fp16=False)
    translate_options = dict(task='translate', **options)
    result = model.transcribe(audio_file, **translate_options)

    # Empty output_dir means both artifacts land in the working directory.
    output_dir = ''
    audio_path = os.path.splitext(os.path.basename(audio_file))[0]
    subtitle = os.path.join(output_dir, audio_path + '.vtt')
    output_video = os.path.join(output_dir, f'{audio_path}_subtitled.mp4')

    # Write the subtitles to a .vtt file. The encoding is pinned to UTF-8 so
    # non-ASCII caption text does not depend on the host locale.
    with open(subtitle, 'w', encoding='utf-8') as f:
        write_vtt(result['segments'], file=f)

    # Render the subtitles onto the input video. An argument list (no shell)
    # keeps paths with spaces or shell metacharacters intact, and '-y' lets a
    # repeated run overwrite the previous output instead of stalling on
    # ffmpeg's overwrite prompt.
    subprocess.run(
        [
            'ffmpeg', '-y',
            '-i', video,
            '-vf', f'subtitles={_escape_ffmpeg_filter_value(subtitle)}',
            output_video,
        ],
        check=True,
    )
    return output_video
# Gradio UI: one row holding the input and output video players, a button
# below it, and a click handler that runs transcribe() on the uploaded video.
# NOTE(review): the source lost its indentation, so the exact nesting of the
# output player, the button and the click binding is reconstructed -- confirm
# against the intended layout.
with gr.Blocks() as block:
    with gr.Group():
        with gr.Box():
            with gr.Row().style():
                input_video = gr.Video(
                    label="Input Video",
                    type="filepath",
                    mirror_webcam=False,
                )
                output_video = gr.Video()
            btn = gr.Button('Generate Subtitle Video')

    # The handler receives the input player's value and its return value is
    # shown in the output player.
    btn.click(transcribe, inputs=[input_video], outputs=[output_video])

# Start the app with request queuing enabled.
block.launch(enable_queue=True)