# Spaces: Running  (hosting-page header residue; not part of the program)
| import csv | |
| import datetime | |
| import requests | |
| import gradio as gr | |
| import pandas as pd | |
| from io import BytesIO | |
| from pathlib import Path | |
| from urllib.parse import urlparse | |
| from pydub import AudioSegment, silence | |
def format_seconds(secs):
    """Format an offset in seconds as an ``MM:SS.mmm`` timestamp.

    Args:
        secs: Offset in seconds (int or float). Negative values are
            clamped to zero, because a position before the start of the
            media has no meaning and previously raised ``OverflowError``.

    Returns:
        A string such as ``"01:23.450"``. The minutes field is not
        wrapped at 60, so offsets of one hour or more stay unambiguous
        (3661 seconds becomes ``"61:01.000"``). Fractions of a
        millisecond are truncated, not rounded.
    """
    # timedelta normalises the value to whole microseconds, which avoids
    # off-by-one milliseconds caused by binary float arithmetic.
    offset = datetime.timedelta(seconds=max(secs, 0))
    whole_seconds = offset.days * 86400 + offset.seconds
    total_ms = whole_seconds * 1000 + offset.microseconds // 1000
    minutes, remainder_ms = divmod(total_ms, 60000)
    seconds, millis = divmod(remainder_ms, 1000)
    return f"{minutes:02d}:{seconds:02d}.{millis:03d}"
def get_filename_and_extension(url):
    """Split the last path component of a URL into name, stem and suffix.

    Only the path part of the URL is considered; the query string and
    fragment are ignored.

    Args:
        url: The URL to inspect.

    Returns:
        A tuple ``(filename, stem, suffix)``, e.g.
        ``("video.mp4", "video", ".mp4")``. The suffix keeps its leading
        dot and is an empty string when the name has no extension.
    """
    leaf = Path(urlparse(url).path).name
    leaf_path = Path(leaf)
    return leaf, leaf_path.stem, leaf_path.suffix
def calculate_times(input_url, input_text, ms_before, ms_after):
    """Detect the non-silent clips of a remote media file and time them.

    The file at ``input_url`` is downloaded and decoded, its non-silent
    ranges are detected, and each range is paired, in order, with one
    line of ``input_text``.

    Args:
        input_url: URL of the media file. Its path extension is passed
            to the decoder as the container format.
        input_text: Clip texts, one per line, in playback order.
        ms_before: Milliseconds of padding subtracted from each clip start.
        ms_after: Milliseconds of padding added to each clip stop.

    Returns:
        A tuple ``(text, file_path, dataframe)``:

        * on success, the tab-separated rows joined by newlines, the
          path of the written ``clips.tsv`` file, and a DataFrame with
          columns ``text``, ``start``, ``stop``, ``file``;
        * when the number of text lines differs from the number of
          detected clips, an explanatory message, ``None``, and a
          DataFrame holding a single empty row.

    Raises:
        requests.RequestException: If the download fails, times out, or
            the server answers with an HTTP error status.
    """
    _, _, file_extension = get_filename_and_extension(input_url)
    audio_format = file_extension.replace(".", "")

    # Bound the download time and fail on HTTP errors, so that an error
    # page is never handed to the audio decoder.
    response = requests.get(input_url, timeout=60)
    response.raise_for_status()
    audio = AudioSegment.from_file(BytesIO(response.content), audio_format)

    non_silent_parts = silence.detect_nonsilent(
        audio, min_silence_len=1250, silence_thresh=-80
    )
    segments = [
        (
            # Padding must not move a clip start before the media start.
            format_seconds(max(start - ms_before, 0) / 1000),
            format_seconds((stop + ms_after) / 1000),
        )
        for start, stop in non_silent_parts
    ]

    columns = ["text", "start", "stop", "file"]
    lines = input_text.splitlines()
    if len(lines) != len(segments):
        msg = f"DETECTED CLIPS AND INPUT LINES DO NOT MATCH!\n\nYou are expecting {len(lines)} clips BUT {len(segments)} segments have been found in the video file.\n\nPlease, review the list of clips or transcribe the audio to check the clips.\n\nUSEFUL FREE TOOLS:\n\nTranscribe audio to VTT file\nhttps://replicate.com/openai/whisper\n\nVTT file viewer\nhttps://www.happyscribe.com/subtitle-tools/online-subtitle-editor/free"
        return msg, None, pd.DataFrame([["", "", "", ""]], columns=columns)

    # A tab inside a clip text would break the unquoted TSV output (and
    # make to_csv raise with QUOTE_NONE), so it is replaced by a space.
    rows = [
        [line.rstrip().replace("\t", " "), start, stop, input_url]
        for line, (start, stop) in zip(lines, segments)
    ]
    df = pd.DataFrame(rows, columns=columns)
    df.to_csv(
        "clips.tsv",
        sep="\t",
        encoding="utf-8",
        index=False,
        header=False,
        quoting=csv.QUOTE_NONE,
    )
    return "\n".join("\t".join(row) for row in rows), "clips.tsv", df
def load_video(input_url):
    """Return the URL to show in the video player, or ``None`` if empty.

    Args:
        input_url: Content of the URL textbox.

    Returns:
        ``input_url`` unchanged when it is truthy, otherwise ``None``.
    """
    return input_url or None
# Highlight the mandatory inputs. Declarations inside a CSS rule are
# separated by ";" (a "," makes the rule invalid, so no style applies).
css = """
.required {background-color: #FFCCCB !important; font-size: 24px !important}
"""

# NOTE(review): the component nesting below was reconstructed from a
# listing that had lost its indentation -- confirm the layout.
with gr.Blocks(title="Start and stop times", css=css) as app:
    gr.Markdown(
        """# Start and stop times generator
Please, fill the Video URL and Clip texts textboxes and click the Run button"""
    )
    with gr.Row():
        # Left column: inputs, padding controls, run button and preview.
        with gr.Column(scale=3):
            text1 = gr.Textbox(
                lines=1,
                placeholder="Video URL...",
                label="Video URL",
                elem_classes=["required"],
            )
            text2 = gr.Textbox(
                lines=5,
                max_lines=10,
                placeholder="List of clip texts...",
                label="Clip texts",
                elem_classes=["required"],
            )
            slider1 = gr.Slider(
                minimum=0,
                maximum=1000,
                step=50,
                value=0,
                label="Milliseconds BEFORE each clip",
            )
            slider2 = gr.Slider(
                minimum=0,
                maximum=1000,
                step=50,
                value=500,
                label="Milliseconds AFTER each clip",
            )
            btn_submit = gr.Button(value="Run", variant="primary", size="sm")
            video = gr.Video(
                format="mp4", label="Video file", show_label=True, interactive=False
            )
        # Right column: the three views of the computed clips.
        with gr.Column(scale=5):
            file = gr.File(
                label="Clips", show_label=True, interactive=False, file_count="single"
            )
            lines = gr.Textbox(
                lines=10, label="Clips", interactive=False, show_copy_button=True
            )
            data = gr.Dataframe(
                label="Clips",
                headers=["text", "start", "stop", "file"],
                datatype=["str", "str", "str", "str"],
                # row_count=0,
            )
    # Run: compute the clip times and fill the three outputs.
    btn_submit.click(
        calculate_times,
        inputs=[text1, text2, slider1, slider2],
        outputs=[lines, file, data],
    )
    # Leaving the URL textbox loads the video into the preview player.
    text1.blur(load_video, inputs=[text1], outputs=[video])

app.launch()