Spaces:

Quantified
/

clip-detector

Running

App Files Files Community

clip-detector / app.py

sergiomar73

Update app.py

db6fe2d verified over 1 year ago

raw

history blame contribute delete

4.67 kB

	import csv
	import datetime
	import requests
	import gradio as gr
	import pandas as pd
	from io import BytesIO
	from pathlib import Path
	from urllib.parse import urlparse
	from pydub import AudioSegment, silence


	def format_seconds(secs):
	    """Format a duration in seconds as an ``MM:SS.mmm`` timestamp.

	    Args:
	        secs: Offset in seconds (int or float). Negative offsets are
	            clamped to zero because a media position cannot precede the
	            start of the file.

	    Returns:
	        A string such as ``"01:01.250"``. Milliseconds are truncated, not
	        rounded. The minutes field is not wrapped at 60, so offsets of one
	        hour or more stay unambiguous (3600 s -> ``"60:00.000"``).
	    """
	    # timedelta rounds the input to whole microseconds; the floor division
	    # then truncates to whole milliseconds.
	    delta = datetime.timedelta(seconds=max(secs, 0))
	    total_ms = delta // datetime.timedelta(milliseconds=1)
	    minutes, remainder_ms = divmod(total_ms, 60_000)
	    whole_secs, millis = divmod(remainder_ms, 1000)
	    return f"{minutes:02d}:{whole_secs:02d}.{millis:03d}"


	def get_filename_and_extension(url):
	    """Split the last path component of *url* into name, stem and suffix.

	    Only the path part of the URL is considered; the query string and
	    fragment are ignored.

	    Returns:
	        A ``(filename, stem, suffix)`` tuple, e.g.
	        ``("clip.mp4", "clip", ".mp4")``. The suffix keeps its leading dot.
	        All three are empty strings when the path has no final component.
	    """
	    filename = Path(urlparse(url).path).name
	    leaf = Path(filename)
	    return filename, leaf.stem, leaf.suffix


	def calculate_times(input_url, input_text, ms_before, ms_after):
	    """Detect the non-silent clips of a media file and pair them with texts.

	    The file at *input_url* is downloaded, decoded with pydub and scanned
	    for non-silent ranges. Each range is widened by the requested padding
	    and matched, in order, with one line of *input_text*.

	    Args:
	        input_url: URL of the media file; its extension is passed to pydub
	            as the container format.
	        input_text: One clip text per line.
	        ms_before: Milliseconds subtracted from each detected start.
	        ms_after: Milliseconds added to each detected stop.

	    Returns:
	        A ``(text, tsv_path, dataframe)`` tuple. When the number of text
	        lines differs from the number of detected segments, ``text`` is an
	        explanatory message, ``tsv_path`` is ``None`` and the dataframe
	        holds a single blank row. Otherwise ``text`` is the tab-separated
	        listing, ``tsv_path`` is ``"clips.tsv"`` (written to the working
	        directory) and the dataframe has one row per clip.

	    Raises:
	        requests.HTTPError: If the download returns an error status.
	    """
	    _, _, file_extension = get_filename_and_extension(input_url)
	    file_extension = file_extension.replace(".", "")

	    # Fail fast on a stalled server or an HTTP error page instead of
	    # handing non-media bytes to the decoder.
	    response = requests.get(input_url, timeout=30)
	    response.raise_for_status()
	    audio = AudioSegment.from_file(BytesIO(response.content), file_extension)

	    non_silent_parts = silence.detect_nonsilent(
	        audio, min_silence_len=1250, silence_thresh=-80
	    )
	    segments = [
	        (
	            # A clip starting within `ms_before` of the beginning would
	            # otherwise get a negative start time.
	            format_seconds(max(start - ms_before, 0) / 1000),
	            format_seconds((stop + ms_after) / 1000),
	        )
	        for start, stop in non_silent_parts
	    ]

	    columns = ["text", "start", "stop", "file"]
	    lines = input_text.splitlines()
	    if len(lines) != len(segments):
	        msg = f"DETECTED CLIPS AND INPUT LINES DO NOT MATCH!\n\nYou are expecting {len(lines)} clips BUT {len(segments)} segments have been found in the video file.\n\nPlease, review the list of clips or transcribe the audio to check the clips.\n\nUSEFUL FREE TOOLS:\n\nTranscribe audio to VTT file\nhttps://replicate.com/openai/whisper\n\nVTT file viewer\nhttps://www.happyscribe.com/subtitle-tools/online-subtitle-editor/free"
	        return msg, None, pd.DataFrame([["", "", "", ""]], columns=columns)

	    rows = [
	        [line.rstrip(), start, stop, input_url]
	        for line, (start, stop) in zip(lines, segments)
	    ]
	    df = pd.DataFrame(rows, columns=columns)
	    df.to_csv(
	        "clips.tsv",
	        sep="\t",
	        encoding="utf-8",
	        index=False,
	        header=False,
	        quoting=csv.QUOTE_NONE,
	    )
	    listing = "\n".join(
	        f"{text}\t{start}\t{stop}\t{url}" for text, start, stop, url in rows
	    )
	    return listing, "clips.tsv", df


	def load_video(input_url):
	    """Return *input_url* unchanged, or ``None`` when it is empty or missing."""
	    return input_url or None


	# Styling for components tagged with the "required" element class.
	# Declarations must be separated by ";" - with a "," the whole declaration
	# is invalid CSS and neither property is applied.
	css = """
	.required {background-color: #FFCCCB !important; font-size: 24px !important}
	"""

	# Build the Gradio interface; `css` is passed in so the "required" element
	# class used by the two textboxes below is styled.
	# NOTE(review): indentation is flattened in this view of the file, so the
	# nesting of the layout contexts below cannot be read off - confirm it
	# against the original file.
	with gr.Blocks(title="Start and stop times", css=css) as app:
	gr.Markdown(
	"""# Start and stop times generator
	Please, fill the Video URL and Clip texts textboxes and click the Run button"""
	)
	with gr.Row():
	with gr.Column(scale=3):
	# Media URL: first input of calculate_times and the source for the
	# video component (see the blur handler below).
	text1 = gr.Textbox(
	lines=1,
	placeholder="Video URL...",
	label="Video URL",
	elem_classes=["required"],
	)
	# Clip texts: second input of calculate_times, which splits it into lines.
	text2 = gr.Textbox(
	lines=5,
	max_lines=10,
	placeholder="List of clip texts...",
	label="Clip texts",
	elem_classes=["required"],
	)
	# Padding passed to calculate_times as ms_before (0-1000, step 50).
	slider1 = gr.Slider(
	minimum=0,
	maximum=1000,
	step=50,
	value=0,
	label="Milliseconds BEFORE each clip",
	)
	# Padding passed to calculate_times as ms_after (0-1000, step 50, default 500).
	slider2 = gr.Slider(
	minimum=0,
	maximum=1000,
	step=50,
	value=500,
	label="Milliseconds AFTER each clip",
	)
	btn_submit = gr.Button(value="Run", variant="primary", size="sm")
	# Non-interactive video component; it receives the return value of load_video.
	video = gr.Video(
	format="mp4", label="Video file", show_label=True, interactive=False
	)
	with gr.Column(scale=5):
	# The three outputs of calculate_times, in the order wired up below:
	# `lines` (text), `file` (TSV path or None) and `data` (dataframe).
	file = gr.File(
	label="Clips", show_label=True, interactive=False, file_count="single"
	)
	lines = gr.Textbox(
	lines=10, label="Clips", interactive=False, show_copy_button=True
	)
	data = gr.Dataframe(
	label="Clips",
	headers=["text", "start", "stop", "file"],
	datatype=["str", "str", "str", "str"],
	# row_count=0,
	)
	# Run the detection when the button is clicked.
	btn_submit.click(
	calculate_times,
	inputs=[text1, text2, slider1, slider2],
	outputs=[lines, file, data],
	)
	# Pass the URL to the video component when the URL textbox loses focus.
	text1.blur(load_video, inputs=[text1], outputs=[video])

	# Launch the app; there is no `if __name__ == "__main__"` guard around this call.
	app.launch()