# audio-to-srt / app.py
# Nav3005's picture
# Update app.py
# 5ea61b6 verified
import os
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
import gradio as gr
import pysrt
import requests
import tempfile
from faster_whisper import WhisperModel
from datetime import timedelta
from urllib.parse import urlparse
# -----------------------------
# Core subtitle generator
# -----------------------------
class LinearSubtitleGenerator:
    """Generate SRT subtitles whose cue length grows linearly (1, 2, 3, ... words).

    Uses faster-whisper for word-level transcription timestamps and pysrt
    to assemble the subtitle file.
    """

    def __init__(self, model_size="base"):
        # CPU + int8 quantization keeps the memory footprint small enough
        # for shared hosting (OMP/MKL thread counts are pinned at module top).
        self.model = WhisperModel(
            model_size,
            device="cpu",
            compute_type="int8"
        )

    def transcribe(self, audio_path):
        """Run whisper on ``audio_path``.

        Returns an iterable of segments carrying per-word timestamps
        (``word_timestamps=True``), with non-speech filtered by VAD.
        """
        segments, _ = self.model.transcribe(
            audio_path,
            word_timestamps=True,
            vad_filter=True
        )
        return segments

    def extract_words(self, segments):
        """Flatten segments into a list of ``{"word", "start", "end"}`` dicts.

        Segments without a word list and words lacking start/end timestamps
        are skipped (whisper occasionally emits words without timing info).
        """
        words = []
        for segment in segments:
            if not segment.words:
                continue
            for w in segment.words:
                if w.start is None or w.end is None:
                    continue
                words.append({
                    "word": w.word.strip(),
                    "start": float(w.start),
                    "end": float(w.end)
                })
        return words

    def create_linear_subtitles(self, words):
        """Group words into cues of growing size 1, 2, 3, ...

        If the leftover tail would be smaller than the next planned cue it
        is absorbed into the current cue, so the file never ends with a
        tiny one-or-two-word subtitle.
        """
        subs = pysrt.SubRipFile()
        total_words = len(words)
        index = 0
        subtitle_index = 1
        current_size = 1  # grows by one after each regular (non-absorbing) cue
        while index < total_words:
            planned_size = current_size
            remaining = total_words - (index + planned_size)
            next_size = current_size + 1
            # absorb leftovers to avoid tiny last subtitle
            if remaining > 0 and remaining < next_size:
                planned_size += remaining
            subtitle_words = []
            start_time = None
            end_time = None
            for _ in range(planned_size):
                if index >= total_words:
                    break
                w = words[index]
                subtitle_words.append(w["word"])
                if start_time is None:
                    start_time = w["start"]
                end_time = w["end"]
                index += 1
            subs.append(
                pysrt.SubRipItem(
                    index=subtitle_index,
                    start=self._to_time(start_time),
                    end=self._to_time(end_time),
                    text=" ".join(subtitle_words)
                )
            )
            subtitle_index += 1
            if planned_size == current_size:
                current_size += 1
            else:
                # Leftovers were absorbed into this cue, so every word is
                # consumed and the loop is done.
                break
        return subs

    def _to_time(self, seconds):
        """Convert a float second offset into a ``pysrt.SubRipTime``.

        BUGFIX: the previous version read ``td.seconds`` directly, which
        excludes whole days (``timedelta`` normalizes them into ``.days``),
        so any timestamp at or beyond 24 hours wrapped around to zero.
        Fold ``td.days`` back into the total before splitting into fields.
        """
        td = timedelta(seconds=seconds)
        total_secs = td.days * 86400 + td.seconds
        return pysrt.SubRipTime(
            hours=total_secs // 3600,
            minutes=(total_secs % 3600) // 60,
            seconds=total_secs % 60,
            milliseconds=td.microseconds // 1000
        )
# -----------------------------
# Helper: download audio from URL
# -----------------------------
def download_audio(url: str) -> str:
    """Download an http(s) audio URL to a local temp file and return its path.

    Raises:
        ValueError: if the URL scheme is not http or https.
        requests.HTTPError: if the server responds with an error status.

    The caller owns the returned file and is responsible for deleting it.
    """
    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https"):
        raise ValueError("Invalid URL scheme")
    # Preserve the remote extension so downstream decoders can sniff format.
    suffix = os.path.splitext(parsed.path)[1] or ".wav"
    # Context managers fix two leaks in the previous version: the streamed
    # response was never closed (held a pooled connection), and a failure
    # mid-download left an orphaned temp file on disk.
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        try:
            with tmp:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip keep-alive chunks
                        tmp.write(chunk)
        except Exception:
            os.unlink(tmp.name)
            raise
    return tmp.name
# -----------------------------
# Gradio callable function
# -----------------------------
def generate_srt(audio_file, audio_url, model_size):
    """Gradio callback: transcribe one audio source and return an .srt path.

    Exactly one of ``audio_file`` (local filepath from the upload widget)
    or ``audio_url`` (http/https URL) must be provided.

    Raises:
        gr.Error: if zero or both inputs were supplied.
    """
    # exactly one input must be provided
    if bool(audio_file) == bool(audio_url):
        raise gr.Error(
            "Please provide EITHER an audio file OR an audio URL (not both)."
        )
    downloaded = bool(audio_url)
    audio_path = download_audio(audio_url) if downloaded else audio_file
    try:
        generator = LinearSubtitleGenerator(model_size)
        segments = generator.transcribe(audio_path)
        words = generator.extract_words(segments)
        subs = generator.create_linear_subtitles(words)
        out = tempfile.NamedTemporaryFile(delete=False, suffix=".srt")
        # Close our handle before pysrt reopens the path by name
        # (required on Windows, harmless elsewhere).
        out.close()
        subs.save(out.name, encoding="utf-8")
        return out.name
    finally:
        # BUGFIX: the previous version leaked the downloaded temp audio
        # file on every URL request; remove it once transcription is done
        # (or failed). Best-effort — never mask the real exception.
        if downloaded:
            try:
                os.unlink(audio_path)
            except OSError:
                pass
# -----------------------------
# Gradio UI (UNCHANGED)
# -----------------------------
# Build the Gradio UI: an audio upload OR a URL textbox (mutually
# exclusive, enforced inside generate_srt), a whisper model picker,
# and a file widget to download the generated .srt.
with gr.Blocks(title="Subtitle Generator") as demo:
    gr.Markdown(
        """
# srt generator
"""
    )
    with gr.Row():
        audio_file = gr.Audio(
            label="Upload Audio File",
            type="filepath"
        )
        audio_url = gr.Textbox(
            label="Audio URL (http/https)",
            placeholder="https://example.com/audio.wav"
        )
    model_choice = gr.Dropdown(
        choices=["tiny", "base", "small", "medium"],
        value="base",
        label="Whisper Model"
    )
    generate_btn = gr.Button("Generate SRT")
    output_file = gr.File(label="Download SRT")
    # Wire the button to the callback; output is the saved .srt filepath.
    generate_btn.click(
        fn=generate_srt,
        inputs=[audio_file, audio_url, model_choice],
        outputs=output_file
    )
if __name__ == "__main__":
    # mcp_server=True additionally exposes the app as an MCP endpoint.
    demo.launch(mcp_server=True)