# Uploaded by eubottura via huggingface_hub — commit c2c1bfd (verified)
import gradio as gr
import json
import re
from collections import Counter
from datetime import timedelta
from typing import List, Dict, Any, Optional, Tuple
# Language-specific segmentation rules, keyed by ISO 639-1 code.
#   trigger_words:       discourse connectors that suggest a line break
#   forbidden_endings:   words a subtitle line should not end on (articles,
#                        conjunctions, prepositions); only membership is
#                        tested, so order is irrelevant
#   sentence_boundaries: terminal punctuation marks
LANGUAGE_RULES = {
    "en": {
        "trigger_words": ["however", "but", "therefore", "meanwhile", "nevertheless"],
        "forbidden_endings": ["a", "an", "the", "and", "but", "or", "for", "nor", "on", "at", "to", "from", "by", "of", "in", "with"],
        "sentence_boundaries": [".", "?", "!"],
    },
    "es": {
        "trigger_words": ["sin embargo", "pero", "por lo tanto", "mientras tanto", "no obstante"],
        # duplicates removed ("a", "de", "por" each appeared twice)
        "forbidden_endings": ["el", "la", "los", "las", "y", "o", "para", "por", "de", "en", "con", "a"],
        "sentence_boundaries": [".", "?", "!"],
    },
    "fr": {
        "trigger_words": ["cependant", "mais", "donc", "pendant ce temps", "néanmoins"],
        # duplicates removed ("de", "par" each appeared twice)
        "forbidden_endings": ["le", "la", "les", "et", "ou", "pour", "par", "de", "en", "avec", "à"],
        "sentence_boundaries": [".", "?", "!"],
    },
}
def validate_input(json_input: str) -> Tuple[bool, Optional[Dict[str, Any]]]:
    """
    Validate the input JSON structure.

    The payload must be a JSON object containing a "text" field and a
    non-empty "chunks" list.

    Args:
        json_input: JSON string to validate

    Returns:
        Tuple of (is_valid, parsed_data) where parsed_data is None if invalid
    """
    try:
        data = json.loads(json_input)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers non-string input (e.g. None from the UI);
        # the original only caught JSONDecodeError and would crash.
        return False, None
    if not isinstance(data, dict):
        return False, None
    if "text" not in data or "chunks" not in data:
        return False, None
    chunks = data["chunks"]
    if not isinstance(chunks, list) or not chunks:
        return False, None
    return True, data
def format_time(seconds: float) -> str:
    """
    Convert seconds to SRT time format (HH:MM:SS,mmm).

    Works from total milliseconds so durations of 24 hours or more are
    correct; the original used ``timedelta.seconds``, which silently
    discards whole days (90000 s formatted as "01:00:00,000").

    Args:
        seconds: Non-negative time in seconds

    Returns:
        Formatted time string, e.g. "01:01:01,500"
    """
    total_ms = int(round(seconds * 1000))  # round to nearest millisecond
    hours, rem = divmod(total_ms, 3_600_000)
    minutes, rem = divmod(rem, 60_000)
    secs, millis = divmod(rem, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
def count_words(text: str) -> int:
    """
    Count whitespace-separated words in text.

    Args:
        text: Text whose words are counted

    Returns:
        Number of whitespace-delimited tokens (punctuation attached to
        a word counts as part of that word)
    """
    return sum(1 for _ in text.split())
def get_majority_speaker(chunks: List[Dict[str, Any]]) -> Optional[str]:
    """
    Determine the dominant speaker across chunks.

    Speakers are weighted by the number of words they spoke, so many
    short chunks can outweigh a single long one.

    Args:
        chunks: List of chunk dictionaries ("speaker" key optional)

    Returns:
        Speaker ID with the largest word total, or None when no chunk
        carries speaker information
    """
    word_totals = Counter()
    for entry in chunks:
        if "speaker" not in entry:
            continue
        word_totals[entry["speaker"]] += count_words(entry["text"])
    if not word_totals:
        return None
    return word_totals.most_common(1)[0][0]
def should_break_line(line: str, language: str, word_break_threshold: int) -> bool:
    """
    Determine whether a subtitle line should break, based on length and
    language-specific rules.

    A break is requested when any of the following holds:
      * the line exceeds ``word_break_threshold`` words,
      * the line exceeds 11 characters excluding whitespace,
      * the line contains a trigger word (whole-word, case-insensitive),
      * the line ends on a word that must not terminate a line.

    Args:
        line: Text line to check
        language: ISO language code (unknown codes fall back to English)
        word_break_threshold: Maximum words per line

    Returns:
        True if the line should break
    """
    # Word-count limit.
    if count_words(line) > word_break_threshold:
        return True
    # Character limit (11 chars excluding whitespace).
    if len(re.sub(r'\s+', '', line)) > 11:
        return True
    rules = LANGUAGE_RULES.get(language, LANGUAGE_RULES["en"])
    # Trigger words must match as whole words: the original substring
    # test fired on e.g. "but" inside "butter".
    for trigger in rules["trigger_words"]:
        if re.search(r'\b' + re.escape(trigger) + r'\b', line, re.IGNORECASE):
            return True
    # Never end a line on an article/conjunction/preposition.
    words = line.split()
    if words and words[-1].lower() in rules["forbidden_endings"]:
        return True
    return False
def format_speaker_change(speaker_id: str) -> str:
    """
    Render a speaker identifier as an SRT prefix marker.

    Args:
        speaker_id: Speaker identifier

    Returns:
        The identifier wrapped in brackets followed by one space,
        e.g. "SPEAKER_00" -> "[SPEAKER_00] "
    """
    return "[" + speaker_id + "] "
def process_chunks_to_srt(
    chunks: List[Dict[str, Any]],
    word_break_threshold: int,
    language: str,
    include_speaker: bool
) -> str:
    """
    Convert transcription chunks to SRT format.

    Chunks are merged into subtitle segments; a new segment starts when
    the accumulated text ends a sentence, when adding the next chunk
    would exceed the word threshold, or when the speaker changes.

    Args:
        chunks: List of chunk dictionaries, each with "text" and a
            (start, end) "timestamp" pair; "speaker" is optional
        word_break_threshold: Maximum words per subtitle block
        language: ISO language code (unknown codes fall back to English)
        include_speaker: Whether to include speaker information

    Returns:
        SRT formatted string
    """
    boundaries = tuple(
        LANGUAGE_RULES.get(language, LANGUAGE_RULES["en"])["sentence_boundaries"]
    )
    segments: List[Dict[str, Any]] = []
    current_texts: List[str] = []
    current_speaker = None
    current_start = None
    current_end = None

    def _flush() -> None:
        # Close the accumulated texts into one finished segment.
        segments.append({
            "start": current_start,
            "end": current_end,
            "text": " ".join(current_texts).strip(),
            "speaker": current_speaker,
        })

    for chunk in chunks:
        text = chunk["text"]
        start_time, end_time = chunk["timestamp"]
        if not current_texts:
            # First chunk of a fresh segment.
            current_texts = [text]
            current_start = start_time
            current_end = end_time
            current_speaker = chunk.get("speaker")
            continue
        # Break when the segment so far ends a sentence.  BUGFIX: the
        # original tested the *first* character of the incoming chunk
        # against the boundary set, which is almost never punctuation,
        # so sentence breaks effectively never happened.
        should_break = current_texts[-1].rstrip().endswith(boundaries)
        # Break when adding this chunk would exceed the word limit.
        accumulated = sum(count_words(t) for t in current_texts)
        if accumulated + count_words(text) > word_break_threshold:
            should_break = True
        # Break on a speaker change (only when speaker output is on).
        if include_speaker and "speaker" in chunk and chunk["speaker"] != current_speaker:
            should_break = True
        if should_break:
            _flush()
            current_texts = [text]
            current_start = start_time
            current_end = end_time
            current_speaker = chunk.get("speaker")
        else:
            current_texts.append(text)
            current_end = end_time
    # Final open segment, if any.
    if current_texts:
        _flush()

    # Render segments as numbered SRT blocks.
    srt_lines: List[str] = []
    for index, segment in enumerate(segments, 1):
        text = segment["text"]
        if include_speaker and segment["speaker"]:
            text = format_speaker_change(segment["speaker"]) + text
        srt_lines.append(str(index))
        srt_lines.append(f"{format_time(segment['start'])} --> {format_time(segment['end'])}")
        srt_lines.append(text)
        srt_lines.append("")  # blank line between segments
    return "\n".join(srt_lines).strip()
def convert_transcription(
    json_input: str,
    word_break_threshold: int,
    language: str,
    include_speaker: bool
) -> Tuple[str, str]:
    """
    Main conversion function from Transcribe JSON to SRT.

    Args:
        json_input: JSON input string
        word_break_threshold: Maximum words per subtitle block
        language: ISO language code
        include_speaker: Whether to include speaker information

    Returns:
        Tuple of (srt_output, status_message); srt_output is empty on
        failure and status_message explains the outcome.
    """
    # Reject structurally invalid payloads up front.
    is_valid, data = validate_input(json_input)
    if not is_valid:
        return "", "Invalid JSON input: Missing required 'text' or 'chunks' fields"
    # Any conversion failure is reported as a status string rather
    # than propagated to the UI.
    try:
        srt_output = process_chunks_to_srt(
            data["chunks"], word_break_threshold, language, include_speaker
        )
    except Exception as e:
        return "", f"Error during conversion: {str(e)}"
    return srt_output, "Conversion successful"
# --- Gradio interface -------------------------------------------------
# BUGFIX: theme and css are gr.Blocks() constructor arguments, not
# launch() arguments, and launch() has no "footer_links" parameter —
# the original launch() call would raise TypeError at startup.
_app_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="indigo",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    text_size="lg",
    spacing_size="lg",
    radius_size="md",
).set(
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
    block_title_text_weight="600",
)
_APP_CSS = """
.gradio-container {
    max-width: 1200px !important;
    margin: 0 auto !important;
}
.gr-box {
    border-radius: 8px !important;
}
"""
with gr.Blocks(theme=_app_theme, css=_APP_CSS) as demo:
    gr.Markdown("# Transcription Format Converter")
    gr.Markdown("Convert Transcribe JSON format to SRT subtitle format with configurable options")
    gr.Markdown("Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)")
    with gr.Row():
        with gr.Column():
            # Input section
            json_input = gr.Textbox(
                label="Transcribe JSON Input",
                placeholder='{"text": "Full text", "chunks": [{"text": "Segment 1", "timestamp": [0, 2.5]}, ...]}',
                lines=10
            )
            # Conversion parameters
            word_break_threshold = gr.Slider(
                minimum=5,
                maximum=20,
                value=10,
                step=1,
                label="Word Break Threshold"
            )
            language = gr.Dropdown(
                choices=["en", "es", "fr"],
                value="en",
                label="Language"
            )
            include_speaker = gr.Checkbox(
                label="Include Speaker Information",
                value=False
            )
            convert_btn = gr.Button("Convert to SRT", variant="primary")
        with gr.Column():
            # Output section
            srt_output = gr.Textbox(
                label="SRT Output",
                lines=15,
                placeholder="SRT formatted subtitles will appear here..."
            )
            status_message = gr.Textbox(
                label="Status",
                interactive=False
            )
    # Cached examples run through convert_transcription at build time.
    examples = gr.Examples(
        examples=[
            [
                '{"text": "Hello world. This is a test. How are you today?", "chunks": [{"text": "Hello world.", "timestamp": [0, 1.5]}, {"text": "This is a test.", "timestamp": [1.5, 3.2]}, {"text": "How are you today?", "timestamp": [3.2, 5.0]}]}',
                10,
                "en",
                False
            ],
            [
                '{"text": "Hola mundo. Esto es una prueba. ¿Cómo estás hoy?", "chunks": [{"text": "Hola mundo.", "timestamp": [0, 1.5]}, {"text": "Esto es una prueba.", "timestamp": [1.5, 3.2]}, {"text": "¿Cómo estás hoy?", "timestamp": [3.2, 5.0]}]}',
                10,
                "es",
                False
            ]
        ],
        inputs=[json_input, word_break_threshold, language, include_speaker],
        outputs=[srt_output, status_message],
        fn=convert_transcription,
        cache_examples=True,
        label="Examples"
    )
    # Event listener.  BUGFIX: "api_visibility" is not a click()
    # parameter; api_name exposes the endpoint on the API page instead.
    convert_btn.click(
        fn=convert_transcription,
        inputs=[json_input, word_break_threshold, language, include_speaker],
        outputs=[srt_output, status_message],
        api_name="convert_transcription"
    )
    # Footer links preserved from the original (invalid) footer_links arg.
    gr.Markdown(
        "[Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder) · "
        "[Gradio Docs](https://www.gradio.app/docs) · "
        "[GitHub](https://github.com/gradio-app/gradio)"
    )
demo.launch()