Spaces:

eubottura
/

anycoder-ef2321b6

Sleeping

App Files Files Community

eubottura committed 28 days ago

Commit

c2c1bfd

verified ·

1 Parent(s): c23b652

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

app.py +389 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,389 @@

+import gradio as gr
+import json
+import re
+from collections import Counter
+from datetime import timedelta
+from typing import List, Dict, Any, Optional, Tuple
+# Language-specific rules and dictionaries, keyed by ISO language code.
+# Each entry provides:
+#   trigger_words       - connective words/phrases that should_break_line treats as a break signal
+#   forbidden_endings   - function words compared against the last word of a line
+#   sentence_boundaries - sentence-terminating punctuation marks
+# Callers look entries up with LANGUAGE_RULES.get(language, LANGUAGE_RULES["en"]),
+# so the "en" entry must always be present.
+LANGUAGE_RULES = {
+    "en": {
+        "trigger_words": ["however", "but", "therefore", "meanwhile", "nevertheless"],
+        "forbidden_endings": ["a", "an", "the", "and", "but", "or", "for", "nor", "on", "at", "to", "from", "by", "of", "in", "with"],
+        "sentence_boundaries": [".", "?", "!"]
+    },
+    "es": {
+        "trigger_words": ["sin embargo", "pero", "por lo tanto", "mientras tanto", "no obstante"],
+        "forbidden_endings": ["el", "la", "los", "las", "y", "o", "para", "por", "de", "en", "con", "a"],
+        "sentence_boundaries": [".", "?", "!"]
+    },
+    "fr": {
+        "trigger_words": ["cependant", "mais", "donc", "pendant ce temps", "néanmoins"],
+        "forbidden_endings": ["le", "la", "les", "et", "ou", "pour", "par", "de", "en", "avec", "à"],
+        "sentence_boundaries": [".", "?", "!"]
+    }
+}
+def validate_input(json_input: str) -> Tuple[bool, Optional[Dict[str, Any]]]:
+    """
+    Validate the input JSON structure.
+    The input is accepted when it parses to a JSON object that has a "text"
+    key and a "chunks" key holding a non-empty list. The individual chunks
+    are not inspected here.
+    Args:
+        json_input: JSON string to validate
+    Returns:
+        Tuple of (is_valid, parsed_data) where parsed_data is None if invalid
+    """
+    try:
+        data = json.loads(json_input)
+    except (ValueError, TypeError):
+        # ValueError covers json.JSONDecodeError (malformed JSON);
+        # TypeError is raised when the input is not str/bytes at all (e.g. None).
+        return False, None
+    if not isinstance(data, dict):
+        return False, None
+    if "text" not in data or "chunks" not in data:
+        return False, None
+    chunks = data["chunks"]
+    if not isinstance(chunks, list) or not chunks:
+        return False, None
+    return True, data
+def format_time(seconds: float) -> str:
+    """
+    Convert seconds to SRT time format (HH:MM:SS,mmm).
+    Hours are not capped at 23: a duration of 25 hours is rendered as
+    "25:00:00,000". Negative inputs are clamped to "00:00:00,000".
+    Args:
+        seconds: Time in seconds
+    Returns:
+        Formatted time string
+    """
+    td = timedelta(seconds=seconds)
+    # Fold the day component back in; td.seconds alone only holds the part
+    # of the duration below 24 hours.
+    total_ms = (td.days * 86400 + td.seconds) * 1000 + td.microseconds // 1000
+    # timedelta normalizes negatives to days=-1 plus a positive remainder,
+    # which makes total_ms negative here; clamp so it is not printed as 23:59:..
+    total_ms = max(total_ms, 0)
+    hours, remainder_ms = divmod(total_ms, 3_600_000)
+    minutes, remainder_ms = divmod(remainder_ms, 60_000)
+    whole_seconds, milliseconds = divmod(remainder_ms, 1000)
+    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"
+def count_words(text: str) -> int:
+    """
+    Count the whitespace-separated tokens in text.
+    Punctuation attached to a token is part of that token, and runs of
+    whitespace act as a single separator.
+    Args:
+        text: Text to count words in
+    Returns:
+        Word count
+    """
+    tokens = text.split()
+    return len(tokens)
+def get_majority_speaker(chunks: List[Dict[str, Any]]) -> Optional[str]:
+    """
+    Pick the speaker who accounts for the most words across the chunks.
+    Chunks without a "speaker" key are skipped; each remaining chunk adds
+    the word count of its "text" to that speaker's tally.
+    Args:
+        chunks: List of chunk dictionaries
+    Returns:
+        Majority speaker ID or None if no speaker info
+    """
+    words_by_speaker = Counter()
+    for entry in chunks:
+        if "speaker" not in entry:
+            continue
+        words_by_speaker[entry["speaker"]] += count_words(entry["text"])
+    if not words_by_speaker:
+        return None
+    top_speaker, _word_total = words_by_speaker.most_common(1)[0]
+    return top_speaker
+def should_break_line(line: str, language: str, word_break_threshold: int, max_chars: int = 11) -> bool:
+    """
+    Determine if a line should break based on language rules.
+    A break is requested when any of these holds: the line has more than
+    word_break_threshold words, it has more than max_chars non-whitespace
+    characters, it contains one of the language's trigger words as a whole
+    word or phrase, or its last word is one of the language's forbidden endings.
+    Args:
+        line: Text line to check
+        language: ISO language code; codes missing from LANGUAGE_RULES use the "en" rules
+        word_break_threshold: Maximum words per line
+        max_chars: Maximum characters per line, not counting whitespace
+    Returns:
+        True if line should break
+    """
+    # Check word count threshold
+    if count_words(line) > word_break_threshold:
+        return True
+    # Check character limit (whitespace is not counted)
+    if len(re.sub(r'\s+', '', line)) > max_chars:
+        return True
+    rules = LANGUAGE_RULES.get(language, LANGUAGE_RULES["en"])
+    lowered = line.lower()
+    # Check for trigger words. The lookarounds demand a whole-word match so
+    # that e.g. "but" does not fire inside "button".
+    for trigger in rules["trigger_words"]:
+        pattern = r"(?<!\w)" + re.escape(trigger.lower()) + r"(?!\w)"
+        if re.search(pattern, lowered):
+            return True
+    # Check for forbidden endings (an empty line has no last word)
+    words = lowered.split()
+    return bool(words) and words[-1] in rules["forbidden_endings"]
+def format_speaker_change(speaker_id: str) -> str:
+    """
+    Build the speaker prefix placed in front of a subtitle's text.
+    Args:
+        speaker_id: Speaker identifier
+    Returns:
+        The identifier wrapped in square brackets, followed by one space
+    """
+    marker = "[{}] ".format(speaker_id)
+    return marker
+def process_chunks_to_srt(
+    chunks: List[Dict[str, Any]],
+    word_break_threshold: int,
+    language: str,
+    include_speaker: bool
+) -> str:
+    """
+    Convert transcription chunks to SRT format.
+    Consecutive chunks are merged into one subtitle block until the block
+    already ends a sentence, adding the next chunk would exceed the word
+    limit, or (when include_speaker is set) the speaker changes.
+    Args:
+        chunks: List of chunk dictionaries; each is read for "text" and for
+            "timestamp" ([start, end] in seconds) and may carry "speaker"
+        word_break_threshold: Maximum words per subtitle block
+        language: ISO language code
+        include_speaker: Whether to include speaker information
+    Returns:
+        SRT formatted string
+    Raises:
+        KeyError: If a chunk has no "text" or "timestamp" entry
+    """
+    rules = LANGUAGE_RULES.get(language, LANGUAGE_RULES["en"])
+    # str.endswith accepts a tuple of alternatives
+    boundaries = tuple(rules["sentence_boundaries"])
+    srt_segments = []
+    current_segment = []
+    current_speaker = None
+    current_start_time = None
+    current_end_time = None
+    def close_current_segment():
+        # Snapshot the segment being built into the list of finished segments
+        srt_segments.append({
+            "start": current_start_time,
+            "end": current_end_time,
+            "text": " ".join(current_segment).strip(),
+            "speaker": current_speaker
+        })
+    # Process chunks to create segments
+    for chunk in chunks:
+        text = chunk["text"]
+        start_time = chunk["timestamp"][0]
+        end_time = chunk["timestamp"][1]
+        if end_time is None:
+            # An end of None cannot be formatted as a time; fall back to the
+            # chunk's own start so the block still gets a valid range.
+            end_time = start_time
+        if current_segment:
+            # Sentence boundary: the text collected so far already ends a sentence
+            ends_sentence = current_segment[-1].rstrip().endswith(boundaries)
+            # Word count threshold: adding this chunk would overflow the block
+            total_words = sum(count_words(t) for t in current_segment)
+            too_long = total_words + count_words(text) > word_break_threshold
+            # Speaker change (only when speaker info is requested and available)
+            speaker_changed = (
+                include_speaker
+                and "speaker" in chunk
+                and chunk["speaker"] != current_speaker
+            )
+            if not (ends_sentence or too_long or speaker_changed):
+                # Continue current segment
+                current_segment.append(text)
+                current_end_time = end_time
+                continue
+            close_current_segment()
+        # Start a new segment (first chunk, or right after a break)
+        current_segment = [text]
+        current_start_time = start_time
+        current_end_time = end_time
+        current_speaker = chunk.get("speaker")
+    # Add final segment
+    if current_segment:
+        close_current_segment()
+    # Format segments as SRT
+    srt_lines = []
+    for index, segment in enumerate(srt_segments, 1):
+        text = segment["text"]
+        # Apply speaker marker if needed
+        if include_speaker and segment["speaker"]:
+            text = format_speaker_change(segment["speaker"]) + text
+        # Format SRT block
+        srt_lines.append(str(index))
+        srt_lines.append(f"{format_time(segment['start'])} --> {format_time(segment['end'])}")
+        srt_lines.append(text)
+        srt_lines.append("")  # Blank line between segments
+    return "\n".join(srt_lines).strip()
+def convert_transcription(
+    json_input: str,
+    word_break_threshold: int,
+    language: str,
+    include_speaker: bool
+) -> Tuple[str, str]:
+    """
+    Turn a Transcribe JSON document into SRT text plus a status line.
+    The input is validated first; any exception raised while building the
+    SRT is caught and reported through the status message instead of
+    propagating to the caller.
+    Args:
+        json_input: JSON input string
+        word_break_threshold: Maximum words per subtitle block
+        language: ISO language code
+        include_speaker: Whether to include speaker information
+    Returns:
+        Tuple of (srt_output, status_message); srt_output is "" on failure
+    """
+    ok, payload = validate_input(json_input)
+    if not ok:
+        return "", "Invalid JSON input: Missing required 'text' or 'chunks' fields"
+    try:
+        rendered = process_chunks_to_srt(
+            payload["chunks"],
+            word_break_threshold,
+            language,
+            include_speaker
+        )
+    except Exception as exc:
+        # Boundary handler: report the failure as a status message
+        return "", "Error during conversion: " + str(exc)
+    return rendered, "Conversion successful"
+# Create Gradio interface
+# Layout: a header, then one row with two columns (inputs and options on the
+# left, results on the right), followed by clickable examples. Components
+# appear in the order they are created inside each context manager.
+with gr.Blocks() as demo:
+    gr.Markdown("# Transcription Format Converter")
+    gr.Markdown("Convert Transcribe JSON format to SRT subtitle format with configurable options")
+    gr.Markdown("Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)")
+    with gr.Row():
+        with gr.Column():
+            # Input section
+            json_input = gr.Textbox(
+                label="Transcribe JSON Input",
+                placeholder='{"text": "Full text", "chunks": [{"text": "Segment 1", "timestamp": [0, 2.5]}, ...]}',
+                lines=10
+            )
+            # Parameters
+            # Passed to convert_transcription as word_break_threshold
+            word_break_threshold = gr.Slider(
+                minimum=5,
+                maximum=20,
+                value=10,
+                step=1,
+                label="Word Break Threshold"
+            )
+            # Choices mirror the keys of LANGUAGE_RULES
+            language = gr.Dropdown(
+                choices=["en", "es", "fr"],
+                value="en",
+                label="Language"
+            )
+            include_speaker = gr.Checkbox(
+                label="Include Speaker Information",
+                value=False
+            )
+            convert_btn = gr.Button("Convert to SRT", variant="primary")
+        with gr.Column():
+            # Output section
+            srt_output = gr.Textbox(
+                label="SRT Output",
+                lines=15,
+                placeholder="SRT formatted subtitles will appear here..."
+            )
+            # Read-only field that shows the second value returned by convert_transcription
+            status_message = gr.Textbox(
+                label="Status",
+                interactive=False
+            )
+    # Examples
+    # Each example row lists values in the same order as `inputs` below:
+    # JSON text, word break threshold, language, include-speaker flag.
+    # NOTE(review): cache_examples=True presumably runs fn on every example up front - confirm against the Gradio docs.
+    examples = gr.Examples(
+        examples=[
+            [
+                '{"text": "Hello world. This is a test. How are you today?", "chunks": [{"text": "Hello world.", "timestamp": [0, 1.5]}, {"text": "This is a test.", "timestamp": [1.5, 3.2]}, {"text": "How are you today?", "timestamp": [3.2, 5.0]}]}',
+                10,
+                "en",
+                False
+            ],
+            [
+                '{"text": "Hola mundo. Esto es una prueba. ¿Cómo estás hoy?", "chunks": [{"text": "Hola mundo.", "timestamp": [0, 1.5]}, {"text": "Esto es una prueba.", "timestamp": [1.5, 3.2]}, {"text": "¿Cómo estás hoy?", "timestamp": [3.2, 5.0]}]}',
+                10,
+                "es",
+                False
+            ]
+        ],
+        inputs=[json_input, word_break_threshold, language, include_speaker],
+        outputs=[srt_output, status_message],
+        fn=convert_transcription,
+        cache_examples=True,
+        label="Examples"
+    )
+    # Event listener
+    # Clicking the button runs convert_transcription on the four inputs and
+    # writes its (srt_output, status_message) tuple to the two output boxes.
+    # NOTE(review): api_visibility is assumed to be a Gradio 6 option (requirements pin gradio>=6.0) - confirm.
+    convert_btn.click(
+        fn=convert_transcription,
+        inputs=[json_input, word_break_threshold, language, include_speaker],
+        outputs=[srt_output, status_message],
+        api_visibility="public"
+    )
+# Launch with modern theme and styling
+# This call sits at module top level, so it runs whenever the file is executed or imported.
+# NOTE(review): passing theme/css/footer_links to launch() is assumed to match the
+# Gradio 6 API (requirements pin gradio>=6.0) - confirm against the Gradio docs.
+demo.launch(
+    # Soft base theme with custom hues, font and sizing
+    theme=gr.themes.Soft(
+        primary_hue="blue",
+        secondary_hue="indigo",
+        neutral_hue="slate",
+        font=gr.themes.GoogleFont("Inter"),
+        text_size="lg",
+        spacing_size="lg",
+        radius_size="md"
+    ).set(
+        # Theme variable overrides; the "*primary_600"-style values refer to theme palette entries
+        button_primary_background_fill="*primary_600",
+        button_primary_background_fill_hover="*primary_700",
+        block_title_text_weight="600",
+    ),
+    # Label/URL pairs for the page footer
+    footer_links=[
+        {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
+        {"label": "Gradio Docs", "url": "https://www.gradio.app/docs"},
+        {"label": "GitHub", "url": "https://github.com/gradio-app/gradio"}
+    ],
+    # Extra CSS: caps and centers the container, rounds .gr-box corners
+    css="""
+    .gradio-container {
+        max-width: 1200px !important;
+        margin: 0 auto !important;
+    }
+    .gr-box {
+        border-radius: 8px !important;
+    }
+    """
+)

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio>=6.0
+requests
+Pillow