hivecorp committed on
Commit
eceecf3
·
verified ·
1 Parent(s): c53e8a0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -188
app.py CHANGED
@@ -1,189 +1,83 @@
1
  import gradio as gr
2
- import edge_tts
3
- import asyncio
4
- import tempfile
5
- import os
6
- import re
7
- from pydub import AudioSegment # Required for audio duration, needs ffmpeg installed
8
-
9
# Retrieve the Edge TTS voice catalogue.
async def get_voices():
    """Return a dict mapping human-readable voice labels to API short names."""
    catalogue = await edge_tts.list_voices()
    labels = {}
    for entry in catalogue:
        # Label shown in the dropdown: "ShortName - Locale (Gender)".
        label = f"{entry['ShortName']} - {entry['Locale']} ({entry['Gender']})"
        labels[label] = entry['ShortName']
    return labels
15
-
16
# Text-to-speech function
async def text_to_speech(text, voice, rate, pitch):
    """
    Synthesize *text* with Edge TTS.

    Returns a (audio_path, original_text, warning) triple; on invalid
    input the first two items are None and the warning string explains
    the problem. An empty warning string means success.
    """
    # Guard clauses: report problems as plain strings for the UI.
    if not text.strip():
        return None, None, "Please enter text to convert."
    if not voice:
        return None, None, "Please select a voice."

    # The dropdown label is "ShortName - Locale (Gender)"; the API wants
    # only the short name.
    short_name = voice.split(" - ")[0]

    # Edge TTS expects signed percent / Hz strings, e.g. "+5%", "-3Hz".
    communicate = edge_tts.Communicate(
        text,
        short_name,
        rate=f"{rate:+d}%",
        pitch=f"{pitch:+d}Hz",
    )

    # Persist the synthesized speech to a temporary MP3 file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as handle:
        output_path = handle.name
        await communicate.save(output_path)

    return output_path, text, ""
45
-
46
def format_time(ms):
    """Format a duration in milliseconds as an SRT timestamp (HH:MM:SS,mmm)."""
    remaining = ms
    fields = []
    # Peel off hours, minutes, seconds in turn.
    for unit in (3_600_000, 60_000, 1_000):
        fields.append(int(remaining / unit))
        remaining %= unit
    hours, minutes, seconds = fields
    # What is left after the seconds step is the millisecond remainder.
    return f"{hours:02}:{minutes:02}:{seconds:02},{int(remaining):03}"
57
-
58
def generate_srt(text_input, audio_filepath):
    """
    Write an SRT file whose cue timings are estimated proportionally to
    each text segment's character count over the total audio duration.

    This is an estimation only — no waveform analysis is performed.
    Requires ffmpeg (via pydub) to read the audio duration.

    Returns the path of the written .srt file, or None when input is
    missing, the audio cannot be read, or no usable segments exist.
    """
    if not text_input or not audio_filepath:
        return None

    try:
        # pydub returns clip length in milliseconds via len().
        total_ms = len(AudioSegment.from_file(audio_filepath))
    except Exception as e:
        print(f"Error getting audio duration with pydub: {e}. SRT generation requires ffmpeg.")
        return None

    # Split on sentence punctuation / newlines, keeping the delimiter
    # attached to its segment.
    raw_pieces = re.findall(r'[^.!?,\n]+[.!?,\n]*', text_input)
    segments = [piece.strip() for piece in raw_pieces if piece.strip()]
    if not segments:
        return None

    total_chars = sum(len(piece) for piece in segments)
    if total_chars == 0:  # defensive: avoid division by zero
        return None

    cue_lines = []
    cursor = 0
    final = len(segments) - 1
    for idx, piece in enumerate(segments):
        # Each segment's share of the audio is proportional to its
        # share of the total character count.
        share = (len(piece) / total_chars) * total_ms
        begin = cursor
        # Force the last cue to end exactly at the audio's end.
        finish = total_ms if idx == final else cursor + share
        cue_lines.extend([
            str(idx + 1),
            f"{format_time(begin)} --> {format_time(finish)}",
            piece,
            "",  # blank line terminates each SRT cue block
        ])
        cursor = finish

    # Place the .srt next to the audio file, sharing its base name.
    srt_path = f"{os.path.splitext(audio_filepath)[0]}.srt"
    with open(srt_path, "w", encoding="utf-8") as out:
        out.write("\n".join(cue_lines))

    return srt_path
120
-
121
# Gradio entry point: wraps the async TTS call and SRT generation.
def tts_interface(text, voice, rate, pitch):
    """
    Synthesize speech for *text*, then build its companion SRT file.

    Returns (audio_path, srt_path, warning_text); srt_path is None when
    synthesis failed or subtitle generation was not possible.
    """
    # Gradio callbacks are synchronous, so drive the coroutine here.
    audio_path, source_text, warning = asyncio.run(
        text_to_speech(text, voice, rate, pitch)
    )

    # Subtitles are only meaningful when audio was produced.
    srt_path = generate_srt(source_text, audio_path) if audio_path else None

    return audio_path, srt_path, warning
135
-
136
# Create Gradio application
async def create_demo():
    """
    Asynchronously creates and configures the Gradio interface.

    Async because the voice list is fetched from the Edge TTS service
    before the UI is built, so the dropdown is populated at startup.

    Returns:
        The configured (not yet launched) gr.Interface.
    """
    voices = await get_voices()  # Fetch voices when the app starts

    # Markdown shown above the interface; includes a promotional link.
    description = """
    Convert text to speech using Microsoft Edge TTS. Adjust speech rate and pitch: 0 is default, positive values increase, negative values decrease.

    ✨ **New Feature: Generate SRT Subtitles (Estimated Timings)!** ✨

    Automatically generates an SRT (SubRip Subtitle) file from your input text.
    **Important Note on Timings:** The SRT timings are *estimated* based on the length of each text segment relative to the total audio duration. This feature *does not* perform advanced audio waveform analysis for precise pause detection or word-level synchronization. For perfectly synchronized subtitles, dedicated forced-alignment tools are typically required.

    🎥 **Exciting News: Introducing our Text-to-Video Converter!** 🎥

    Take your content creation to the next level with our cutting-edge Text-to-Video Converter!
    Transform your words into stunning, professional-quality videos in just a few clicks.

    ✨ Features:
    • Convert text to engaging videos with customizable visuals
    • Choose from 40+ languages and 300+ voices
    • Perfect for creating audiobooks, storytelling, and language learning materials
    • Ideal for educators, content creators, and language enthusiasts

    Ready to revolutionize your content? [Click here to try our Text-to-Video Converter now!](https://text2video.wingetgui.com/)
    """

    demo = gr.Interface(
        fn=tts_interface,  # The function that processes inputs and returns outputs
        inputs=[
            gr.Textbox(label="Input Text", lines=5, placeholder="Enter your text here to convert to speech and generate SRT..."),
            # Empty first choice forces an explicit voice selection.
            gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value="", type="value"),
            gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1),
            gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
        ],
        outputs=[
            gr.Audio(label="Generated Audio", type="filepath"),
            gr.File(label="Generated SRT Subtitle", type="filepath", file_count="single", visible=True),  # Output for the SRT file
            gr.Markdown(label="Warning")  # tts_interface returns a plain warning string
        ],
        title="Edge TTS Text-to-Speech with SRT Generator",
        description=description,
        article="Experience the power of Edge TTS for text-to-speech conversion, and explore our advanced Text-to-Video Converter for even more creative possibilities!",
        analytics_enabled=False,
        flagging_mode='never'  # Changed from allow_flagging=False
    )
    return demo
185
-
186
def _main():
    """Build the demo (fetching the voice list first) and start serving."""
    demo = asyncio.run(create_demo())
    demo.launch()


# Run the application only when executed as a script.
if __name__ == "__main__":
    _main()
 
1
  import gradio as gr
2
+ from pydub import AudioSegment, silence
3
+ import nltk
4
+ import srt
5
+ import io
6
+ import datetime
7
+
8
+ nltk.download('punkt')
9
+
10
def process_audio_and_script(audio_file, script_text):
    """
    Build SRT subtitle text for *audio_file* from *script_text*.

    Sentences (split with NLTK) are lined up with silence gaps detected
    in the audio; sentences beyond the last detected gap get a fixed
    default duration. Timing is an estimate, not forced alignment.

    Returns the composed SRT document as a string.
    """
    # Load audio; pydub needs ffmpeg for most formats.
    audio = AudioSegment.from_file(audio_file)

    # Anything 16 dB below the clip's average loudness counts as silence.
    silence_thresh = audio.dBFS - 16
    silences = silence.detect_silence(audio, min_silence_len=400, silence_thresh=silence_thresh)

    # Convert [start_ms, stop_ms] pairs to (start_s, stop_s) in seconds.
    silences = [(start / 1000, stop / 1000) for start, stop in silences]

    # Segment script based on punctuation.
    sentences = nltk.tokenize.sent_tokenize(script_text)

    # Distribute timing across sentences based on silence gaps.
    subtitles = []
    last_time = 0.0
    for i, sentence in enumerate(sentences):
        if i < len(silences):
            start = last_time
            end = silences[i][0]
            last_time = silences[i][1]
        else:
            start = last_time
            end = start + 2.5  # default length if not enough silences
            # BUGFIX: advance the cursor, otherwise every sentence past
            # the last silence gets the identical 2.5 s window.
            last_time = end

        # BUGFIX: a leading silence (gap starting before any speech) or
        # overlapping gaps can yield end <= start, an invalid SRT cue.
        # Clamp to a small positive duration and keep last_time monotonic.
        if end <= start:
            end = start + 0.5
        if last_time < end:
            last_time = end

        subtitle = srt.Subtitle(index=i + 1,
                                start=datetime.timedelta(seconds=start),
                                end=datetime.timedelta(seconds=end),
                                content=sentence)
        subtitles.append(subtitle)

    srt_file = srt.compose(subtitles)
    return srt_file
42
+
43
def download_srt(audio_file, script_text):
    """
    Gradio callback: generate subtitles and return a real .srt file path.

    BUGFIX: gr.File outputs expect a filesystem path (or file object),
    not a (filename, data) tuple, so the composed SRT text is written to
    a temporary file whose path is returned.
    """
    import tempfile  # local import keeps this fix self-contained

    srt_data = process_audio_and_script(audio_file, script_text)
    with tempfile.NamedTemporaryFile(mode="w", suffix=".srt",
                                     delete=False, encoding="utf-8") as handle:
        handle.write(srt_data)
        return handle.name
46
+
47
# Interface
with gr.Blocks() as demo:
    gr.Markdown("### 🎙️ Audio to Timed Subtitle (SRT) Generator with Waveform")
    with gr.Row():
        # BUGFIX: gr.Audio accepts type="filepath" or "numpy"; "file" is
        # not a valid value. "filepath" hands callbacks a plain path string.
        audio_input = gr.Audio(type="filepath", label="Upload Audio")
        script_input = gr.Textbox(lines=10, label="Paste Script/Text with Punctuation")

    srt_output = gr.File(label="Download SRT")

    def waveform_html(audio_file):
        """Return an HTML snippet rendering *audio_file* with wavesurfer.js."""
        # BUGFIX: literal JS braces inside an f-string must be doubled
        # ({{ }}); the original single braces were parsed as format
        # fields and broke the template at runtime.
        return f"""
        <div id="waveform"></div>
        <script src="https://unpkg.com/wavesurfer.js"></script>
        <script>
            var wavesurfer = WaveSurfer.create({{
                container: '#waveform',
                waveColor: '#999',
                progressColor: '#333',
                height: 100
            }});
            wavesurfer.load("{audio_file}");
        </script>
        """

    waveform = gr.HTML()

    with gr.Row():
        gen_btn = gr.Button("Generate SRT")
        gen_btn.click(fn=download_srt,
                      inputs=[audio_input, script_input],
                      outputs=srt_output)

    # BUGFIX: with type="filepath" the change event delivers the path
    # string (or None when the audio is cleared), not a dict with a
    # "name" key.
    audio_input.change(fn=lambda path: waveform_html(path) if path else "",
                       inputs=audio_input,
                       outputs=waveform)

demo.launch()