insta-maker-3-api

Runtime error

App Files Files Community

hivecorp committed on Nov 3, 2024

Commit

8428946

verified ·

1 Parent(s): a52a6e3

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -90

app.py CHANGED Viewed

@@ -1,108 +1,96 @@
-import edge_tts
-import srt
 import os
-import wave
 import gradio as gr
-# Function to calculate audio duration for a given audio file
 def get_audio_length(audio_path):
-    with wave.open(audio_path, 'rb') as audio:
-        frames = audio.getnframes()
-        rate = audio.getframerate()
-        return frames / float(rate)
-# Function to generate SRT entries for a batch of text with accurate timing
-def generate_accurate_srt(text, start_time, batch_index):
     srt_entries = []
-    current_time = start_time
-    for line in text.splitlines():
-        # Estimate duration of each line based on audio segment generated
-        duration = len(line.split()) * 0.3  # Assuming approx. 0.3 seconds per word
-        end_time = current_time + duration
-        srt_entries.append(
-            srt.Subtitle(
-                index=batch_index,
-                start=srt.timedelta(seconds=current_time),
-                end=srt.timedelta(seconds=end_time),
-                content=line
-            )
-        )
-        current_time = end_time
-        batch_index += 1
-    return srt_entries, current_time
-# Process each batch of text, generate audio, and accumulate SRT entries
-def batch_process_srt_and_audio(script_text, batch_size=500):
-    total_srt_entries = []
-    cumulative_time = 0.0
-    batch_index = 1
-    for i in range(0, len(script_text), batch_size):
-        batch_text = script_text[i:i+batch_size]
-        # Generate audio for the batch
-        audio_file = f"audio_batch_{i}.wav"
-        communicate = edge_tts.Communicate(text=batch_text, voice="en-US-AndrewNeural", rate="-25%")
-        communicate.save(audio_file)
-        # Get the duration of the generated audio batch
-        batch_duration = get_audio_length(audio_file)
-        # Generate SRT entries for this batch and update cumulative time
-        srt_entries, cumulative_time = generate_accurate_srt(batch_text, cumulative_time, batch_index)
-        total_srt_entries.extend(srt_entries)
-        batch_index += len(srt_entries)
-    # Write the SRT file
-    srt_file = "output.srt"
-    with open(srt_file, 'w') as file:
-        file.write(srt.compose(total_srt_entries))
-    return srt_file
-# Final validation to ensure no SRT entry extends beyond total audio duration
-def validate_srt_against_audio(srt_file_path, audio_file_path):
-    audio_duration = get_audio_length(audio_file_path)
-    with open(srt_file_path, 'r') as file:
-        subtitles = list(srt.parse(file.read()))
-    for subtitle in subtitles:
-        if subtitle.end.total_seconds() > audio_duration:
-            subtitle.end = srt.timedelta(seconds=audio_duration)
-            break
-    # Write the validated SRT back to the file
-    with open(srt_file_path, 'w') as file:
-        file.write(srt.compose(subtitles))
-    return srt_file_path
 # Gradio Interface
-def process_text_to_srt(script_text):
-    # Process the script in batches and create SRT
-    srt_file = batch_process_srt_and_audio(script_text)
-    # Validate the final SRT file with the complete audio file
-    final_audio_file = "combined_audio.wav"  # Assumes you have a combined final audio file
-    validate_srt_against_audio(srt_file, final_audio_file)
-    return srt_file, final_audio_file
-# Gradio app setup
-def main():
-    gr.Interface(
-        fn=process_text_to_srt,
-        inputs="textbox",
-        outputs=["file", "audio"],
-        live=True,
-        title="Text-to-SRT with Accurate Timing",
-        description="Enter text to convert it into audio with synchronized SRT subtitles. The SRT timings are validated against the total audio duration."
-    ).launch()
-# Run the app
-if __name__ == "__main__":
-    main()

 import os
 import gradio as gr
+import srt
+import edge_tts
+import asyncio
+import tempfile
+from datetime import timedelta
+from pydub import AudioSegment
+# Define the Edge TTS settings
+# These two values are the defaults of generate_audio()'s `voice` and `rate`
+# parameters.
+DEFAULT_VOICE = "en-US-AndrewNeural"
+# Speaking-rate adjustment string passed to Edge TTS.
+DEFAULT_RATE = "-25%"
+# Function to generate TTS audio
+async def generate_audio(text, voice=DEFAULT_VOICE, rate=DEFAULT_RATE):
+    """Synthesize ``text`` with Edge TTS and return the path of the audio file.
+
+    Args:
+        text: Text to speak.
+        voice: Edge TTS voice name.
+        rate: Speaking-rate adjustment string such as "-25%".
+
+    Returns:
+        Path of a temporary file holding the synthesized audio. The file is
+        created with ``delete=False``, so the caller is responsible for
+        removing it.
+    """
+    # Pass rate by keyword: recent edge-tts releases declare it keyword-only,
+    # so a third positional argument raises TypeError.
+    communicate = edge_tts.Communicate(text, voice, rate=rate)
+    # Only reserve a unique path here and close the handle before edge-tts
+    # writes to it; an open handle blocks a second open on Windows.
+    # Edge TTS produces MP3 data, so the suffix says so.
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
+        audio_path = temp_audio.name
+    await communicate.save(audio_path)
+    return audio_path
+# Function to get audio length in seconds
 def get_audio_length(audio_path):
+    """Return the duration, in seconds, of the audio file at ``audio_path``."""
+    return AudioSegment.from_file(audio_path).duration_seconds
+# Function to generate and adjust SRT timings
+def generate_accurate_srt(text, audio_path):
+    """Build subtitle cues for ``text`` spread across the audio's duration.
+
+    The text is split on whitespace into cues of up to 10 words. Each cue is
+    given a share of the audio duration proportional to its word count, so
+    the cues are contiguous and the last one ends at the end of the audio.
+
+    Args:
+        text: The text that was synthesized.
+        audio_path: Path of the audio file generated from ``text``.
+
+    Returns:
+        A list of ``srt.Subtitle`` objects indexed from 1; empty when
+        ``text`` contains no words.
+    """
     srt_entries = []
+    words = text.split()
+    if not words:
+        # Nothing to caption; also avoids dividing by a zero word count.
+        return srt_entries
+    total_duration = get_audio_length(audio_path)
+    words_per_segment = 10
+    seconds_per_word = total_duration / len(words)
+    for index, first_word in enumerate(range(0, len(words), words_per_segment), start=1):
+        segment_words = words[first_word:first_word + words_per_segment]
+        start_seconds = first_word * seconds_per_word
+        # Clamp so float rounding can never push a cue past the audio's end.
+        end_seconds = min((first_word + len(segment_words)) * seconds_per_word,
+                          total_duration)
+        srt_entries.append(srt.Subtitle(index=index,
+                                        start=timedelta(seconds=start_seconds),
+                                        end=timedelta(seconds=end_seconds),
+                                        content=" ".join(segment_words)))
+    return srt_entries
+# Function to create SRT file with batch processing
+def batch_process_srt_and_audio(text_list):
+    """Generate one audio file and one SRT file for each text.
+
+    Args:
+        text_list: Iterable of strings. Entries that are empty or contain
+            only whitespace are skipped, so the results may be shorter than
+            the input.
+
+    Returns:
+        A tuple ``(srt_results, audio_files)`` of two parallel lists of file
+        paths. The files are temporary files that are not deleted
+        automatically.
+    """
+    srt_results = []
+    audio_files = []
+    for text in text_list:
+        if not text.strip():
+            # Nothing to synthesize or caption for a blank entry.
+            continue
+        audio_path = asyncio.run(generate_audio(text))
+        srt_content = generate_accurate_srt(text, audio_path)
+        # NamedTemporaryFile creates the file itself, unlike the deprecated
+        # tempfile.mktemp, which only returns a name and is open to races.
+        # UTF-8 is explicit so non-ASCII text does not depend on the locale.
+        with tempfile.NamedTemporaryFile("w", suffix=".srt", delete=False,
+                                         encoding="utf-8") as srt_file:
+            srt_file.write(srt.compose(srt_content))
+            srt_path = srt_file.name
+        srt_results.append(srt_path)
+        audio_files.append(audio_path)
+    return srt_results, audio_files
 # Gradio Interface
+def process_batch(texts):
+    """Gradio click handler: turn the text box content into audio and SRT files.
+
+    Args:
+        texts: Either the raw text box string, with one text per line, or an
+            already split list of strings.
+
+    Returns:
+        A tuple ``(audio_path, srt_paths)`` for the Audio and File outputs:
+        the path of the first generated audio file (``None`` if nothing was
+        generated) and the list of generated SRT paths (``None`` if empty).
+    """
+    # The Textbox delivers a single string; iterating it directly would
+    # process one character at a time, so split it into non-empty lines.
+    if isinstance(texts, str):
+        texts = [line.strip() for line in texts.splitlines() if line.strip()]
+    srt_files, audio_files = batch_process_srt_and_audio(texts)
+    # Return plain values rather than per-item `Component.update(...)`
+    # objects: those helpers no longer exist in Gradio 4, and one output
+    # component cannot take a list of updates. The Audio output holds a
+    # single clip, so only the first is previewed; File accepts a path list.
+    first_audio = audio_files[0] if audio_files else None
+    return first_audio, (srt_files or None)
+# Gradio App Interface
+# Layout: a heading, a multi-line text box, a row holding the audio and SRT
+# outputs, and a button that runs process_batch on the text box content.
+with gr.Blocks() as app:
+    gr.Markdown("### Batch Audio and SRT Generator")
+    # The text box value is passed to process_batch as its only input.
+    text_inputs = gr.Textbox(placeholder="Enter multiple texts separated by a new line", lines=10, label="Text Input")
+    with gr.Row():
+        audio_preview = gr.Audio(label="Generated Audio", type="filepath")
+        srt_preview = gr.File(label="Generated SRT")
+    process_button = gr.Button("Process Batch")
+    # process_batch's two return values map, in order, to the two outputs.
+    process_button.click(fn=process_batch, inputs=text_inputs, outputs=[audio_preview, srt_preview])
+# Launched at import time, outside the Blocks context.
+app.launch()