insta-maker

Sleeping

App Files Files Community

hivecorp committed on Nov 3, 2024

Commit

8ca57cc

verified ·

1 Parent(s): 0d6b2aa

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -16

app.py CHANGED Viewed

@@ -13,11 +13,6 @@ def get_audio_length(audio_path):
         rate = audio.getframerate()
         return frames / float(rate)
-# Ensure that the file starts with "RIFF" (WAV header)
-def is_wav_file(filepath):
-    with open(filepath, 'rb') as f:
-        return f.read(4) == b'RIFF'
 # Generate precise SRT entries for a text batch
 def generate_accurate_srt(text, start_time, batch_index):
     srt_entries = []
@@ -48,28 +43,28 @@ async def batch_process_srt_and_audio(script_text, voice, batch_size=500, progre
     # Split text into manageable batches
     for i in range(0, len(script_text), batch_size):
         batch_text = script_text[i:i+batch_size]
-        audio_file = f"audio_batch_{i}.wav"
-        # Generate audio for each batch and save as WAV
         tts = edge_tts.Communicate(batch_text, voice, rate="-25%")
-        await tts.save(audio_file)
-        # Check if saved file is a valid WAV file
-        if not is_wav_file(audio_file):
-            raise ValueError(f"Audio file {audio_file} is not a valid WAV file.")
-        # Get precise audio length for synchronization
-        batch_duration = get_audio_length(audio_file)
         srt_entries, cumulative_time = generate_accurate_srt(batch_text, cumulative_time, batch_index)
         # Append entries and audio for the batch
         total_srt_entries.extend(srt_entries)
-        batch_audio = AudioSegment.from_file(audio_file, format="wav")  # Explicitly specify format
         combined_audio += batch_audio
         batch_index += len(srt_entries)
-        # Remove individual batch audio file
-        os.remove(audio_file)
     # Export combined audio and SRT
     combined_audio.export("final_audio.wav", format="wav")

         rate = audio.getframerate()
         return frames / float(rate)
 # Generate precise SRT entries for a text batch
 def generate_accurate_srt(text, start_time, batch_index):
     srt_entries = []
     # Split text into manageable batches
     for i in range(0, len(script_text), batch_size):
         batch_text = script_text[i:i+batch_size]
+        mp3_file = f"audio_batch_{i}.mp3"  # Save as MP3 first
+        wav_file = f"audio_batch_{i}.wav"  # Convert to WAV
+        # Generate audio for each batch and save as MP3
         tts = edge_tts.Communicate(batch_text, voice, rate="-25%")
+        await tts.save(mp3_file)
+        # Convert MP3 to WAV
+        batch_audio = AudioSegment.from_file(mp3_file, format="mp3")
+        batch_audio.export(wav_file, format="wav")
+        # Ensure WAV conversion succeeded and calculate duration
+        batch_duration = get_audio_length(wav_file)
         srt_entries, cumulative_time = generate_accurate_srt(batch_text, cumulative_time, batch_index)
         # Append entries and audio for the batch
         total_srt_entries.extend(srt_entries)
         combined_audio += batch_audio
         batch_index += len(srt_entries)
+        # Clean up temporary MP3 file
+        os.remove(mp3_file)
     # Export combined audio and SRT
     combined_audio.export("final_audio.wav", format="wav")