TextToSpeech

Sleeping

App Files Files Community

hivecorp committed on Oct 20, 2024

Commit

501e57f

verified ·

1 Parent(s): f1779f5

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -6

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import asyncio
 import tempfile
 import os
 from moviepy.editor import AudioFileClip
 # Get all available voices
 async def get_voices():
@@ -26,16 +27,28 @@ async def text_to_speech(text, voice, rate, pitch):
         await communicate.save(tmp_path)
     return tmp_path, None
 # Generate SRT file based on user preferences
 def generate_srt(words, audio_duration, srt_path, words_per_line, lines_per_paragraph):
-    total_words = len(words)
-    # Calculate how long each segment will be displayed
-    segment_duration = audio_duration / (total_words // words_per_line // lines_per_paragraph)  # Calculate duration based on total segments
     current_time = 0
     with open(srt_path, 'w', encoding='utf-8') as srt_file:
-        for i in range(0, total_words, words_per_line):
             # Gather lines based on the defined words per line
             lines = words[i:i + words_per_line]
             line_text = ' '.join(lines)
@@ -73,6 +86,9 @@ async def text_to_audio_and_srt(text, voice, rate, pitch, words_per_line, lines_
     audio_clip = AudioFileClip(audio_path)
     audio_duration = audio_clip.duration
     # Generate SRT file based on the entire text
     base_name = os.path.splitext(audio_path)[0]
     srt_path = f"{base_name}_subtitle.srt"

 import tempfile
 import os
 from moviepy.editor import AudioFileClip
+import speech_recognition as sr
 # Get all available voices
 async def get_voices():
         await communicate.save(tmp_path)
     return tmp_path, None
+# Function to transcribe audio with the Google Web Speech API; returns the recognized text ("" on failure), not per-word timing
+def analyze_audio(audio_path):
+    recognizer = sr.Recognizer()
+    with sr.AudioFile(audio_path) as source:
+        audio_data = recognizer.record(source)
+        # Recognize speech using Google Web Speech API
+        try:
+            text = recognizer.recognize_google(audio_data)
+            return text
+        except sr.UnknownValueError:
+            return ""
+        except sr.RequestError:
+            return ""
 # Generate SRT file based on user preferences
 def generate_srt(words, audio_duration, srt_path, words_per_line, lines_per_paragraph):
+    total_segments = (len(words) // words_per_line) // lines_per_paragraph + 1
+    segment_duration = audio_duration / total_segments  # Calculate duration for each segment
     current_time = 0
     with open(srt_path, 'w', encoding='utf-8') as srt_file:
+        for i in range(0, len(words), words_per_line):
             # Gather lines based on the defined words per line
             lines = words[i:i + words_per_line]
             line_text = ' '.join(lines)
     audio_clip = AudioFileClip(audio_path)
     audio_duration = audio_clip.duration
+    # Analyze audio to get the actual spoken text
+    spoken_text = analyze_audio(audio_path)
     # Generate SRT file based on the entire text
     base_name = os.path.splitext(audio_path)[0]
     srt_path = f"{base_name}_subtitle.srt"