hivecorp committed on
Commit
8428946
·
verified ·
1 Parent(s): a52a6e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -90
app.py CHANGED
@@ -1,108 +1,96 @@
1
- import edge_tts
2
- import srt
3
  import os
4
- import wave
5
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
6
 
7
- # Function to calculate audio duration for a given audio file
 
 
 
 
 
 
 
8
  def get_audio_length(audio_path):
9
- with wave.open(audio_path, 'rb') as audio:
10
- frames = audio.getnframes()
11
- rate = audio.getframerate()
12
- return frames / float(rate)
13
 
14
- # Function to generate SRT entries for a batch of text with accurate timing
15
- def generate_accurate_srt(text, start_time, batch_index):
16
  srt_entries = []
17
- current_time = start_time
18
-
19
- for line in text.splitlines():
20
- # Estimate duration of each line based on audio segment generated
21
- duration = len(line.split()) * 0.3 # Assuming approx. 0.3 seconds per word
22
- end_time = current_time + duration
23
-
24
- srt_entries.append(
25
- srt.Subtitle(
26
- index=batch_index,
27
- start=srt.timedelta(seconds=current_time),
28
- end=srt.timedelta(seconds=end_time),
29
- content=line
30
- )
31
- )
32
- current_time = end_time
33
- batch_index += 1
34
- return srt_entries, current_time
35
-
36
- # Process each batch of text, generate audio, and accumulate SRT entries
37
- def batch_process_srt_and_audio(script_text, batch_size=500):
38
- total_srt_entries = []
39
- cumulative_time = 0.0
40
- batch_index = 1
41
 
42
- for i in range(0, len(script_text), batch_size):
43
- batch_text = script_text[i:i+batch_size]
 
 
44
 
45
- # Generate audio for the batch
46
- audio_file = f"audio_batch_{i}.wav"
47
- communicate = edge_tts.Communicate(text=batch_text, voice="en-US-AndrewNeural", rate="-25%")
48
- communicate.save(audio_file)
49
-
50
- # Get the duration of the generated audio batch
51
- batch_duration = get_audio_length(audio_file)
52
-
53
- # Generate SRT entries for this batch and update cumulative time
54
- srt_entries, cumulative_time = generate_accurate_srt(batch_text, cumulative_time, batch_index)
55
-
56
- total_srt_entries.extend(srt_entries)
57
- batch_index += len(srt_entries)
58
 
59
- # Write the SRT file
60
- srt_file = "output.srt"
61
- with open(srt_file, 'w') as file:
62
- file.write(srt.compose(total_srt_entries))
 
 
 
 
 
 
63
 
64
- return srt_file
65
 
66
- # Final validation to ensure no SRT entry extends beyond total audio duration
67
- def validate_srt_against_audio(srt_file_path, audio_file_path):
68
- audio_duration = get_audio_length(audio_file_path)
 
69
 
70
- with open(srt_file_path, 'r') as file:
71
- subtitles = list(srt.parse(file.read()))
72
-
73
- for subtitle in subtitles:
74
- if subtitle.end.total_seconds() > audio_duration:
75
- subtitle.end = srt.timedelta(seconds=audio_duration)
76
- break
 
 
 
77
 
78
- # Write the validated SRT back to the file
79
- with open(srt_file_path, 'w') as file:
80
- file.write(srt.compose(subtitles))
81
-
82
- return srt_file_path
83
 
84
  # Gradio Interface
85
- def process_text_to_srt(script_text):
86
- # Process the script in batches and create SRT
87
- srt_file = batch_process_srt_and_audio(script_text)
 
 
 
 
 
 
 
88
 
89
- # Validate the final SRT file with the complete audio file
90
- final_audio_file = "combined_audio.wav" # Assumes you have a combined final audio file
91
- validate_srt_against_audio(srt_file, final_audio_file)
92
 
93
- return srt_file, final_audio_file
94
-
95
- # Gradio app setup
96
- def main():
97
- gr.Interface(
98
- fn=process_text_to_srt,
99
- inputs="textbox",
100
- outputs=["file", "audio"],
101
- live=True,
102
- title="Text-to-SRT with Accurate Timing",
103
- description="Enter text to convert it into audio with synchronized SRT subtitles. The SRT timings are validated against the total audio duration."
104
- ).launch()
105
 
106
- # Run the app
107
- if __name__ == "__main__":
108
- main()
 
 
 
1
  import os
 
2
  import gradio as gr
3
+ import srt
4
+ import edge_tts
5
+ import asyncio
6
+ import tempfile
7
+ from datetime import timedelta
8
+ from pydub import AudioSegment
9
+
10
+ # Define the Edge TTS settings
11
+ DEFAULT_VOICE = "en-US-AndrewNeural"
12
+ DEFAULT_RATE = "-25%"
13
 
14
# Function to generate TTS audio
async def generate_audio(text, voice=DEFAULT_VOICE, rate=DEFAULT_RATE):
    """Synthesize ``text`` with Edge TTS and return the path of the audio file.

    Args:
        text: The text to speak.
        voice: Edge TTS voice name.
        rate: Speaking-rate offset string, e.g. ``"-25%"``.

    Returns:
        Path of a temporary audio file. It is created with ``delete=False``,
        so the caller is responsible for removing it when done.
    """
    # ``rate`` is a keyword-only parameter of edge_tts.Communicate; passing it
    # positionally raises TypeError.
    communicate = edge_tts.Communicate(text, voice, rate=rate)

    # Reserve a unique path and close the handle *before* edge_tts reopens the
    # path for writing (an open NamedTemporaryFile cannot be reopened on
    # Windows). Edge TTS produces MP3 data, so the suffix says so.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
        audio_path = temp_audio.name

    await communicate.save(audio_path)
    return audio_path
20
+
21
# Function to get audio length in seconds
def get_audio_length(audio_path):
    """Return the duration, in seconds, of the audio file at ``audio_path``."""
    return AudioSegment.from_file(audio_path).duration_seconds
 
 
25
 
26
# Function to generate and adjust SRT timings
def generate_accurate_srt(text, audio_path, words_per_segment=10):
    """Split ``text`` into subtitle cues spread evenly over the audio.

    The text is cut into consecutive groups of ``words_per_segment`` words
    (the last group may be shorter) and the measured audio duration is
    divided equally between the groups.

    Args:
        text: The script that was spoken in the audio file.
        audio_path: Path of the audio file whose duration bounds the cues.
        words_per_segment: Maximum number of words per subtitle cue.

    Returns:
        A list of ``srt.Subtitle`` objects indexed from 1. The list is empty
        when ``text`` contains no words.

    Raises:
        ValueError: If ``words_per_segment`` is smaller than 1.
    """
    if words_per_segment < 1:
        raise ValueError("words_per_segment must be at least 1")

    words = text.split()
    if not words:
        return []

    total_duration = get_audio_length(audio_path)

    # Ceiling division: a trailing partial group still needs its own cue.
    # Floor division here gave 0 segments (ZeroDivisionError) for short texts
    # and pushed the final cues past the end of the audio otherwise.
    segment_count = -(-len(words) // words_per_segment)
    segment_duration = total_duration / segment_count

    srt_entries = []
    for segment_index in range(segment_count):
        first_word = segment_index * words_per_segment
        segment_words = words[first_word:first_word + words_per_segment]

        start_seconds = segment_index * segment_duration
        # Clamp so float rounding can never push the last cue past the audio.
        end_seconds = min((segment_index + 1) * segment_duration, total_duration)

        srt_entries.append(
            srt.Subtitle(
                index=segment_index + 1,
                start=timedelta(seconds=start_seconds),
                end=timedelta(seconds=end_seconds),
                content=" ".join(segment_words),
            )
        )

    return srt_entries
58
 
59
# Function to create SRT file with batch processing
def batch_process_srt_and_audio(text_list):
    """Generate one audio file and one SRT file for every text in the batch.

    Args:
        text_list: Iterable of texts to synthesize. A single string is
            treated as newline-separated texts rather than being iterated
            character by character.

    Returns:
        A ``(srt_paths, audio_paths)`` tuple of two parallel lists of
        temporary file paths. Blank texts are skipped and produce no entry.
        The files are not deleted automatically.
    """
    if isinstance(text_list, str):
        text_list = text_list.splitlines()

    srt_results = []
    audio_files = []

    for text in text_list:
        if not text.strip():
            # Nothing to speak; skip instead of sending an empty TTS request.
            continue

        audio_path = asyncio.run(generate_audio(text))
        srt_content = generate_accurate_srt(text, audio_path)

        # NamedTemporaryFile creates the file atomically, unlike the
        # deprecated, race-prone tempfile.mktemp. UTF-8 keeps non-ASCII
        # subtitle text writable regardless of the platform default encoding.
        with tempfile.NamedTemporaryFile(
            "w", suffix=".srt", delete=False, encoding="utf-8"
        ) as srt_file:
            srt_file.write(srt.compose(srt_content))
            srt_path = srt_file.name

        srt_results.append(srt_path)
        audio_files.append(audio_path)

    return srt_results, audio_files
 
 
 
 
76
 
77
  # Gradio Interface
78
def process_batch(texts):
    """Gradio click handler: turn the textbox content into audio and SRT files.

    Args:
        texts: Raw textbox content holding one text per line. An iterable of
            strings is also accepted.

    Returns:
        A ``(audio_path, srt_paths)`` tuple matching the two output
        components: the audio file path for the single audio player and the
        list of generated SRT file paths for the file component. Both are
        ``None`` when there is nothing to process.
    """
    # The textbox delivers ONE string; passing it on unchanged would make the
    # batch loop run once per character.
    raw_lines = texts.splitlines() if isinstance(texts, str) else list(texts or [])
    lines = [line.strip() for line in raw_lines if line and line.strip()]
    if not lines:
        return None, None

    srt_files, audio_files = batch_process_srt_and_audio(lines)

    # Each output component takes one value, not a list of per-item update
    # objects. NOTE(review): the single audio player can only show one clip,
    # so the first one is previewed; confirm whether all clips should be
    # exposed (e.g. through a multi-file output) instead.
    first_audio = audio_files[0] if audio_files else None
    return first_audio, (srt_files or None)
83
+
84
# Gradio App Interface
with gr.Blocks() as app:
    gr.Markdown("### Batch Audio and SRT Generator")

    # Multi-line input: one text per line.
    text_inputs = gr.Textbox(
        placeholder="Enter multiple texts separated by a new line",
        lines=10,
        label="Text Input",
    )

    # Result widgets are placed side by side.
    with gr.Row():
        audio_preview = gr.Audio(label="Generated Audio", type="filepath")
        srt_preview = gr.File(label="Generated SRT")

    # Clicking the button runs process_batch on the textbox content and
    # routes its two return values to the audio and file widgets.
    process_button = gr.Button("Process Batch")
    process_button.click(
        fn=process_batch,
        inputs=text_inputs,
        outputs=[audio_preview, srt_preview],
    )

# Start the web server for the interface.
app.launch()