hivecorp committed on
Commit
4342ca8
·
verified ·
1 Parent(s): bc3f691

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -30
app.py CHANGED
@@ -23,21 +23,17 @@ def format_time(seconds):
23
  # Function to split text into sentences at '.', '!' or '?', breaking sentences longer than 8 words into chunks of at most 8 words
24
  def split_text_into_segments(text):
25
  segments = []
26
- # Split by punctuation (., !, ?)
27
  raw_segments = re.split(r'([.!?])', text)
28
  for i in range(0, len(raw_segments) - 1, 2):
29
- # Combine segment with following punctuation
30
  sentence = raw_segments[i].strip() + raw_segments[i + 1]
31
  words = sentence.split()
32
 
33
- # If segment is longer than 8 words, split into 7-8 word chunks
34
  if len(words) > 8:
35
  for j in range(0, len(words), 8):
36
  segments.append(" ".join(words[j:j + 8]))
37
  else:
38
  segments.append(sentence.strip())
39
 
40
- # Handle remaining text after the last punctuation
41
  if len(raw_segments) % 2 == 1:
42
  remaining_text = raw_segments[-1].strip()
43
  words = remaining_text.split()
@@ -46,11 +42,11 @@ def split_text_into_segments(text):
46
 
47
  return segments
48
 
49
- # Function to generate SRT with accurate timing per batch and cross-check timing
50
  async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate, voice):
51
  audio_file = f"batch_{batch_num}_audio.wav"
52
 
53
- # Generate the audio using edge-tts with pitch and rate adjustment
54
  tts = edge_tts.Communicate(batch_text, voice, rate=rate, pitch=f"{pitch}Hz")
55
  await tts.save(audio_file)
56
 
@@ -67,7 +63,6 @@ async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate
67
  for index, segment in enumerate(segments):
68
  end_time = start_time + segment_duration
69
 
70
- # If end_time exceeds actual audio length of the batch, adjust it
71
  if end_time > start_offset + actual_length:
72
  end_time = start_offset + actual_length
73
 
@@ -75,35 +70,28 @@ async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate
75
  srt_content += f"{format_time(start_time)} --> {format_time(end_time)}\n"
76
  srt_content += segment + "\n\n"
77
 
78
- # Update start time for next segment
79
  start_time = end_time
80
 
81
- return srt_content, audio_file, start_time # Return updated start time for cumulative tracking
82
 
83
- # Batch processing function with cumulative timing, progress indicator, and final SRT validation
84
  async def batch_process_srt_and_audio(script_text, pitch, rate, voice, progress=gr.Progress()):
85
  batches = [script_text[i:i + 500] for i in range(0, len(script_text), 500)]
86
  all_srt_content = ""
87
  combined_audio = AudioSegment.empty()
88
- start_offset = 0.0 # Track cumulative time offset for SRT timing
89
 
90
- # Process each batch sequentially to ensure proper timing and cumulative offset tracking
91
  for batch_num, batch_text in enumerate(batches):
92
  srt_content, audio_file, end_offset = await generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate, voice)
93
  all_srt_content += srt_content
94
 
95
- # Append the audio of each batch to the combined audio
96
  batch_audio = AudioSegment.from_file(audio_file)
97
  combined_audio += batch_audio
98
- start_offset = end_offset # Update the start offset for the next batch
99
 
100
- # Clean up the individual batch audio file
101
  os.remove(audio_file)
102
-
103
- # Update progress
104
  progress((batch_num + 1) / len(batches))
105
 
106
- # Final cross-check: Adjust any subtitle that exceeds the total audio length
107
  total_audio_length = combined_audio.duration_seconds
108
  validated_srt_content = ""
109
  for line in all_srt_content.strip().splitlines():
@@ -116,15 +104,12 @@ async def batch_process_srt_and_audio(script_text, pitch, rate, voice, progress=
116
  line = f"{format_time(start_time)} --> {format_time(end_time)}"
117
  validated_srt_content += line + "\n"
118
 
119
- # Generate unique names for the final files
120
  unique_id = uuid.uuid4()
121
- final_audio_path = f"final_audio_{unique_id}.mp3" # Set to MP3
122
  final_srt_path = f"final_subtitles_{unique_id}.srt"
123
 
124
- # Export combined audio directly as MP3 with 320 kbps bitrate
125
  combined_audio.export(final_audio_path, format="mp3", bitrate="320k")
126
 
127
- # Export validated SRT with unique names
128
  with open(final_srt_path, "w") as srt_file:
129
  srt_file.write(validated_srt_content)
130
 
@@ -132,12 +117,11 @@ async def batch_process_srt_and_audio(script_text, pitch, rate, voice, progress=
132
 
133
  # Gradio interface function
134
  async def process_script(script_text, pitch, rate, voice):
135
- # Ensure rate is formatted correctly
136
- formatted_rate = f"{'+' if rate > 0 else ''}{int(rate)}%" # Format rate for edge_tts
137
- srt_path, audio_path = await batch_process_srt_and_audio(script_text, pitch, formatted_rate, voice)
138
  return srt_path, audio_path, audio_path
139
 
140
- # Gradio interface setup with pitch adjustment slider, rate adjustment slider, and voice selection
141
  voice_options = {
142
  "Andrew": "en-US-AndrewNeural",
143
  "Jenny": "en-US-JennyNeural",
@@ -183,16 +167,17 @@ app = gr.Interface(
183
  fn=process_script,
184
  inputs=[
185
  gr.Textbox(label="Enter Script Text", lines=10),
186
- gr.Slider(label="Pitch Adjustment (Hz)", minimum=-100, maximum=100, step=1, value=1), # Default set to +1
187
- gr.Slider(label="Rate Adjustment (%)", minimum=-100, maximum=100, step=1, value=1), # Default set to +1
188
- gr.Dropdown(label="Select Voice", choices=list(voice_options.keys()), value="Andrew")
 
189
  ],
190
  outputs=[
191
  gr.File(label="Download SRT"),
192
  gr.File(label="Download Audio"),
193
  gr.Audio(label="Audio Playback")
194
  ],
195
- live=True
196
  )
197
 
198
  app.launch()
 
23
  # Function to split text into sentences at '.', '!' or '?', breaking sentences longer than 8 words into chunks of at most 8 words
24
  def split_text_into_segments(text):
25
  segments = []
 
26
  raw_segments = re.split(r'([.!?])', text)
27
  for i in range(0, len(raw_segments) - 1, 2):
 
28
  sentence = raw_segments[i].strip() + raw_segments[i + 1]
29
  words = sentence.split()
30
 
 
31
  if len(words) > 8:
32
  for j in range(0, len(words), 8):
33
  segments.append(" ".join(words[j:j + 8]))
34
  else:
35
  segments.append(sentence.strip())
36
 
 
37
  if len(raw_segments) % 2 == 1:
38
  remaining_text = raw_segments[-1].strip()
39
  words = remaining_text.split()
 
42
 
43
  return segments
44
 
45
+ # Function to generate SRT with accurate timing per batch
46
  async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate, voice):
47
  audio_file = f"batch_{batch_num}_audio.wav"
48
 
49
+ # Generate the audio using edge-tts
50
  tts = edge_tts.Communicate(batch_text, voice, rate=rate, pitch=f"{pitch}Hz")
51
  await tts.save(audio_file)
52
 
 
63
  for index, segment in enumerate(segments):
64
  end_time = start_time + segment_duration
65
 
 
66
  if end_time > start_offset + actual_length:
67
  end_time = start_offset + actual_length
68
 
 
70
  srt_content += f"{format_time(start_time)} --> {format_time(end_time)}\n"
71
  srt_content += segment + "\n\n"
72
 
 
73
  start_time = end_time
74
 
75
+ return srt_content, audio_file, start_time
76
 
77
+ # Batch processing function
78
  async def batch_process_srt_and_audio(script_text, pitch, rate, voice, progress=gr.Progress()):
79
  batches = [script_text[i:i + 500] for i in range(0, len(script_text), 500)]
80
  all_srt_content = ""
81
  combined_audio = AudioSegment.empty()
82
+ start_offset = 0.0
83
 
 
84
  for batch_num, batch_text in enumerate(batches):
85
  srt_content, audio_file, end_offset = await generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate, voice)
86
  all_srt_content += srt_content
87
 
 
88
  batch_audio = AudioSegment.from_file(audio_file)
89
  combined_audio += batch_audio
90
+ start_offset = end_offset
91
 
 
92
  os.remove(audio_file)
 
 
93
  progress((batch_num + 1) / len(batches))
94
 
 
95
  total_audio_length = combined_audio.duration_seconds
96
  validated_srt_content = ""
97
  for line in all_srt_content.strip().splitlines():
 
104
  line = f"{format_time(start_time)} --> {format_time(end_time)}"
105
  validated_srt_content += line + "\n"
106
 
 
107
  unique_id = uuid.uuid4()
108
+ final_audio_path = f"final_audio_{unique_id}.mp3"
109
  final_srt_path = f"final_subtitles_{unique_id}.srt"
110
 
 
111
  combined_audio.export(final_audio_path, format="mp3", bitrate="320k")
112
 
 
113
  with open(final_srt_path, "w") as srt_file:
114
  srt_file.write(validated_srt_content)
115
 
 
117
 
118
  # Gradio interface function
119
  async def process_script(script_text, pitch, rate, voice):
120
+ formatted_rate = f"{'+' if rate > 0 else ''}{int(rate)}%"
121
+ srt_path, audio_path = await batch_process_srt_and_audio(script_text, pitch, formatted_rate, voice_options[voice])
 
122
  return srt_path, audio_path, audio_path
123
 
124
+ # Gradio interface setup
125
  voice_options = {
126
  "Andrew": "en-US-AndrewNeural",
127
  "Jenny": "en-US-JennyNeural",
 
167
  fn=process_script,
168
  inputs=[
169
  gr.Textbox(label="Enter Script Text", lines=10),
170
+ gr.Slider(label="Pitch Adjustment (Hz)", minimum=-100, maximum=100, step=1, value=1),
171
+ gr.Slider(label="Rate Adjustment (%)", minimum=-100, maximum=100, step=1, value=1),
172
+ gr.Dropdown(label="Select Voice", choices=list(voice_options.keys()), value="Andrew"),
173
+ gr.Button(label="Submit") # Add submit button
174
  ],
175
  outputs=[
176
  gr.File(label="Download SRT"),
177
  gr.File(label="Download Audio"),
178
  gr.Audio(label="Audio Playback")
179
  ],
180
+ live=False # Changed to False to enable the Submit button functionality
181
  )
182
 
183
  app.launch()