Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -202,8 +202,7 @@ async def transcript_to_speech(transcript_text, voice, rate, pitch):
|
|
| 202 |
timed_audio_segments = []
|
| 203 |
max_end_time_ms = 0
|
| 204 |
previous_end_time_ms = 0
|
| 205 |
-
|
| 206 |
-
next_start_time_ms = None # Keep track of the *start* time of the next segment
|
| 207 |
|
| 208 |
for i, line in enumerate(lines):
|
| 209 |
start_time, audio_paths = await process_transcript_line(line, voice, rate, pitch)
|
|
@@ -221,15 +220,56 @@ async def transcript_to_speech(transcript_text, voice, rate, pitch):
|
|
| 221 |
current_audio_duration = len(combined_line_audio)
|
| 222 |
intended_start_time = start_time
|
| 223 |
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
if
|
| 228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
|
| 231 |
timed_audio_segments.append({'start': intended_start_time, 'audio': combined_line_audio})
|
| 232 |
-
previous_start_time_ms = start_time #update previous start time
|
| 233 |
previous_end_time_ms = max(previous_end_time_ms, intended_start_time + current_audio_duration)
|
| 234 |
max_end_time_ms = max(max_end_time_ms, previous_end_time_ms)
|
| 235 |
elif audio_paths:
|
|
@@ -305,4 +345,3 @@ async def create_demo():
|
|
| 305 |
if __name__ == "__main__":
|
| 306 |
demo = asyncio.run(create_demo())
|
| 307 |
demo.launch()
|
| 308 |
-
|
|
|
|
| 202 |
timed_audio_segments = []
|
| 203 |
max_end_time_ms = 0
|
| 204 |
previous_end_time_ms = 0
|
| 205 |
+
next_start_time_ms = None
|
|
|
|
| 206 |
|
| 207 |
for i, line in enumerate(lines):
|
| 208 |
start_time, audio_paths = await process_transcript_line(line, voice, rate, pitch)
|
|
|
|
| 220 |
current_audio_duration = len(combined_line_audio)
|
| 221 |
intended_start_time = start_time
|
| 222 |
|
| 223 |
+
# Get next start time for comparison
|
| 224 |
+
if i + 1 < len(lines):
|
| 225 |
+
next_line_match = re.match(r'(\d{2}):(\d{2}):(\d{2}),(\d{3})\s+.*', lines[i + 1])
|
| 226 |
+
if next_line_match:
|
| 227 |
+
next_h, next_m, next_s, next_ms = next_line_match.groups()
|
| 228 |
+
next_start_time_ms = (
|
| 229 |
+
int(next_h) * 3600000 +
|
| 230 |
+
int(next_m) * 60000 +
|
| 231 |
+
int(next_s) * 1000 +
|
| 232 |
+
int(next_ms)
|
| 233 |
+
)
|
| 234 |
+
else:
|
| 235 |
+
next_start_time_ms = None
|
| 236 |
+
else:
|
| 237 |
+
next_start_time_ms = None
|
| 238 |
|
| 239 |
+
# Combine audio segments if current audio is longer than the time difference
|
| 240 |
+
while next_start_time_ms and current_audio_duration > (next_start_time_ms - start_time):
|
| 241 |
+
if i + 1 < len(lines):
|
| 242 |
+
next_start_time, next_audio_paths = await process_transcript_line(lines[i + 1], voice, rate, pitch)
|
| 243 |
+
if next_start_time is not None and next_audio_paths:
|
| 244 |
+
for next_path in next_audio_paths:
|
| 245 |
+
try:
|
| 246 |
+
next_audio = AudioSegment.from_mp3(next_path)
|
| 247 |
+
combined_line_audio += next_audio
|
| 248 |
+
os.remove(next_path)
|
| 249 |
+
except FileNotFoundError:
|
| 250 |
+
print(f"Warning: Audio file not found: {next_path}")
|
| 251 |
+
current_audio_duration = len(combined_line_audio)
|
| 252 |
+
i += 1 # Move to the next line
|
| 253 |
+
if i + 1 < len(lines):
|
| 254 |
+
next_line_match = re.match(r'(\d{2}):(\d{2}):(\d{2}),(\d{3})\s+.*', lines[i + 1])
|
| 255 |
+
if next_line_match:
|
| 256 |
+
next_h, next_m, next_s, next_ms = next_line_match.groups()
|
| 257 |
+
next_start_time_ms = (
|
| 258 |
+
int(next_h) * 3600000 +
|
| 259 |
+
int(next_m) * 60000 +
|
| 260 |
+
int(next_s) * 1000 +
|
| 261 |
+
int(next_ms)
|
| 262 |
+
)
|
| 263 |
+
else:
|
| 264 |
+
next_start_time_ms = None
|
| 265 |
+
else:
|
| 266 |
+
next_start_time_ms = None
|
| 267 |
+
else:
|
| 268 |
+
break # Exit the loop if there are no more processable lines
|
| 269 |
+
else:
|
| 270 |
+
break
|
| 271 |
|
| 272 |
timed_audio_segments.append({'start': intended_start_time, 'audio': combined_line_audio})
|
|
|
|
| 273 |
previous_end_time_ms = max(previous_end_time_ms, intended_start_time + current_audio_duration)
|
| 274 |
max_end_time_ms = max(max_end_time_ms, previous_end_time_ms)
|
| 275 |
elif audio_paths:
|
|
|
|
| 345 |
if __name__ == "__main__":
|
| 346 |
demo = asyncio.run(create_demo())
|
| 347 |
demo.launch()
|
|
|