Update app.py
Browse files
app.py
CHANGED
|
@@ -161,6 +161,40 @@ def supports_voice_cloning():
|
|
| 161 |
"""Check if the current model supports voice cloning"""
|
| 162 |
return "xtts" in current_model.lower()
|
| 163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
def load_tts_model():
|
| 165 |
"""Load TTS model with retry logic and proper error handling"""
|
| 166 |
global tts, model_loaded, current_model, voice_cloning_supported, model_loading, model_load_attempts
|
|
@@ -252,6 +286,9 @@ async def generate_tts(request: TTSRequest):
|
|
| 252 |
filename = f"voiceover_{timestamp}.wav"
|
| 253 |
output_path = f"/tmp/output/{filename}"
|
| 254 |
|
|
|
|
|
|
|
|
|
|
| 255 |
# Get voice path if custom voice is requested
|
| 256 |
speaker_wav = None
|
| 257 |
if request.voice_name != "default":
|
|
@@ -262,21 +299,53 @@ async def generate_tts(request: TTSRequest):
|
|
| 262 |
"message": f"Voice '{request.voice_name}' not found"
|
| 263 |
}
|
| 264 |
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
|
| 279 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
|
| 281 |
# Upload to OCI
|
| 282 |
upload_result, error = upload_to_oci_with_retry(
|
|
@@ -285,11 +354,13 @@ async def generate_tts(request: TTSRequest):
|
|
| 285 |
|
| 286 |
if error:
|
| 287 |
print(f"β OCI upload failed: {error}")
|
|
|
|
| 288 |
return {
|
| 289 |
"status": "partial_success",
|
| 290 |
"message": f"TTS generated but upload failed: {error}",
|
| 291 |
"local_file": output_path,
|
| 292 |
-
"filename": filename
|
|
|
|
| 293 |
}
|
| 294 |
|
| 295 |
print(f"β
Upload successful: {filename}")
|
|
@@ -297,8 +368,9 @@ async def generate_tts(request: TTSRequest):
|
|
| 297 |
# Clean up local file
|
| 298 |
try:
|
| 299 |
os.remove(output_path)
|
| 300 |
-
|
| 301 |
-
|
|
|
|
| 302 |
|
| 303 |
return {
|
| 304 |
"status": "success",
|
|
@@ -311,7 +383,14 @@ async def generate_tts(request: TTSRequest):
|
|
| 311 |
|
| 312 |
except Exception as e:
|
| 313 |
print(f"β TTS generation error: {str(e)}")
|
| 314 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
|
| 316 |
@app.post("/api/batch-tts")
|
| 317 |
async def batch_generate_tts(request: BatchTTSRequest):
|
|
@@ -350,19 +429,63 @@ async def batch_generate_tts(request: BatchTTSRequest):
|
|
| 350 |
filename = f"voiceover_{i+1:02d}.wav"
|
| 351 |
output_path = f"/tmp/output/{filename}"
|
| 352 |
|
| 353 |
-
#
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 366 |
|
| 367 |
# Upload to OCI
|
| 368 |
upload_result, error = upload_to_oci_with_retry(
|
|
@@ -376,7 +499,8 @@ async def batch_generate_tts(request: BatchTTSRequest):
|
|
| 376 |
"status": "partial_success",
|
| 377 |
"message": f"TTS generated but upload failed: {error}",
|
| 378 |
"local_file": output_path,
|
| 379 |
-
"filename": filename
|
|
|
|
| 380 |
})
|
| 381 |
else:
|
| 382 |
print(f"β
Upload successful: {filename}")
|
|
@@ -385,14 +509,16 @@ async def batch_generate_tts(request: BatchTTSRequest):
|
|
| 385 |
"status": "success",
|
| 386 |
"message": "TTS generated and uploaded successfully",
|
| 387 |
"filename": filename,
|
| 388 |
-
"oci_path": upload_result.get("path", f"{request.project_id}/voiceover/{filename}")
|
|
|
|
| 389 |
})
|
| 390 |
|
| 391 |
# Clean up local file
|
| 392 |
try:
|
| 393 |
os.remove(output_path)
|
| 394 |
-
|
| 395 |
-
|
|
|
|
| 396 |
|
| 397 |
return {
|
| 398 |
"status": "completed",
|
|
|
|
| 161 |
"""Check if the current model supports voice cloning"""
|
| 162 |
return "xtts" in current_model.lower()
|
| 163 |
|
| 164 |
+
def save_wav(audio, file_path, sample_rate=22050):
    """Save mono audio samples to a WAV file.

    ``soundfile`` is used when it can be imported. Otherwise the samples are
    written as 16-bit PCM with the standard-library ``wave`` module.

    Args:
        audio: Sequence or array of float samples, nominally in [-1.0, 1.0].
        file_path: Destination path of the WAV file.
        sample_rate: Sample rate in Hz written to the file. Defaults to
            22050 (the previously hard-coded value). Pass the synthesizer's
            real output rate when it differs, otherwise playback speed and
            pitch are wrong.
            NOTE(review): XTTS models are believed to emit 24000 Hz --
            confirm against the loaded model and pass it from the callers.

    Returns:
        True if the file was written, False if anything failed. The error
        is printed, never raised.
    """
    try:
        # Try soundfile first; only a missing package triggers the fallback.
        try:
            import soundfile as sf
        except ImportError:
            print("β οΈ soundfile not available, using fallback method")
        else:
            sf.write(file_path, audio, sample_rate)
            return True

        # Fallback: standard-library wave writer.
        import wave
        import numpy as np

        # asarray accepts lists, tuples and existing arrays alike.
        samples = np.asarray(audio, dtype=np.float64)

        # Clip before scaling: out-of-range floats would otherwise wrap
        # around when cast to int16 and produce loud clicks.
        # "<i2" forces little-endian, which the WAV format requires.
        pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype("<i2")

        with wave.open(file_path, 'wb') as wav_file:
            wav_file.setnchannels(1)  # Mono
            wav_file.setsampwidth(2)  # 16-bit
            wav_file.setframerate(sample_rate)
            wav_file.writeframes(pcm.tobytes())

        return True

    except Exception as e:
        # Best-effort helper: callers check the boolean result.
        print(f"β Failed to save WAV: {e}")
        return False
|
| 197 |
+
|
| 198 |
def load_tts_model():
|
| 199 |
"""Load TTS model with retry logic and proper error handling"""
|
| 200 |
global tts, model_loaded, current_model, voice_cloning_supported, model_loading, model_load_attempts
|
|
|
|
| 286 |
filename = f"voiceover_{timestamp}.wav"
|
| 287 |
output_path = f"/tmp/output/{filename}"
|
| 288 |
|
| 289 |
+
# Ensure output directory exists
|
| 290 |
+
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
| 291 |
+
|
| 292 |
# Get voice path if custom voice is requested
|
| 293 |
speaker_wav = None
|
| 294 |
if request.voice_name != "default":
|
|
|
|
| 299 |
"message": f"Voice '{request.voice_name}' not found"
|
| 300 |
}
|
| 301 |
|
| 302 |
+
print(f"π Generating TTS to: {output_path}")
|
| 303 |
+
|
| 304 |
+
# Generate TTS based on model capabilities - WITH ERROR HANDLING
|
| 305 |
+
try:
|
| 306 |
+
if supports_voice_cloning():
|
| 307 |
+
# XTTS model with voice cloning support
|
| 308 |
+
tts.tts_to_file(
|
| 309 |
+
text=request.text,
|
| 310 |
+
speaker_wav=speaker_wav,
|
| 311 |
+
language=request.language,
|
| 312 |
+
file_path=output_path
|
| 313 |
+
)
|
| 314 |
+
else:
|
| 315 |
+
# Fallback model (Tacotron2)
|
| 316 |
+
tts.tts_to_file(
|
| 317 |
+
text=request.text,
|
| 318 |
+
file_path=output_path
|
| 319 |
+
)
|
| 320 |
+
except Exception as tts_error:
|
| 321 |
+
print(f"β TTS generation failed: {tts_error}")
|
| 322 |
+
# Try alternative approach
|
| 323 |
+
try:
|
| 324 |
+
print("π Trying alternative TTS generation method...")
|
| 325 |
+
# Generate audio first, then save
|
| 326 |
+
if supports_voice_cloning():
|
| 327 |
+
audio = tts.tts(
|
| 328 |
+
text=request.text,
|
| 329 |
+
speaker_wav=speaker_wav,
|
| 330 |
+
language=request.language
|
| 331 |
+
)
|
| 332 |
+
else:
|
| 333 |
+
audio = tts.tts(text=request.text)
|
| 334 |
+
|
| 335 |
+
# Save manually
|
| 336 |
+
if not save_wav(audio, output_path):
|
| 337 |
+
raise Exception("Failed to save audio file")
|
| 338 |
+
|
| 339 |
+
except Exception as alt_error:
|
| 340 |
+
print(f"β Alternative method also failed: {alt_error}")
|
| 341 |
+
raise alt_error
|
| 342 |
|
| 343 |
+
# Verify the file was created
|
| 344 |
+
if not os.path.exists(output_path):
|
| 345 |
+
raise Exception(f"TTS failed to create output file: {output_path}")
|
| 346 |
+
|
| 347 |
+
file_size = os.path.getsize(output_path)
|
| 348 |
+
print(f"β
TTS generated: {output_path} ({file_size} bytes)")
|
| 349 |
|
| 350 |
# Upload to OCI
|
| 351 |
upload_result, error = upload_to_oci_with_retry(
|
|
|
|
| 354 |
|
| 355 |
if error:
|
| 356 |
print(f"β OCI upload failed: {error}")
|
| 357 |
+
# Still return the local file path if upload fails
|
| 358 |
return {
|
| 359 |
"status": "partial_success",
|
| 360 |
"message": f"TTS generated but upload failed: {error}",
|
| 361 |
"local_file": output_path,
|
| 362 |
+
"filename": filename,
|
| 363 |
+
"file_size": file_size
|
| 364 |
}
|
| 365 |
|
| 366 |
print(f"β
Upload successful: {filename}")
|
|
|
|
| 368 |
# Clean up local file
|
| 369 |
try:
|
| 370 |
os.remove(output_path)
|
| 371 |
+
print(f"π§Ή Cleaned up local file: {output_path}")
|
| 372 |
+
except Exception as cleanup_error:
|
| 373 |
+
print(f"β οΈ Could not clean up file: {cleanup_error}")
|
| 374 |
|
| 375 |
return {
|
| 376 |
"status": "success",
|
|
|
|
| 383 |
|
| 384 |
except Exception as e:
|
| 385 |
print(f"β TTS generation error: {str(e)}")
|
| 386 |
+
# Provide more detailed error information
|
| 387 |
+
error_detail = {
|
| 388 |
+
"error": str(e),
|
| 389 |
+
"model": current_model,
|
| 390 |
+
"voice_cloning_supported": supports_voice_cloning(),
|
| 391 |
+
"device": DEVICE
|
| 392 |
+
}
|
| 393 |
+
raise HTTPException(status_code=500, detail=error_detail)
|
| 394 |
|
| 395 |
@app.post("/api/batch-tts")
|
| 396 |
async def batch_generate_tts(request: BatchTTSRequest):
|
|
|
|
| 429 |
filename = f"voiceover_{i+1:02d}.wav"
|
| 430 |
output_path = f"/tmp/output/{filename}"
|
| 431 |
|
| 432 |
+
# Ensure output directory exists
|
| 433 |
+
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
| 434 |
+
|
| 435 |
+
# Generate TTS based on model capabilities - WITH ERROR HANDLING
|
| 436 |
+
try:
|
| 437 |
+
if supports_voice_cloning():
|
| 438 |
+
tts.tts_to_file(
|
| 439 |
+
text=text,
|
| 440 |
+
speaker_wav=speaker_wav,
|
| 441 |
+
language=request.language,
|
| 442 |
+
file_path=output_path
|
| 443 |
+
)
|
| 444 |
+
else:
|
| 445 |
+
tts.tts_to_file(
|
| 446 |
+
text=text,
|
| 447 |
+
file_path=output_path
|
| 448 |
+
)
|
| 449 |
+
except Exception as tts_error:
|
| 450 |
+
print(f"β TTS generation failed for text {i+1}: {tts_error}")
|
| 451 |
+
# Try alternative approach
|
| 452 |
+
try:
|
| 453 |
+
print("π Trying alternative TTS generation method...")
|
| 454 |
+
if supports_voice_cloning():
|
| 455 |
+
audio = tts.tts(
|
| 456 |
+
text=text,
|
| 457 |
+
speaker_wav=speaker_wav,
|
| 458 |
+
language=request.language
|
| 459 |
+
)
|
| 460 |
+
else:
|
| 461 |
+
audio = tts.tts(text=text)
|
| 462 |
+
|
| 463 |
+
# Save manually
|
| 464 |
+
if not save_wav(audio, output_path):
|
| 465 |
+
raise Exception("Failed to save audio file")
|
| 466 |
+
|
| 467 |
+
except Exception as alt_error:
|
| 468 |
+
print(f"β Alternative method also failed: {alt_error}")
|
| 469 |
+
results.append({
|
| 470 |
+
"text_index": i,
|
| 471 |
+
"status": "error",
|
| 472 |
+
"message": f"TTS generation failed: {str(alt_error)}",
|
| 473 |
+
"filename": filename
|
| 474 |
+
})
|
| 475 |
+
continue
|
| 476 |
+
|
| 477 |
+
# Verify the file was created
|
| 478 |
+
if not os.path.exists(output_path):
|
| 479 |
+
results.append({
|
| 480 |
+
"text_index": i,
|
| 481 |
+
"status": "error",
|
| 482 |
+
"message": f"TTS failed to create output file: {output_path}",
|
| 483 |
+
"filename": filename
|
| 484 |
+
})
|
| 485 |
+
continue
|
| 486 |
+
|
| 487 |
+
file_size = os.path.getsize(output_path)
|
| 488 |
+
print(f"β
TTS generated: {output_path} ({file_size} bytes)")
|
| 489 |
|
| 490 |
# Upload to OCI
|
| 491 |
upload_result, error = upload_to_oci_with_retry(
|
|
|
|
| 499 |
"status": "partial_success",
|
| 500 |
"message": f"TTS generated but upload failed: {error}",
|
| 501 |
"local_file": output_path,
|
| 502 |
+
"filename": filename,
|
| 503 |
+
"file_size": file_size
|
| 504 |
})
|
| 505 |
else:
|
| 506 |
print(f"β
Upload successful: {filename}")
|
|
|
|
| 509 |
"status": "success",
|
| 510 |
"message": "TTS generated and uploaded successfully",
|
| 511 |
"filename": filename,
|
| 512 |
+
"oci_path": upload_result.get("path", f"{request.project_id}/voiceover/{filename}"),
|
| 513 |
+
"file_size": file_size
|
| 514 |
})
|
| 515 |
|
| 516 |
# Clean up local file
|
| 517 |
try:
|
| 518 |
os.remove(output_path)
|
| 519 |
+
print(f"π§Ή Cleaned up local file: {output_path}")
|
| 520 |
+
except Exception as cleanup_error:
|
| 521 |
+
print(f"β οΈ Could not clean up file: {cleanup_error}")
|
| 522 |
|
| 523 |
return {
|
| 524 |
"status": "completed",
|