yukee1992 committed on
Commit
0fad05c
·
verified ·
1 Parent(s): 253c435

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +142 -184
app.py CHANGED
@@ -41,10 +41,6 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
41
  print(f"✅ Using device: {DEVICE}")
42
  print(f"🔧 OCI Upload URL: {OCI_UPLOAD_API_URL or 'Not configured - uploads will be local only'}")
43
 
44
- # Model configuration
45
- MODEL_REPO_ID = "coqui/XTTS-v2"
46
- MODEL_CACHE_DIR = "/tmp/tts_models"
47
-
48
  # Global state
49
  tts = None
50
  model_loaded = False
@@ -53,6 +49,7 @@ voice_cloning_supported = False
53
  model_loading = False
54
  model_load_attempts = 0
55
  current_voice_style = "default_female"
 
56
 
57
  # Pydantic models
58
  class TTSRequest(BaseModel):
@@ -60,7 +57,7 @@ class TTSRequest(BaseModel):
60
  project_id: str
61
  voice_name: Optional[str] = "default"
62
  language: Optional[str] = "en"
63
- voice_style: Optional[str] = "default_female" # Add voice style selection
64
 
65
  class BatchTTSRequest(BaseModel):
66
  texts: List[str]
@@ -83,7 +80,7 @@ def clean_text(text):
83
  import re
84
 
85
  if not text or not isinstance(text, str):
86
- return "Hello" # Default fallback text
87
 
88
  # Remove any non-ASCII characters
89
  text = text.encode('ascii', 'ignore').decode('ascii')
@@ -128,7 +125,6 @@ def upload_to_oci(file_path: str, filename: str, project_id: str, file_type="voi
128
  "subfolder": "voiceover"
129
  }
130
 
131
- # Add headers and better timeout handling
132
  headers = {
133
  "User-Agent": "TTS-API/1.0",
134
  "Accept": "application/json"
@@ -249,7 +245,7 @@ def save_wav(audio, file_path):
249
  # Try soundfile first
250
  try:
251
  import soundfile as sf
252
- sf.write(file_path, audio, 22050) # Standard TTS sample rate
253
  return True
254
  except ImportError:
255
  print("⚠️ soundfile not available, using fallback method")
@@ -258,17 +254,15 @@ def save_wav(audio, file_path):
258
  import wave
259
  import numpy as np
260
 
261
- # Ensure audio is numpy array
262
  if isinstance(audio, list):
263
  audio = np.array(audio)
264
 
265
- # Convert to 16-bit PCM
266
  audio_int16 = (audio * 32767).astype(np.int16)
267
 
268
  with wave.open(file_path, 'wb') as wav_file:
269
- wav_file.setnchannels(1) # Mono
270
- wav_file.setsampwidth(2) # 16-bit
271
- wav_file.setframerate(22050) # Sample rate
272
  wav_file.writeframes(audio_int16.tobytes())
273
 
274
  return True
@@ -278,134 +272,126 @@ def save_wav(audio, file_path):
278
  return False
279
 
280
  def load_tts_model(voice_style="default_female"):
281
- """Load TTS model with different voice options"""
282
  global tts, model_loaded, current_model, voice_cloning_supported, model_loading, model_load_attempts, current_voice_style
283
 
284
  if model_loading:
285
  print("⏳ Model is already being loaded...")
286
  return False
287
 
 
 
 
 
288
  model_loading = True
289
  model_load_attempts += 1
290
 
291
  try:
292
  from TTS.api import TTS
293
 
294
- # Handle TOS acceptance automatically
295
- import sys
296
- from io import StringIO
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
 
298
- old_stdin = sys.stdin
299
- sys.stdin = StringIO('y\n')
300
 
 
 
 
 
301
  try:
302
- # Different models with different voice characteristics
303
- model_options = {
304
- "male_deep": {
305
- "name": "tts_models/en/vctk/vits",
306
- "description": "VITS - Multiple speakers (male/female options)",
307
- "speaker": "p225" # Male voice
308
- },
309
- "male_medium": {
310
- "name": "tts_models/en/vctk/vits",
311
- "description": "VITS - Multiple speakers",
312
- "speaker": "p226" # Male voice
313
- },
314
- "female_1": {
315
- "name": "tts_models/en/vctk/vits",
316
- "description": "VITS - Multiple speakers",
317
- "speaker": "p227" # Female voice
318
- },
319
- "female_2": {
320
- "name": "tts_models/en/vctk/vits",
321
- "description": "VITS - Multiple speakers",
322
- "speaker": "p228" # Female voice
323
- },
324
- "default_female": {
325
- "name": "tts_models/en/ljspeech/tacotron2-DDC",
326
- "description": "Tacotron2 - Default female (current)",
327
- "speaker": None
328
- },
329
- "clear_male": {
330
- "name": "tts_models/en/ek1/tacotron2",
331
- "description": "Tacotron2 - Clear male voice",
332
- "speaker": None
333
- }
334
- }
335
-
336
- selected_model = model_options.get(voice_style, model_options["default_female"])
337
- current_voice_style = voice_style
338
-
339
- print(f"🚀 Loading {selected_model['description']}...")
340
-
341
- # Load the selected model
342
- tts = TTS(selected_model["name"]).to(DEVICE)
343
-
344
- # Test the model
345
- test_path = "/tmp/test_output.wav"
346
-
347
- if selected_model["speaker"]:
348
- # For VITS model with speaker selection
349
- tts.tts_to_file(
350
- text="Test voice",
351
- file_path=test_path,
352
- speaker=selected_model["speaker"]
353
- )
354
- else:
355
- # For standard models
356
- tts.tts_to_file(text="Test voice", file_path=test_path)
357
-
358
- if os.path.exists(test_path):
359
- os.remove(test_path)
360
- print(f"✅ {selected_model['description']} loaded successfully!")
361
- else:
362
- raise Exception("Test failed - no file created")
363
-
364
- model_loaded = True
365
- current_model = selected_model["name"]
366
- voice_cloning_supported = False
367
- return True
368
-
369
- except Exception as e:
370
- print(f"❌ Model loading failed: {e}")
371
- # Fallback to default
372
- print("🔄 Falling back to default Tacotron2...")
373
  tts = TTS("tts_models/en/ljspeech/tacotron2-DDC").to(DEVICE)
374
- model_loaded = True
375
- current_model = "tts_models/en/ljspeech/tacotron2-DDC"
376
- voice_cloning_supported = False
377
- current_voice_style = "default_female"
378
- return True
379
-
380
- finally:
381
- sys.stdin = old_stdin
382
 
383
  except Exception as e:
384
  print(f"❌ Failed to initialize TTS: {e}")
 
385
  return False
386
  finally:
387
  model_loading = False
388
 
389
- # Health check endpoint
390
  @app.get("/")
391
  async def root():
 
392
  return {
393
  "status": "running",
394
  "service": "TTS API",
 
395
  "model_loaded": model_loaded,
396
- "current_model": current_model,
397
  "device": DEVICE,
398
  "oci_configured": bool(OCI_UPLOAD_API_URL)
399
  }
400
 
401
- @app.get("/api/health")
402
  async def health_check():
403
- """Health check endpoint"""
404
  return {
405
  "status": "healthy",
 
 
406
  "model_loaded": model_loaded,
407
- "current_model": current_model,
 
 
 
 
 
 
 
 
 
408
  "device": DEVICE,
 
409
  "timestamp": datetime.now().isoformat()
410
  }
411
 
@@ -419,9 +405,9 @@ async def check_oci_health():
419
  }
420
 
421
  try:
422
- # Test connection to OCI service
423
  test_url = f"{OCI_UPLOAD_API_URL}/api/health"
424
- response = requests.get(test_url, timeout=10)
425
 
426
  if response.status_code == 200:
427
  return {
@@ -447,12 +433,13 @@ async def check_oci_health():
447
  async def generate_tts(request: TTSRequest):
448
  """Generate TTS for a single text with lazy model loading"""
449
  try:
450
- # Lazy load model on first request with voice style
451
  if not model_loaded or current_voice_style != request.voice_style:
 
452
  if not load_tts_model(request.voice_style):
453
  return {
454
  "status": "error",
455
- "message": "TTS model failed to load. Please check the logs.",
456
  "requires_tos_acceptance": True,
457
  "tos_url": "https://coqui.ai/cpml.txt"
458
  }
@@ -460,7 +447,6 @@ async def generate_tts(request: TTSRequest):
460
  print(f"📥 TTS request for project: {request.project_id}")
461
  print(f" Text length: {len(request.text)} characters")
462
  print(f" Voice style: {request.voice_style}")
463
- print(f" Language: {request.language}")
464
 
465
  # Check if voice cloning is requested but not supported
466
  if request.voice_name != "default" and not supports_voice_cloning():
@@ -470,7 +456,7 @@ async def generate_tts(request: TTSRequest):
470
  "model": current_model
471
  }
472
 
473
- # Generate unique filename with sequential naming
474
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
475
  filename = f"voiceover_{timestamp}.wav"
476
  output_path = f"/tmp/output/{filename}"
@@ -492,52 +478,24 @@ async def generate_tts(request: TTSRequest):
492
 
493
  # Clean the text before generation
494
  cleaned_text = clean_text(request.text)
495
- print(f"πŸ“ Original text: '{request.text}'")
496
  print(f"πŸ“ Cleaned text: '{cleaned_text}'")
497
 
498
- # Generate TTS based on model capabilities - WITH ERROR HANDLING
499
  try:
500
- print(f"🔊 Attempting TTS generation with {current_model}...")
501
 
502
- # Get the speaker for VITS models
503
- speaker = None
504
- if "vctk/vits" in current_model:
505
- # Map voice styles to VITS speakers
506
- speaker_map = {
507
- "male_deep": "p225",
508
- "male_medium": "p226",
509
- "female_1": "p227",
510
- "female_2": "p228"
511
- }
512
- speaker = speaker_map.get(request.voice_style)
513
-
514
- if speaker:
515
- # For VITS model with speaker selection
516
- tts.tts_to_file(
517
- text=cleaned_text,
518
- file_path=output_path,
519
- speaker=speaker
520
- )
521
- else:
522
- # For standard models
523
- tts.tts_to_file(
524
- text=cleaned_text,
525
- file_path=output_path
526
- )
527
 
528
  except Exception as tts_error:
529
  print(f"❌ TTS generation failed: {tts_error}")
530
  # Try alternative approach
531
  try:
532
  print("🔄 Trying alternative TTS generation method...")
533
- # Generate audio first, then save
534
- if speaker:
535
- audio = tts.tts(
536
- text=cleaned_text,
537
- speaker=speaker
538
- )
539
- else:
540
- audio = tts.tts(text=cleaned_text)
541
 
542
  # Save manually
543
  if not save_wav(audio, output_path):
@@ -561,7 +519,7 @@ async def generate_tts(request: TTSRequest):
561
 
562
  if error:
563
  print(f"❌ OCI upload failed: {error}")
564
- # Still return the local file path if upload fails
565
  return {
566
  "status": "success_local",
567
  "message": f"TTS generated locally (upload failed: {error})",
@@ -588,21 +546,12 @@ async def generate_tts(request: TTSRequest):
588
  "filename": filename,
589
  "oci_path": upload_result.get("path", f"{request.project_id}/voiceover/{filename}"),
590
  "model_used": current_model,
591
- "voice_style": request.voice_style,
592
- "voice_cloning": supports_voice_cloning() and request.voice_name != "default"
593
  }
594
 
595
  except Exception as e:
596
  print(f"❌ TTS generation error: {str(e)}")
597
- # Provide more detailed error information
598
- error_detail = {
599
- "error": str(e),
600
- "model": current_model,
601
- "voice_style": request.voice_style,
602
- "voice_cloning_supported": supports_voice_cloning(),
603
- "device": DEVICE
604
- }
605
- raise HTTPException(status_code=500, detail=error_detail)
606
 
607
  @app.post("/api/batch-tts")
608
  async def batch_generate_tts(request: BatchTTSRequest):
@@ -610,13 +559,13 @@ async def batch_generate_tts(request: BatchTTSRequest):
610
  try:
611
  # Lazy load model on first request
612
  if not model_loaded or current_voice_style != request.voice_style:
 
613
  if not load_tts_model(request.voice_style):
614
  raise HTTPException(status_code=500, detail="TTS model failed to load")
615
 
616
  print(f"📥 Batch TTS request for project: {request.project_id}")
617
  print(f" Number of texts: {len(request.texts)}")
618
  print(f" Voice style: {request.voice_style}")
619
- print(f" Language: {request.language}")
620
 
621
  # Check if voice cloning is requested but not supported
622
  if request.voice_name != "default" and not supports_voice_cloning():
@@ -647,25 +596,7 @@ async def batch_generate_tts(request: BatchTTSRequest):
647
 
648
  # Generate TTS
649
  try:
650
- # Get speaker for VITS models
651
- speaker = None
652
- if "vctk/vits" in current_model:
653
- speaker_map = {
654
- "male_deep": "p225",
655
- "male_medium": "p226",
656
- "female_1": "p227",
657
- "female_2": "p228"
658
- }
659
- speaker = speaker_map.get(request.voice_style)
660
-
661
- if speaker:
662
- tts.tts_to_file(
663
- text=cleaned_text,
664
- file_path=output_path,
665
- speaker=speaker
666
- )
667
- else:
668
- tts.tts_to_file(text=cleaned_text, file_path=output_path)
669
 
670
  # Verify file was created
671
  if not os.path.exists(output_path):
@@ -828,15 +759,42 @@ async def change_voice_style(request: ChangeVoiceRequest):
828
  async def get_voice_styles():
829
  """Get available voice styles"""
830
  styles = {
831
- "male_deep": "Deep male voice (VITS)",
832
- "male_medium": "Medium male voice (VITS)",
833
- "female_1": "Female voice 1 (VITS)",
834
- "female_2": "Female voice 2 (VITS)",
835
- "default_female": "Default female voice (Tacotron2)",
836
- "clear_male": "Clear male voice (Tacotron2)"
837
  }
838
  return {"voice_styles": styles}
839
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
840
  if __name__ == "__main__":
841
  import uvicorn
842
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
41
  print(f"✅ Using device: {DEVICE}")
42
  print(f"🔧 OCI Upload URL: {OCI_UPLOAD_API_URL or 'Not configured - uploads will be local only'}")
43
 
 
 
 
 
44
  # Global state
45
  tts = None
46
  model_loaded = False
 
49
  model_loading = False
50
  model_load_attempts = 0
51
  current_voice_style = "default_female"
52
+ app_startup_time = datetime.now()
53
 
54
  # Pydantic models
55
  class TTSRequest(BaseModel):
 
57
  project_id: str
58
  voice_name: Optional[str] = "default"
59
  language: Optional[str] = "en"
60
+ voice_style: Optional[str] = "default_female"
61
 
62
  class BatchTTSRequest(BaseModel):
63
  texts: List[str]
 
80
  import re
81
 
82
  if not text or not isinstance(text, str):
83
+ return "Hello"
84
 
85
  # Remove any non-ASCII characters
86
  text = text.encode('ascii', 'ignore').decode('ascii')
 
125
  "subfolder": "voiceover"
126
  }
127
 
 
128
  headers = {
129
  "User-Agent": "TTS-API/1.0",
130
  "Accept": "application/json"
 
245
  # Try soundfile first
246
  try:
247
  import soundfile as sf
248
+ sf.write(file_path, audio, 22050)
249
  return True
250
  except ImportError:
251
  print("⚠️ soundfile not available, using fallback method")
 
254
  import wave
255
  import numpy as np
256
 
 
257
  if isinstance(audio, list):
258
  audio = np.array(audio)
259
 
 
260
  audio_int16 = (audio * 32767).astype(np.int16)
261
 
262
  with wave.open(file_path, 'wb') as wav_file:
263
+ wav_file.setnchannels(1)
264
+ wav_file.setsampwidth(2)
265
+ wav_file.setframerate(22050)
266
  wav_file.writeframes(audio_int16.tobytes())
267
 
268
  return True
 
272
  return False
273
 
274
  def load_tts_model(voice_style="default_female"):
275
+ """Load TTS model with different voice options - LAZY LOADING"""
276
  global tts, model_loaded, current_model, voice_cloning_supported, model_loading, model_load_attempts, current_voice_style
277
 
278
  if model_loading:
279
  print("⏳ Model is already being loaded...")
280
  return False
281
 
282
+ if model_loaded and current_voice_style == voice_style:
283
+ print("✅ Model already loaded with requested voice style")
284
+ return True
285
+
286
  model_loading = True
287
  model_load_attempts += 1
288
 
289
  try:
290
  from TTS.api import TTS
291
 
292
+ # Use smaller, faster models for initial load
293
+ model_options = {
294
+ "default_female": {
295
+ "name": "tts_models/en/ljspeech/tacotron2-DDC",
296
+ "description": "Tacotron2 - Default female (fast)",
297
+ "speaker": None
298
+ },
299
+ "clear_male": {
300
+ "name": "tts_models/en/ek1/tacotron2",
301
+ "description": "Tacotron2 - Clear male voice",
302
+ "speaker": None
303
+ },
304
+ # Fallbacks for other voice styles
305
+ "male_deep": {
306
+ "name": "tts_models/en/ljspeech/tacotron2-DDC",
307
+ "description": "Tacotron2 - Default female (fallback)",
308
+ "speaker": None
309
+ },
310
+ "male_medium": {
311
+ "name": "tts_models/en/ljspeech/tacotron2-DDC",
312
+ "description": "Tacotron2 - Default female (fallback)",
313
+ "speaker": None
314
+ },
315
+ "female_1": {
316
+ "name": "tts_models/en/ljspeech/tacotron2-DDC",
317
+ "description": "Tacotron2 - Default female (fallback)",
318
+ "speaker": None
319
+ },
320
+ "female_2": {
321
+ "name": "tts_models/en/ljspeech/tacotron2-DDC",
322
+ "description": "Tacotron2 - Default female (fallback)",
323
+ "speaker": None
324
+ }
325
+ }
326
+
327
+ selected_model = model_options.get(voice_style, model_options["default_female"])
328
+ current_voice_style = voice_style
329
 
330
+ print(f"🚀 Loading {selected_model['description']}...")
331
+ print(f"📥 This may take a few minutes on first load...")
332
 
333
+ # Load the selected model
334
+ tts = TTS(selected_model["name"]).to(DEVICE)
335
+
336
+ # Quick test
337
  try:
338
+ test_text = "Hello"
339
+ audio = tts.tts(text=test_text)
340
+ print(f"✅ {selected_model['description']} loaded successfully!")
341
+ except Exception as test_error:
342
+ print(f"❌ Model test failed: {test_error}")
343
+ # Try fallback to default
344
+ print("🔄 Trying fallback model...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  tts = TTS("tts_models/en/ljspeech/tacotron2-DDC").to(DEVICE)
346
+ tts.tts(text="Hello")
347
+ selected_model = model_options["default_female"]
348
+
349
+ model_loaded = True
350
+ current_model = selected_model["name"]
351
+ voice_cloning_supported = False
352
+ model_load_attempts = 0
353
+ return True
354
 
355
  except Exception as e:
356
  print(f"❌ Failed to initialize TTS: {e}")
357
+ model_loading = False
358
  return False
359
  finally:
360
  model_loading = False
361
 
362
+ # Health check endpoints - CRITICAL FOR DEPLOYMENT
363
  @app.get("/")
364
  async def root():
365
+ """Root endpoint - always responds quickly"""
366
  return {
367
  "status": "running",
368
  "service": "TTS API",
369
+ "startup_time": app_startup_time.isoformat(),
370
  "model_loaded": model_loaded,
 
371
  "device": DEVICE,
372
  "oci_configured": bool(OCI_UPLOAD_API_URL)
373
  }
374
 
375
+ @app.get("/health")
376
  async def health_check():
377
+ """Health check endpoint - must respond quickly"""
378
  return {
379
  "status": "healthy",
380
+ "timestamp": datetime.now().isoformat(),
381
+ "startup_time": app_startup_time.isoformat(),
382
  "model_loaded": model_loaded,
383
+ "service": "TTS API"
384
+ }
385
+
386
+ @app.get("/api/health")
387
+ async def api_health_check():
388
+ """API health check with model status"""
389
+ return {
390
+ "status": "healthy",
391
+ "model_loaded": model_loaded,
392
+ "current_model": current_model if model_loaded else "none",
393
  "device": DEVICE,
394
+ "uptime": str(datetime.now() - app_startup_time),
395
  "timestamp": datetime.now().isoformat()
396
  }
397
 
 
405
  }
406
 
407
  try:
408
+ # Test connection to OCI service with short timeout
409
  test_url = f"{OCI_UPLOAD_API_URL}/api/health"
410
+ response = requests.get(test_url, timeout=5)
411
 
412
  if response.status_code == 200:
413
  return {
 
433
  async def generate_tts(request: TTSRequest):
434
  """Generate TTS for a single text with lazy model loading"""
435
  try:
436
+ # Lazy load model on first request
437
  if not model_loaded or current_voice_style != request.voice_style:
438
+ print("🔄 Lazy loading TTS model...")
439
  if not load_tts_model(request.voice_style):
440
  return {
441
  "status": "error",
442
+ "message": "TTS model failed to load. Please try again in a moment.",
443
  "requires_tos_acceptance": True,
444
  "tos_url": "https://coqui.ai/cpml.txt"
445
  }
 
447
  print(f"📥 TTS request for project: {request.project_id}")
448
  print(f" Text length: {len(request.text)} characters")
449
  print(f" Voice style: {request.voice_style}")
 
450
 
451
  # Check if voice cloning is requested but not supported
452
  if request.voice_name != "default" and not supports_voice_cloning():
 
456
  "model": current_model
457
  }
458
 
459
+ # Generate unique filename
460
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
461
  filename = f"voiceover_{timestamp}.wav"
462
  output_path = f"/tmp/output/{filename}"
 
478
 
479
  # Clean the text before generation
480
  cleaned_text = clean_text(request.text)
 
481
  print(f"πŸ“ Cleaned text: '{cleaned_text}'")
482
 
483
+ # Generate TTS with error handling
484
  try:
485
+ print(f"🔊 Generating TTS with {current_model}...")
486
 
487
+ # Simple TTS generation for fast models
488
+ tts.tts_to_file(
489
+ text=cleaned_text,
490
+ file_path=output_path
491
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
492
 
493
  except Exception as tts_error:
494
  print(f"❌ TTS generation failed: {tts_error}")
495
  # Try alternative approach
496
  try:
497
  print("🔄 Trying alternative TTS generation method...")
498
+ audio = tts.tts(text=cleaned_text)
 
 
 
 
 
 
 
499
 
500
  # Save manually
501
  if not save_wav(audio, output_path):
 
519
 
520
  if error:
521
  print(f"❌ OCI upload failed: {error}")
522
+ # Return success with local file info
523
  return {
524
  "status": "success_local",
525
  "message": f"TTS generated locally (upload failed: {error})",
 
546
  "filename": filename,
547
  "oci_path": upload_result.get("path", f"{request.project_id}/voiceover/{filename}"),
548
  "model_used": current_model,
549
+ "voice_style": request.voice_style
 
550
  }
551
 
552
  except Exception as e:
553
  print(f"❌ TTS generation error: {str(e)}")
554
+ raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}")
 
 
 
 
 
 
 
 
555
 
556
  @app.post("/api/batch-tts")
557
  async def batch_generate_tts(request: BatchTTSRequest):
 
559
  try:
560
  # Lazy load model on first request
561
  if not model_loaded or current_voice_style != request.voice_style:
562
+ print("🔄 Lazy loading TTS model for batch processing...")
563
  if not load_tts_model(request.voice_style):
564
  raise HTTPException(status_code=500, detail="TTS model failed to load")
565
 
566
  print(f"📥 Batch TTS request for project: {request.project_id}")
567
  print(f" Number of texts: {len(request.texts)}")
568
  print(f" Voice style: {request.voice_style}")
 
569
 
570
  # Check if voice cloning is requested but not supported
571
  if request.voice_name != "default" and not supports_voice_cloning():
 
596
 
597
  # Generate TTS
598
  try:
599
+ tts.tts_to_file(text=cleaned_text, file_path=output_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
600
 
601
  # Verify file was created
602
  if not os.path.exists(output_path):
 
759
  async def get_voice_styles():
760
  """Get available voice styles"""
761
  styles = {
762
+ "default_female": "Default female voice (Tacotron2) - Fast",
763
+ "clear_male": "Clear male voice (Tacotron2) - Fast",
764
+ "male_deep": "Deep male voice (Fallback to default)",
765
+ "male_medium": "Medium male voice (Fallback to default)",
766
+ "female_1": "Female voice 1 (Fallback to default)",
767
+ "female_2": "Female voice 2 (Fallback to default)"
768
  }
769
  return {"voice_styles": styles}
770
 
771
+ @app.get("/api/status")
772
+ async def get_status():
773
+ """Get detailed application status"""
774
+ return {
775
+ "status": "running",
776
+ "model_loaded": model_loaded,
777
+ "current_model": current_model if model_loaded else "none",
778
+ "current_voice_style": current_voice_style,
779
+ "device": DEVICE,
780
+ "oci_configured": bool(OCI_UPLOAD_API_URL),
781
+ "startup_time": app_startup_time.isoformat(),
782
+ "uptime": str(datetime.now() - app_startup_time),
783
+ "model_load_attempts": model_load_attempts
784
+ }
785
+
786
+ # Startup event - NO MODEL LOADING to avoid timeouts
787
+ @app.on_event("startup")
788
+ async def startup_event():
789
+ """Startup event - no model loading to avoid timeouts"""
790
+ print("=" * 50)
791
+ print("🚀 TTS API Starting Up...")
792
+ print(f"✅ Device: {DEVICE}")
793
+ print(f"🔧 OCI Upload: {OCI_UPLOAD_API_URL or 'Local only'}")
794
+ print("πŸ“ Models will load on first request (lazy loading)")
795
+ print("⏰ Startup time:", app_startup_time.isoformat())
796
+ print("=" * 50)
797
+
798
  if __name__ == "__main__":
799
  import uvicorn
800
+ uvicorn.run(app, host="0.0.0.0", port=8000, access_log=False)