yukee1992 commited on
Commit
2645cdf
·
verified ·
1 Parent(s): ff5cdb3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -338
app.py CHANGED
@@ -13,7 +13,6 @@ from fastapi import FastAPI, HTTPException, Form, UploadFile, File
13
  from fastapi.middleware.cors import CORSMiddleware
14
  from pydantic import BaseModel
15
  import torch
16
- import numpy as np
17
 
18
  # Global state
19
  tts = None
@@ -78,13 +77,6 @@ class TTSRequest(BaseModel):
78
  language: Optional[str] = "en"
79
  voice_style: Optional[str] = "default_female"
80
 
81
- class BatchTTSRequest(BaseModel):
82
- texts: List[str]
83
- project_id: str
84
- voice_name: Optional[str] = "default"
85
- language: Optional[str] = "en"
86
- voice_style: Optional[str] = "default_female"
87
-
88
  class VoiceCloneRequest(BaseModel):
89
  project_id: str
90
  voice_name: str
@@ -101,112 +93,47 @@ def clean_text(text):
101
  if not text or not isinstance(text, str):
102
  return "Hello"
103
 
104
- # Remove any non-ASCII characters
105
  text = text.encode('ascii', 'ignore').decode('ascii')
106
-
107
- # Remove any problematic characters but keep basic punctuation
108
  text = re.sub(r'[^\w\s\.\,\!\?\-\'\"\:]', '', text)
109
-
110
- # Replace multiple spaces with single space
111
  text = re.sub(r'\s+', ' ', text)
112
 
113
- # Ensure text ends with punctuation if it's a sentence
114
  if len(text) > 10 and not re.search(r'[\.\!\?]$', text):
115
  text = text + '.'
116
 
117
  text = text.strip()
118
 
119
- # If text is empty after cleaning, use default
120
  if not text:
121
  text = "Hello world"
122
 
123
  return text
124
 
125
  def upload_to_oci(file_path: str, filename: str, project_id: str, file_type="voiceover"):
126
- """Upload file to OCI using your existing API with subfolder support"""
127
  try:
128
  if not OCI_UPLOAD_API_URL:
129
  print("⚠️ OCI upload skipped - OCI_UPLOAD_API_URL not configured")
130
  return {"status": "skipped", "message": "OCI upload disabled"}, None
131
 
132
- if not OCI_UPLOAD_API_URL.startswith(('http://', 'https://')):
133
- return None, f"Invalid OCI URL format: {OCI_UPLOAD_API_URL}"
134
-
135
  url = f"{OCI_UPLOAD_API_URL}/api/upload"
136
  print(f"🔗 Attempting upload to: {url}")
137
 
138
  with open(file_path, "rb") as f:
139
  files = {"file": (filename, f, "audio/wav")}
140
- data = {
141
- "project_id": project_id,
142
- "subfolder": "voiceover"
143
- }
144
-
145
- headers = {
146
- "User-Agent": "TTS-API/1.0",
147
- "Accept": "application/json"
148
- }
149
 
150
- response = requests.post(
151
- url,
152
- files=files,
153
- data=data,
154
- headers=headers,
155
- timeout=30
156
- )
157
-
158
- print(f"📡 Upload response status: {response.status_code}")
159
 
160
  if response.status_code == 200:
161
- try:
162
- result = response.json()
163
- if result.get("status") == "success":
164
- print(f"✅ Upload successful: {result}")
165
- return result, None
166
- else:
167
- error_msg = result.get("message", "Upload failed - unknown error")
168
- print(f"❌ Upload failed: {error_msg}")
169
- return None, error_msg
170
- except ValueError as e:
171
- return None, f"Invalid JSON response: {str(e)}"
172
- else:
173
- error_msg = f"Upload failed with status {response.status_code}: {response.text}"
174
- print(f"❌ {error_msg}")
175
- return None, error_msg
176
-
177
- except Exception as e:
178
- error_msg = f"Upload error: {str(e)}"
179
- print(f"❌ {error_msg}")
180
- return None, error_msg
181
-
182
- def upload_to_oci_with_retry(file_path: str, filename: str, project_id: str, file_type="voiceover", max_retries=2):
183
- """Upload file to OCI with retry logic"""
184
- for attempt in range(max_retries):
185
- try:
186
- print(f"🔄 Upload attempt {attempt + 1} of {max_retries} for {filename}")
187
- result, error = upload_to_oci(file_path, filename, project_id, file_type)
188
-
189
- if error:
190
- if attempt < max_retries - 1:
191
- wait_time = 2 ** attempt
192
- print(f"⏳ Upload failed, retrying in {wait_time}s: {error}")
193
- time.sleep(wait_time)
194
- continue
195
  else:
196
- return None, error
197
  else:
198
- return result, None
199
 
200
- except Exception as e:
201
- if attempt < max_retries - 1:
202
- wait_time = 2 ** attempt
203
- print(f"⏳ Upload exception, retrying in {wait_time}s: {str(e)}")
204
- time.sleep(wait_time)
205
- continue
206
- else:
207
- return None, f"Upload failed after {max_retries} attempts: {str(e)}"
208
-
209
- return None, "Upload failed: unexpected error"
210
 
211
  def get_voice_path(voice_name: str):
212
  """Get path to voice file"""
@@ -217,9 +144,7 @@ def get_voice_path(voice_name: str):
217
  if voice_path.is_dir():
218
  samples = list(voice_path.glob("sample_*.wav"))
219
  return str(samples[0]) if samples else None
220
- else:
221
- voice_file = Path(f"/tmp/voices/{voice_name}.wav")
222
- return str(voice_file) if voice_file.exists() else None
223
 
224
  def clone_voice(voice_name: str, audio_files: List[str], description: str = ""):
225
  """Clone a voice from audio samples"""
@@ -244,40 +169,8 @@ def supports_voice_cloning():
244
  """Check if the current model supports voice cloning"""
245
  return "xtts" in current_model.lower()
246
 
247
- def save_wav(audio, file_path):
248
- """Save audio to WAV file manually"""
249
- try:
250
- # Try soundfile first
251
- try:
252
- import soundfile as sf
253
- sf.write(file_path, audio, 22050)
254
- return True
255
- except ImportError:
256
- print("⚠️ soundfile not available, using fallback method")
257
-
258
- # Fallback: use wave library
259
- import wave
260
- import numpy as np
261
-
262
- if isinstance(audio, list):
263
- audio = np.array(audio)
264
-
265
- audio_int16 = (audio * 32767).astype(np.int16)
266
-
267
- with wave.open(file_path, 'wb') as wav_file:
268
- wav_file.setnchannels(1)
269
- wav_file.setsampwidth(2)
270
- wav_file.setframerate(22050)
271
- wav_file.writeframes(audio_int16.tobytes())
272
-
273
- return True
274
-
275
- except Exception as e:
276
- print(f"❌ Failed to save WAV: {e}")
277
- return False
278
-
279
  def load_tts_model(voice_style="default_female"):
280
- """Load TTS model with different voice options - LAZY LOADING"""
281
  global tts, model_loaded, current_model, model_loading, current_voice_style, voice_cloning_supported
282
 
283
  if model_loading:
@@ -293,7 +186,6 @@ def load_tts_model(voice_style="default_female"):
293
  try:
294
  from TTS.api import TTS
295
 
296
- # Use only fast, lightweight models with proper voice styles
297
  model_options = {
298
  "default_female": {
299
  "name": "tts_models/en/ljspeech/tacotron2-DDC",
@@ -303,22 +195,6 @@ def load_tts_model(voice_style="default_female"):
303
  "name": "tts_models/en/ek1/tacotron2",
304
  "description": "Tacotron2 - Clear male voice",
305
  },
306
- "male_deep": {
307
- "name": "tts_models/en/ek1/tacotron2",
308
- "description": "Tacotron2 - Deep male voice",
309
- },
310
- "male_medium": {
311
- "name": "tts_models/en/ljspeech/glow-tts",
312
- "description": "Glow-TTS - Medium male voice",
313
- },
314
- "female_1": {
315
- "name": "tts_models/en/ljspeech/tacotron2-DDC",
316
- "description": "Tacotron2 - Female voice 1",
317
- },
318
- "female_2": {
319
- "name": "tts_models/en/ljspeech/glow-tts",
320
- "description": "Glow-TTS - Female voice 2",
321
- },
322
  "voice_clone": {
323
  "name": "tts_models/multilingual/multi-dataset/xtts_v2",
324
  "description": "XTTS v2 - Voice cloning supported",
@@ -329,24 +205,11 @@ def load_tts_model(voice_style="default_female"):
329
  current_voice_style = voice_style
330
 
331
  print(f"🚀 Loading {selected_model['description']}...")
332
- print("📥 Downloading model (this may take a few minutes on first load)...")
333
 
334
- # Load the selected model
335
  tts = TTS(selected_model["name"]).to(DEVICE)
336
 
337
  # Quick test
338
- try:
339
- test_text = "Hello"
340
- audio = tts.tts(text=test_text)
341
- print(f"✅ {selected_model['description']} loaded successfully!")
342
- except Exception as test_error:
343
- print(f"❌ Model test failed: {test_error}")
344
- # Try fallback to default
345
- print("🔄 Trying fallback model...")
346
- tts = TTS("tts_models/en/ljspeech/tacotron2-DDC").to(DEVICE)
347
- tts.tts(text="Hello")
348
- selected_model = model_options["default_female"]
349
- current_voice_style = "default_female"
350
 
351
  model_loaded = True
352
  current_model = selected_model["name"]
@@ -360,110 +223,60 @@ def load_tts_model(voice_style="default_female"):
360
  finally:
361
  model_loading = False
362
 
363
- # Health check endpoints - CRITICAL FOR DEPLOYMENT
364
  @app.get("/")
365
  async def root():
366
- """Root endpoint - redirect to docs"""
367
  return {
368
  "status": "running",
369
  "service": "TTS API",
370
- "message": "Visit /docs for API documentation"
 
371
  }
372
 
373
  @app.get("/health")
374
  async def health_check():
375
- """Health check endpoint - must respond quickly"""
376
  return {
377
  "status": "healthy",
378
  "timestamp": datetime.now().isoformat(),
379
- "startup_time": app_startup_time.isoformat(),
380
  "model_loaded": model_loaded,
381
  "service": "TTS API"
382
  }
383
 
384
  @app.get("/api/health")
385
  async def api_health_check():
386
- """API health check with model status"""
387
  return {
388
  "status": "healthy",
389
  "model_loaded": model_loaded,
390
  "current_model": current_model if model_loaded else "none",
391
- "current_voice_style": current_voice_style,
392
- "voice_cloning_supported": voice_cloning_supported,
393
- "device": DEVICE,
394
- "uptime": str(datetime.now() - app_startup_time),
395
- "timestamp": datetime.now().isoformat()
396
  }
397
 
398
- @app.get("/api/oci-health")
399
- async def check_oci_health():
400
- """Check if OCI upload service is available"""
401
- if not OCI_UPLOAD_API_URL:
402
- return {
403
- "status": "not_configured",
404
- "message": "OCI_UPLOAD_API_URL environment variable is not set"
405
- }
406
-
407
- try:
408
- test_url = f"{OCI_UPLOAD_API_URL}/api/health"
409
- response = requests.get(test_url, timeout=5)
410
-
411
- if response.status_code == 200:
412
- return {
413
- "status": "healthy",
414
- "oci_url": OCI_UPLOAD_API_URL,
415
- "message": "OCI service is reachable"
416
- }
417
- else:
418
- return {
419
- "status": "unhealthy",
420
- "oci_url": OCI_UPLOAD_API_URL,
421
- "message": f"OCI service returned status {response.status_code}"
422
- }
423
- except Exception as e:
424
- return {
425
- "status": "error",
426
- "oci_url": OCI_UPLOAD_API_URL,
427
- "message": f"Cannot connect to OCI service: {str(e)}"
428
- }
429
-
430
- # API endpoints with lazy loading
431
  @app.post("/api/tts")
432
  async def generate_tts(request: TTSRequest):
433
- """Generate TTS for a single text with lazy model loading"""
434
  try:
435
- # Lazy load model on first request or when voice style changes
436
  if not model_loaded or current_voice_style != request.voice_style:
437
  print("🔄 Lazy loading TTS model...")
438
  if not load_tts_model(request.voice_style):
439
  return {
440
  "status": "error",
441
- "message": "TTS model failed to load. Please try again in a moment.",
442
- "requires_tos_acceptance": True,
443
- "tos_url": "https://coqui.ai/cpml.txt"
444
  }
445
 
446
  print(f"📥 TTS request for project: {request.project_id}")
447
- print(f" Text length: {len(request.text)} characters")
448
- print(f" Voice style: {request.voice_style}")
449
- print(f" Voice name: {request.voice_name}")
450
 
451
- # Check if voice cloning is requested but not supported
452
- if request.voice_name != "default" and not voice_cloning_supported:
453
- return {
454
- "status": "error",
455
- "message": "Voice cloning is not supported with the current model. Please use 'voice_clone' voice style for cloning.",
456
- "model": current_model
457
- }
458
-
459
- # Generate unique filename
460
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
461
  filename = f"voiceover_{timestamp}.wav"
462
  output_path = f"/tmp/output/{filename}"
463
 
464
- # Ensure output directory exists
465
  os.makedirs(os.path.dirname(output_path), exist_ok=True)
466
 
 
 
467
  # Get voice path if custom voice is requested
468
  speaker_wav = None
469
  if request.voice_name != "default":
@@ -471,102 +284,43 @@ async def generate_tts(request: TTSRequest):
471
  if not speaker_wav:
472
  return {
473
  "status": "error",
474
- "message": f"Voice '{request.voice_name}' not found. Please clone the voice first using /api/clone-voice."
475
  }
476
- print(f"🎙️ Using cloned voice: {request.voice_name}")
477
 
478
- print(f"🔊 Generating TTS to: {output_path}")
479
-
480
- # Clean the text before generation
481
- cleaned_text = clean_text(request.text)
482
- print(f"📝 Cleaned text: '{cleaned_text}'")
483
-
484
- # Generate TTS with error handling
485
- try:
486
- print(f"🔊 Generating TTS with {current_model}...")
487
-
488
- if speaker_wav and voice_cloning_supported:
489
- # Use voice cloning
490
- tts.tts_to_file(
491
- text=cleaned_text,
492
- file_path=output_path,
493
- speaker_wav=speaker_wav
494
- )
495
- else:
496
- # Simple TTS generation for fast models
497
- tts.tts_to_file(
498
- text=cleaned_text,
499
- file_path=output_path
500
- )
501
-
502
- except Exception as tts_error:
503
- print(f"❌ TTS generation failed: {tts_error}")
504
- # Try alternative approach
505
- try:
506
- print("🔄 Trying alternative TTS generation method...")
507
- if speaker_wav and voice_cloning_supported:
508
- audio = tts.tts(text=cleaned_text, speaker_wav=speaker_wav)
509
- else:
510
- audio = tts.tts(text=cleaned_text)
511
-
512
- # Save manually
513
- if not save_wav(audio, output_path):
514
- raise Exception("Failed to save audio file")
515
-
516
- except Exception as alt_error:
517
- print(f"❌ Alternative method also failed: {alt_error}")
518
- raise alt_error
519
 
520
- # Verify the file was created
521
  if not os.path.exists(output_path):
522
- raise Exception(f"TTS failed to create output file: {output_path}")
523
 
524
  file_size = os.path.getsize(output_path)
525
- print(f"✅ TTS generated: {output_path} ({file_size} bytes)")
526
 
527
- # Upload to OCI
528
- upload_result, error = upload_to_oci_with_retry(
529
- output_path, filename, request.project_id, "voiceover"
530
- )
531
 
532
  if error:
533
- print(f"❌ OCI upload failed: {error}")
534
- # Return success with local file info
535
  return {
536
  "status": "success_local",
537
  "message": f"TTS generated locally (upload failed: {error})",
538
  "local_file": output_path,
539
  "filename": filename,
540
- "file_size": file_size,
541
- "voice_style": request.voice_style,
542
- "voice_name": request.voice_name,
543
- "model_used": current_model,
544
- "voice_cloning_used": speaker_wav is not None,
545
- "oci_upload_error": error
546
  }
547
 
548
- print(f"✅ Upload successful: {filename}")
549
-
550
- # Clean up local file
551
  try:
552
  os.remove(output_path)
553
- print(f"🧹 Cleaned up local file: {output_path}")
554
- except Exception as cleanup_error:
555
- print(f"⚠️ Could not clean up file: {cleanup_error}")
556
 
557
  return {
558
  "status": "success",
559
  "message": "TTS generated and uploaded successfully",
560
  "filename": filename,
561
- "oci_path": upload_result.get("path", f"{request.project_id}/voiceover/{filename}"),
562
- "model_used": current_model,
563
- "voice_style": request.voice_style,
564
- "voice_name": request.voice_name,
565
- "voice_cloning_used": speaker_wav is not None
566
  }
567
 
568
  except Exception as e:
569
- print(f"❌ TTS generation error: {str(e)}")
570
  raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}")
571
 
572
  @app.post("/api/clone-voice")
@@ -581,15 +335,6 @@ async def clone_voice_endpoint(
581
  if not files:
582
  raise HTTPException(status_code=400, detail="No audio files provided")
583
 
584
- # Check if we have at least one file
585
- if len(files) == 0:
586
- raise HTTPException(status_code=400, detail="At least one audio file is required")
587
-
588
- print(f"🎙️ Starting voice cloning for: {voice_name}")
589
- print(f" Project ID: {project_id}")
590
- print(f" Number of samples: {len(files)}")
591
-
592
- # Save uploaded files temporarily
593
  temp_files = []
594
  for file in files:
595
  if not file.filename.lower().endswith(('.wav', '.mp3', '.flac')):
@@ -599,12 +344,9 @@ async def clone_voice_endpoint(
599
  with open(temp_path, "wb") as f:
600
  shutil.copyfileobj(file.file, f)
601
  temp_files.append(temp_path)
602
- print(f" Saved sample: {file.filename}")
603
 
604
- # Clone voice
605
  success, message = clone_voice(voice_name, temp_files, description)
606
 
607
- # Clean up temp files
608
  for temp_file in temp_files:
609
  try:
610
  os.remove(temp_file)
@@ -616,16 +358,12 @@ async def clone_voice_endpoint(
616
  "status": "success",
617
  "message": message,
618
  "voice_name": voice_name,
619
- "samples_used": len(temp_files),
620
- "project_id": project_id
621
  }
622
  else:
623
  raise HTTPException(status_code=500, detail=message)
624
 
625
- except HTTPException:
626
- raise
627
  except Exception as e:
628
- print(f"❌ Voice cloning error: {str(e)}")
629
  raise HTTPException(status_code=500, detail=f"Voice cloning failed: {str(e)}")
630
 
631
  @app.get("/api/voices")
@@ -642,51 +380,20 @@ async def list_voices():
642
  samples = list(voice_dir.glob("sample_*.wav"))
643
  voices.append({
644
  "name": voice_dir.name,
645
- "samples_count": len(samples),
646
- "samples": [str(sample.name) for sample in samples],
647
- "created_at": datetime.fromtimestamp(voice_dir.stat().st_mtime).isoformat()
648
  })
649
 
650
  return {"voices": voices}
651
  except Exception as e:
652
  raise HTTPException(status_code=500, detail=f"Failed to list voices: {str(e)}")
653
 
654
- @app.post("/api/change-voice")
655
- async def change_voice_style(request: ChangeVoiceRequest):
656
- """Change the voice style (reloads model)"""
657
- try:
658
- global model_loaded
659
-
660
- print(f"🔄 Changing voice style to: {request.voice_style}")
661
-
662
- # Reset model loaded flag to force reload
663
- model_loaded = False
664
-
665
- if load_tts_model(request.voice_style):
666
- return {
667
- "status": "success",
668
- "message": f"Voice style changed to {request.voice_style}",
669
- "current_voice_style": current_voice_style,
670
- "current_model": current_model,
671
- "voice_cloning_supported": voice_cloning_supported
672
- }
673
- else:
674
- raise HTTPException(status_code=500, detail="Failed to load new voice style")
675
-
676
- except Exception as e:
677
- raise HTTPException(status_code=500, detail=str(e))
678
-
679
  @app.get("/api/voice-styles")
680
  async def get_voice_styles():
681
  """Get available voice styles"""
682
  styles = {
683
  "default_female": "Default female voice (Tacotron2)",
684
  "clear_male": "Clear male voice (Tacotron2)",
685
- "male_deep": "Deep male voice (Tacotron2)",
686
- "male_medium": "Medium male voice (Glow-TTS)",
687
- "female_1": "Female voice 1 (Tacotron2)",
688
- "female_2": "Female voice 2 (Glow-TTS)",
689
- "voice_clone": "XTTS v2 - Voice cloning supported (requires voice samples)"
690
  }
691
  return {"voice_styles": styles}
692
 
@@ -697,12 +404,7 @@ async def get_status():
697
  "status": "running",
698
  "model_loaded": model_loaded,
699
  "current_model": current_model if model_loaded else "none",
700
- "current_voice_style": current_voice_style,
701
- "voice_cloning_supported": voice_cloning_supported,
702
- "device": DEVICE,
703
- "oci_configured": bool(OCI_UPLOAD_API_URL),
704
- "startup_time": app_startup_time.isoformat(),
705
- "uptime": str(datetime.now() - app_startup_time)
706
  }
707
 
708
  if __name__ == "__main__":
 
13
  from fastapi.middleware.cors import CORSMiddleware
14
  from pydantic import BaseModel
15
  import torch
 
16
 
17
  # Global state
18
  tts = None
 
77
  language: Optional[str] = "en"
78
  voice_style: Optional[str] = "default_female"
79
 
 
 
 
 
 
 
 
80
  class VoiceCloneRequest(BaseModel):
81
  project_id: str
82
  voice_name: str
 
93
  if not text or not isinstance(text, str):
94
  return "Hello"
95
 
 
96
  text = text.encode('ascii', 'ignore').decode('ascii')
 
 
97
  text = re.sub(r'[^\w\s\.\,\!\?\-\'\"\:]', '', text)
 
 
98
  text = re.sub(r'\s+', ' ', text)
99
 
 
100
  if len(text) > 10 and not re.search(r'[\.\!\?]$', text):
101
  text = text + '.'
102
 
103
  text = text.strip()
104
 
 
105
  if not text:
106
  text = "Hello world"
107
 
108
  return text
109
 
110
  def upload_to_oci(file_path: str, filename: str, project_id: str, file_type="voiceover"):
111
+ """Upload file to OCI"""
112
  try:
113
  if not OCI_UPLOAD_API_URL:
114
  print("⚠️ OCI upload skipped - OCI_UPLOAD_API_URL not configured")
115
  return {"status": "skipped", "message": "OCI upload disabled"}, None
116
 
 
 
 
117
  url = f"{OCI_UPLOAD_API_URL}/api/upload"
118
  print(f"🔗 Attempting upload to: {url}")
119
 
120
  with open(file_path, "rb") as f:
121
  files = {"file": (filename, f, "audio/wav")}
122
+ data = {"project_id": project_id, "subfolder": "voiceover"}
 
 
 
 
 
 
 
 
123
 
124
+ response = requests.post(url, files=files, data=data, timeout=30)
 
 
 
 
 
 
 
 
125
 
126
  if response.status_code == 200:
127
+ result = response.json()
128
+ if result.get("status") == "success":
129
+ return result, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  else:
131
+ return None, result.get("message", "Upload failed")
132
  else:
133
+ return None, f"Upload failed with status {response.status_code}"
134
 
135
+ except Exception as e:
136
+ return None, f"Upload error: {str(e)}"
 
 
 
 
 
 
 
 
137
 
138
  def get_voice_path(voice_name: str):
139
  """Get path to voice file"""
 
144
  if voice_path.is_dir():
145
  samples = list(voice_path.glob("sample_*.wav"))
146
  return str(samples[0]) if samples else None
147
+ return None
 
 
148
 
149
  def clone_voice(voice_name: str, audio_files: List[str], description: str = ""):
150
  """Clone a voice from audio samples"""
 
169
  """Check if the current model supports voice cloning"""
170
  return "xtts" in current_model.lower()
171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  def load_tts_model(voice_style="default_female"):
173
+ """Load TTS model with lazy loading"""
174
  global tts, model_loaded, current_model, model_loading, current_voice_style, voice_cloning_supported
175
 
176
  if model_loading:
 
186
  try:
187
  from TTS.api import TTS
188
 
 
189
  model_options = {
190
  "default_female": {
191
  "name": "tts_models/en/ljspeech/tacotron2-DDC",
 
195
  "name": "tts_models/en/ek1/tacotron2",
196
  "description": "Tacotron2 - Clear male voice",
197
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  "voice_clone": {
199
  "name": "tts_models/multilingual/multi-dataset/xtts_v2",
200
  "description": "XTTS v2 - Voice cloning supported",
 
205
  current_voice_style = voice_style
206
 
207
  print(f"🚀 Loading {selected_model['description']}...")
 
208
 
 
209
  tts = TTS(selected_model["name"]).to(DEVICE)
210
 
211
  # Quick test
212
+ tts.tts_to_file(text="Hello", file_path="/tmp/test.wav")
 
 
 
 
 
 
 
 
 
 
 
213
 
214
  model_loaded = True
215
  current_model = selected_model["name"]
 
223
  finally:
224
  model_loading = False
225
 
226
+ # Health check endpoints
227
  @app.get("/")
228
  async def root():
229
+ """Root endpoint"""
230
  return {
231
  "status": "running",
232
  "service": "TTS API",
233
+ "message": "Visit /docs for API documentation",
234
+ "model_loaded": model_loaded
235
  }
236
 
237
  @app.get("/health")
238
  async def health_check():
239
+ """Health check endpoint"""
240
  return {
241
  "status": "healthy",
242
  "timestamp": datetime.now().isoformat(),
 
243
  "model_loaded": model_loaded,
244
  "service": "TTS API"
245
  }
246
 
247
  @app.get("/api/health")
248
  async def api_health_check():
249
+ """API health check"""
250
  return {
251
  "status": "healthy",
252
  "model_loaded": model_loaded,
253
  "current_model": current_model if model_loaded else "none",
254
+ "device": DEVICE
 
 
 
 
255
  }
256
 
257
+ # API endpoints
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  @app.post("/api/tts")
259
  async def generate_tts(request: TTSRequest):
260
+ """Generate TTS for a single text"""
261
  try:
 
262
  if not model_loaded or current_voice_style != request.voice_style:
263
  print("🔄 Lazy loading TTS model...")
264
  if not load_tts_model(request.voice_style):
265
  return {
266
  "status": "error",
267
+ "message": "TTS model failed to load. Please try again."
 
 
268
  }
269
 
270
  print(f"📥 TTS request for project: {request.project_id}")
 
 
 
271
 
 
 
 
 
 
 
 
 
 
272
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
273
  filename = f"voiceover_{timestamp}.wav"
274
  output_path = f"/tmp/output/{filename}"
275
 
 
276
  os.makedirs(os.path.dirname(output_path), exist_ok=True)
277
 
278
+ cleaned_text = clean_text(request.text)
279
+
280
  # Get voice path if custom voice is requested
281
  speaker_wav = None
282
  if request.voice_name != "default":
 
284
  if not speaker_wav:
285
  return {
286
  "status": "error",
287
+ "message": f"Voice '{request.voice_name}' not found."
288
  }
 
289
 
290
+ if speaker_wav and voice_cloning_supported:
291
+ tts.tts_to_file(text=cleaned_text, file_path=output_path, speaker_wav=speaker_wav)
292
+ else:
293
+ tts.tts_to_file(text=cleaned_text, file_path=output_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
 
 
295
  if not os.path.exists(output_path):
296
+ raise Exception("TTS failed to create output file")
297
 
298
  file_size = os.path.getsize(output_path)
 
299
 
300
+ upload_result, error = upload_to_oci(output_path, filename, request.project_id)
 
 
 
301
 
302
  if error:
 
 
303
  return {
304
  "status": "success_local",
305
  "message": f"TTS generated locally (upload failed: {error})",
306
  "local_file": output_path,
307
  "filename": filename,
308
+ "file_size": file_size
 
 
 
 
 
309
  }
310
 
 
 
 
311
  try:
312
  os.remove(output_path)
313
+ except:
314
+ pass
 
315
 
316
  return {
317
  "status": "success",
318
  "message": "TTS generated and uploaded successfully",
319
  "filename": filename,
320
+ "oci_path": upload_result.get("path", f"{request.project_id}/voiceover/{filename}")
 
 
 
 
321
  }
322
 
323
  except Exception as e:
 
324
  raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}")
325
 
326
  @app.post("/api/clone-voice")
 
335
  if not files:
336
  raise HTTPException(status_code=400, detail="No audio files provided")
337
 
 
 
 
 
 
 
 
 
 
338
  temp_files = []
339
  for file in files:
340
  if not file.filename.lower().endswith(('.wav', '.mp3', '.flac')):
 
344
  with open(temp_path, "wb") as f:
345
  shutil.copyfileobj(file.file, f)
346
  temp_files.append(temp_path)
 
347
 
 
348
  success, message = clone_voice(voice_name, temp_files, description)
349
 
 
350
  for temp_file in temp_files:
351
  try:
352
  os.remove(temp_file)
 
358
  "status": "success",
359
  "message": message,
360
  "voice_name": voice_name,
361
+ "samples_used": len(temp_files)
 
362
  }
363
  else:
364
  raise HTTPException(status_code=500, detail=message)
365
 
 
 
366
  except Exception as e:
 
367
  raise HTTPException(status_code=500, detail=f"Voice cloning failed: {str(e)}")
368
 
369
  @app.get("/api/voices")
 
380
  samples = list(voice_dir.glob("sample_*.wav"))
381
  voices.append({
382
  "name": voice_dir.name,
383
+ "samples_count": len(samples)
 
 
384
  })
385
 
386
  return {"voices": voices}
387
  except Exception as e:
388
  raise HTTPException(status_code=500, detail=f"Failed to list voices: {str(e)}")
389
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
390
  @app.get("/api/voice-styles")
391
  async def get_voice_styles():
392
  """Get available voice styles"""
393
  styles = {
394
  "default_female": "Default female voice (Tacotron2)",
395
  "clear_male": "Clear male voice (Tacotron2)",
396
+ "voice_clone": "XTTS v2 - Voice cloning supported"
 
 
 
 
397
  }
398
  return {"voice_styles": styles}
399
 
 
404
  "status": "running",
405
  "model_loaded": model_loaded,
406
  "current_model": current_model if model_loaded else "none",
407
+ "device": DEVICE
 
 
 
 
 
408
  }
409
 
410
  if __name__ == "__main__":