yukee1992 committed on
Commit
54639e2
·
verified ·
1 Parent(s): 5133677

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -88
app.py CHANGED
@@ -32,73 +32,21 @@ app.add_middleware(
32
 
33
  # Configuration
34
  OCI_UPLOAD_API_URL = os.getenv("OCI_UPLOAD_API_URL", "http://localhost:7860")
35
- DEFAULT_MODEL = "tts_models/multilingual/multi-dataset/xtts_v2"
36
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
37
 
38
  print(f"βœ… Using device: {DEVICE}")
39
 
40
- # Initialize TTS model
 
 
 
 
41
  tts = None
42
  model_loaded = False
43
  current_model = ""
44
  voice_cloning_supported = False
45
-
46
- try:
47
- # Set environment variable to automatically accept terms
48
- os.environ["COQUI_TOS_AGREED"] = "1"
49
-
50
- print("πŸ” Starting TTS model loading process...")
51
-
52
- # Import TTS
53
- from TTS.api import TTS
54
-
55
- # Automatically respond to the TOS prompt
56
- import sys
57
- from io import StringIO
58
-
59
- # Capture the input prompt and automatically respond 'y'
60
- old_stdin = sys.stdin
61
- sys.stdin = StringIO('y\n')
62
-
63
- try:
64
- print("πŸš€ Loading XTTS model...")
65
-
66
- # Clear any potentially corrupted model files
67
- model_path = os.path.expanduser("~/.local/share/tts/tts_models--multilingual--multi-dataset--xtts_v2")
68
- if os.path.exists(model_path):
69
- print(f"πŸ”„ Clearing potentially corrupted model cache: {model_path}")
70
- import shutil
71
- shutil.rmtree(model_path, ignore_errors=True)
72
-
73
- # Try to load XTTS model with explicit download
74
- tts = TTS(DEFAULT_MODEL).to(DEVICE)
75
- model_loaded = True
76
- current_model = DEFAULT_MODEL
77
- voice_cloning_supported = True
78
- print("βœ… XTTS model loaded successfully with voice cloning support")
79
-
80
- except Exception as e:
81
- print(f"❌ XTTS model failed: {e}")
82
-
83
- # Try fallback model
84
- try:
85
- print("πŸ”„ Trying fallback model...")
86
- tts = TTS("tts_models/en/ljspeech/tacotron2-DDC").to(DEVICE)
87
- model_loaded = True
88
- current_model = "tts_models/en/ljspeech/tacotron2-DDC"
89
- voice_cloning_supported = False
90
- print("βœ… Fallback TTS model loaded successfully (English only, no voice cloning)")
91
- except Exception as fallback_error:
92
- print(f"❌ Fallback model also failed: {fallback_error}")
93
- tts = None
94
-
95
- finally:
96
- # Restore stdin
97
- sys.stdin = old_stdin
98
-
99
- except Exception as e:
100
- print(f"❌ Failed to initialize TTS: {e}")
101
- tts = None
102
 
103
  # Pydantic models
104
  class TTSRequest(BaseModel):
@@ -125,14 +73,13 @@ def upload_to_oci(file_path: str, filename: str, project_id: str, file_type="voi
125
  if not OCI_UPLOAD_API_URL:
126
  return None, "OCI upload API URL not configured"
127
 
128
- # Use voiceover subfolder
129
  url = f"{OCI_UPLOAD_API_URL}/api/upload"
130
 
131
  with open(file_path, "rb") as f:
132
  files = {"file": (filename, f, "audio/wav")}
133
  data = {
134
  "project_id": project_id,
135
- "subfolder": "voiceover" # This creates project_id/voiceover/ structure
136
  }
137
 
138
  response = requests.post(url, files=files, data=data, timeout=30)
@@ -158,7 +105,7 @@ def upload_to_oci_with_retry(file_path: str, filename: str, project_id: str, fil
158
 
159
  if error:
160
  if attempt < max_retries - 1:
161
- wait_time = 2 ** attempt # Exponential backoff
162
  print(f"⏳ Upload failed, retrying in {wait_time}s: {error}")
163
  time.sleep(wait_time)
164
  continue
@@ -196,18 +143,15 @@ def clone_voice(voice_name: str, audio_files: List[str], description: str = ""):
196
  try:
197
  print(f"πŸŽ™οΈ Cloning voice: {voice_name}")
198
 
199
- # Create voice directory
200
  voice_dir = f"/tmp/voices/{voice_name}"
201
  os.makedirs(voice_dir, exist_ok=True)
202
 
203
- # Copy audio files to voice directory
204
  for i, audio_file in enumerate(audio_files):
205
  dest_path = f"{voice_dir}/sample_{i+1}.wav"
206
  shutil.copy2(audio_file, dest_path)
207
  print(f" Copied sample {i+1} to: {dest_path}")
208
 
209
  print(f"βœ… Voice cloning setup completed for {voice_name}")
210
-
211
  return True, f"Voice {voice_name} is ready for use"
212
 
213
  except Exception as e:
@@ -217,18 +161,78 @@ def supports_voice_cloning():
217
  """Check if the current model supports voice cloning"""
218
  return "xtts" in current_model.lower()
219
 
220
- # API endpoints
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  @app.post("/api/tts")
222
  async def generate_tts(request: TTSRequest):
223
- """Generate TTS for a single text"""
224
  try:
225
- if tts is None:
226
- return {
227
- "status": "error",
228
- "message": "TTS model not available. Please check the logs for details.",
229
- "requires_tos_acceptance": True,
230
- "tos_url": "https://coqui.ai/cpml.txt"
231
- }
 
 
232
 
233
  print(f"πŸ“₯ TTS request for project: {request.project_id}")
234
  print(f" Text length: {len(request.text)} characters")
@@ -260,7 +264,6 @@ async def generate_tts(request: TTSRequest):
260
 
261
  # Generate TTS based on model capabilities
262
  if supports_voice_cloning():
263
- # XTTS model with voice cloning support
264
  tts.tts_to_file(
265
  text=request.text,
266
  speaker_wav=speaker_wav,
@@ -268,7 +271,6 @@ async def generate_tts(request: TTSRequest):
268
  file_path=output_path
269
  )
270
  else:
271
- # Fallback model (Tacotron2)
272
  tts.tts_to_file(
273
  text=request.text,
274
  file_path=output_path
@@ -283,7 +285,6 @@ async def generate_tts(request: TTSRequest):
283
 
284
  if error:
285
  print(f"❌ OCI upload failed: {error}")
286
- # Still return the local file path if upload fails
287
  return {
288
  "status": "partial_success",
289
  "message": f"TTS generated but upload failed: {error}",
@@ -316,8 +317,10 @@ async def generate_tts(request: TTSRequest):
316
  async def batch_generate_tts(request: BatchTTSRequest):
317
  """Generate TTS for multiple texts with sequential naming"""
318
  try:
319
- if tts is None:
320
- raise HTTPException(status_code=500, detail="TTS model not loaded")
 
 
321
 
322
  print(f"πŸ“₯ Batch TTS request for project: {request.project_id}")
323
  print(f" Number of texts: {len(request.texts)}")
@@ -349,7 +352,6 @@ async def batch_generate_tts(request: BatchTTSRequest):
349
 
350
  # Generate TTS based on model capabilities
351
  if supports_voice_cloning():
352
- # XTTS model with voice cloning support
353
  tts.tts_to_file(
354
  text=text,
355
  speaker_wav=speaker_wav,
@@ -357,7 +359,6 @@ async def batch_generate_tts(request: BatchTTSRequest):
357
  file_path=output_path
358
  )
359
  else:
360
- # Fallback model (Tacotron2)
361
  tts.tts_to_file(
362
  text=text,
363
  file_path=output_path
@@ -540,13 +541,35 @@ async def health_check():
540
  """Health check endpoint"""
541
  return {
542
  "status": "healthy",
543
- "tts_loaded": tts is not None,
544
  "model": current_model,
545
  "voice_cloning_supported": voice_cloning_supported,
546
  "device": DEVICE,
 
547
  "timestamp": datetime.now().isoformat()
548
  }
549
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
550
  @app.get("/")
551
  async def root():
552
  """Root endpoint with API information"""
@@ -558,10 +581,11 @@ async def root():
558
  "POST /api/upload-voice": "Upload a voice sample for cloning",
559
  "POST /api/clone-voice": "Clone a voice from multiple samples",
560
  "GET /api/voices": "List available voices",
561
- "GET /api/health": "Health check"
 
562
  },
563
- "model_loaded": tts is not None,
564
- "model_name": current_model if tts else "None",
565
  "voice_cloning_supported": supports_voice_cloning()
566
  }
567
 
@@ -569,7 +593,6 @@ if __name__ == "__main__":
569
  import uvicorn
570
  print("πŸš€ Starting TTS API with Coqui TTS and Voice Cloning...")
571
  print("πŸ“Š API endpoints available at: http://localhost:7860/")
572
- print("πŸ“š Documentation available at: http://localhost:7860/docs")
573
- print(f"πŸ”Š Model: {current_model}")
574
- print(f"πŸŽ™οΈ Voice cloning: {'Supported' if voice_cloning_supported else 'Not supported'}")
575
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
32
 
33
  # Configuration
34
  OCI_UPLOAD_API_URL = os.getenv("OCI_UPLOAD_API_URL", "http://localhost:7860")
 
35
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
36
 
37
  print(f"βœ… Using device: {DEVICE}")
38
 
39
+ # Model configuration
40
+ MODEL_REPO_ID = "coqui/XTTS-v2"
41
+ MODEL_CACHE_DIR = "/tmp/tts_models"
42
+
43
+ # Global state
44
  tts = None
45
  model_loaded = False
46
  current_model = ""
47
  voice_cloning_supported = False
48
+ model_loading = False
49
+ model_load_attempts = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  # Pydantic models
52
  class TTSRequest(BaseModel):
 
73
  if not OCI_UPLOAD_API_URL:
74
  return None, "OCI upload API URL not configured"
75
 
 
76
  url = f"{OCI_UPLOAD_API_URL}/api/upload"
77
 
78
  with open(file_path, "rb") as f:
79
  files = {"file": (filename, f, "audio/wav")}
80
  data = {
81
  "project_id": project_id,
82
+ "subfolder": "voiceover"
83
  }
84
 
85
  response = requests.post(url, files=files, data=data, timeout=30)
 
105
 
106
  if error:
107
  if attempt < max_retries - 1:
108
+ wait_time = 2 ** attempt
109
  print(f"⏳ Upload failed, retrying in {wait_time}s: {error}")
110
  time.sleep(wait_time)
111
  continue
 
143
  try:
144
  print(f"πŸŽ™οΈ Cloning voice: {voice_name}")
145
 
 
146
  voice_dir = f"/tmp/voices/{voice_name}"
147
  os.makedirs(voice_dir, exist_ok=True)
148
 
 
149
  for i, audio_file in enumerate(audio_files):
150
  dest_path = f"{voice_dir}/sample_{i+1}.wav"
151
  shutil.copy2(audio_file, dest_path)
152
  print(f" Copied sample {i+1} to: {dest_path}")
153
 
154
  print(f"βœ… Voice cloning setup completed for {voice_name}")
 
155
  return True, f"Voice {voice_name} is ready for use"
156
 
157
  except Exception as e:
 
161
  """Check if the current model supports voice cloning"""
162
  return "xtts" in current_model.lower()
163
 
164
def load_tts_model():
    """Load a Coqui TTS model into the module-level state.

    The XTTS v2 model is tried first; if it cannot be loaded, the English
    Tacotron2-DDC model is tried as a fallback. On success the globals
    ``tts``, ``model_loaded``, ``current_model`` and
    ``voice_cloning_supported`` are updated to describe the loaded model.

    Every call that actually starts a load increments
    ``model_load_attempts``. ``model_loading`` is set for the duration of
    the load and always cleared afterwards.

    Returns:
        bool: True when a model was loaded, False when a load is already in
        progress or when every candidate (or the TTS import itself) failed.
    """
    global tts, model_loaded, current_model, voice_cloning_supported, model_loading, model_load_attempts

    if model_loading:
        print("⏳ Model is already being loaded...")
        return False

    model_loading = True
    model_load_attempts += 1

    # Candidates in priority order:
    # (model id, value stored in current_model, voice cloning supported,
    #  start message, success message, failure message prefix)
    candidates = (
        (
            "tts_models/multilingual/multi-dataset/xtts_v2",
            "xtts_v2",
            True,
            "πŸš€ Loading XTTS model...",
            "βœ… XTTS model loaded successfully",
            "❌ XTTS model loading failed",
        ),
        (
            "tts_models/en/ljspeech/tacotron2-DDC",
            "tacotron2-DDC",
            False,
            "πŸ”„ Trying fallback model...",
            "βœ… Fallback model loaded successfully",
            "❌ Fallback model also failed",
        ),
    )

    try:
        import os
        import sys
        from io import StringIO

        # Accept the Coqui terms of service through the environment so the
        # load does not depend solely on the stdin answer below.
        os.environ.setdefault("COQUI_TOS_AGREED", "1")

        from TTS.api import TTS

        # Answer 'y' to an interactive TOS prompt should one still appear.
        old_stdin = sys.stdin
        sys.stdin = StringIO('y\n')

        try:
            for model_id, short_name, cloning, start_msg, ok_msg, fail_msg in candidates:
                print(start_msg)
                try:
                    loaded = TTS(model_id).to(DEVICE)
                except Exception as load_error:
                    print(f"{fail_msg}: {load_error}")
                    continue

                # Publish the model only once it is fully constructed.
                tts = loaded
                model_loaded = True
                current_model = short_name
                voice_cloning_supported = cloning
                print(ok_msg)
                return True

            return False

        finally:
            sys.stdin = old_stdin

    except Exception as e:
        print(f"❌ Failed to initialize TTS: {e}")
        return False
    finally:
        model_loading = False
221
+
222
+ # API endpoints with lazy loading
223
  @app.post("/api/tts")
224
  async def generate_tts(request: TTSRequest):
225
+ """Generate TTS for a single text with lazy model loading"""
226
  try:
227
+ # Lazy load model on first request
228
+ if not model_loaded:
229
+ if not load_tts_model():
230
+ return {
231
+ "status": "error",
232
+ "message": "TTS model failed to load. Please check the logs.",
233
+ "requires_tos_acceptance": True,
234
+ "tos_url": "https://coqui.ai/cpml.txt"
235
+ }
236
 
237
  print(f"πŸ“₯ TTS request for project: {request.project_id}")
238
  print(f" Text length: {len(request.text)} characters")
 
264
 
265
  # Generate TTS based on model capabilities
266
  if supports_voice_cloning():
 
267
  tts.tts_to_file(
268
  text=request.text,
269
  speaker_wav=speaker_wav,
 
271
  file_path=output_path
272
  )
273
  else:
 
274
  tts.tts_to_file(
275
  text=request.text,
276
  file_path=output_path
 
285
 
286
  if error:
287
  print(f"❌ OCI upload failed: {error}")
 
288
  return {
289
  "status": "partial_success",
290
  "message": f"TTS generated but upload failed: {error}",
 
317
  async def batch_generate_tts(request: BatchTTSRequest):
318
  """Generate TTS for multiple texts with sequential naming"""
319
  try:
320
+ # Lazy load model on first request
321
+ if not model_loaded:
322
+ if not load_tts_model():
323
+ raise HTTPException(status_code=500, detail="TTS model failed to load")
324
 
325
  print(f"πŸ“₯ Batch TTS request for project: {request.project_id}")
326
  print(f" Number of texts: {len(request.texts)}")
 
352
 
353
  # Generate TTS based on model capabilities
354
  if supports_voice_cloning():
 
355
  tts.tts_to_file(
356
  text=text,
357
  speaker_wav=speaker_wav,
 
359
  file_path=output_path
360
  )
361
  else:
 
362
  tts.tts_to_file(
363
  text=text,
364
  file_path=output_path
 
541
  """Health check endpoint"""
542
  return {
543
  "status": "healthy",
544
+ "tts_loaded": model_loaded,
545
  "model": current_model,
546
  "voice_cloning_supported": voice_cloning_supported,
547
  "device": DEVICE,
548
+ "load_attempts": model_load_attempts,
549
  "timestamp": datetime.now().isoformat()
550
  }
551
 
552
@app.post("/api/reload-model")
async def reload_model():
    """Discard the current TTS model state and attempt a fresh load.

    Returns a JSON-serialisable dict reporting whether the reload
    succeeded, together with the resulting ``model_loaded`` flag and
    ``current_model`` name.
    """
    global tts, model_loaded, current_model, voice_cloning_supported

    # Reset every piece of model state before loading again.
    voice_cloning_supported = False
    current_model = ""
    model_loaded = False
    tts = None

    if load_tts_model():
        outcome = "success"
        note = "Model reloaded successfully"
    else:
        outcome = "error"
        note = "Failed to reload model"

    return {
        "status": outcome,
        "message": note,
        "model_loaded": model_loaded,
        "model": current_model,
    }
572
+
573
  @app.get("/")
574
  async def root():
575
  """Root endpoint with API information"""
 
581
  "POST /api/upload-voice": "Upload a voice sample for cloning",
582
  "POST /api/clone-voice": "Clone a voice from multiple samples",
583
  "GET /api/voices": "List available voices",
584
+ "GET /api/health": "Health check",
585
+ "POST /api/reload-model": "Reload TTS model"
586
  },
587
+ "model_loaded": model_loaded,
588
+ "model_name": current_model if model_loaded else "None",
589
  "voice_cloning_supported": supports_voice_cloning()
590
  }
591
 
 
593
  import uvicorn
594
  print("πŸš€ Starting TTS API with Coqui TTS and Voice Cloning...")
595
  print("πŸ“Š API endpoints available at: http://localhost:7860/")
596
+ print("πŸ’‘ Model will be loaded on first request to save memory")
597
+ print("πŸ”„ Use /api/reload-model to force reload if needed")
 
598
  uvicorn.run(app, host="0.0.0.0", port=7860)