hadadrjt committed on
Commit
d24817e
·
1 Parent(s): 4388640

Pocket TTS: Enforce resource management.

Browse files
Files changed (1) hide show
  1. app.py +408 -30
app.py CHANGED
@@ -164,25 +164,330 @@ import gc
164
  import atexit
165
 
166
  BACKGROUND_CLEANUP_INTERVAL = 300
167
- VOICE_STATE_CACHE_MAXIMUM_SIZE = 16
168
- VOICE_STATE_CACHE_CLEANUP_THRESHOLD = 12
 
 
 
 
 
 
 
169
 
170
  background_cleanup_thread = None
171
  background_cleanup_stop_event = threading.Event()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
  def perform_background_cleanup_cycle():
 
 
174
  while not background_cleanup_stop_event.is_set():
175
- background_cleanup_stop_event.wait(timeout=BACKGROUND_CLEANUP_INTERVAL)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
- if not background_cleanup_stop_event.is_set():
 
 
 
 
 
178
  cleanup_expired_temporary_files()
179
- force_garbage_collection()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
  def start_background_cleanup_thread():
182
  global background_cleanup_thread
183
 
184
  if background_cleanup_thread is None or not background_cleanup_thread.is_alive():
185
  background_cleanup_stop_event.clear()
 
186
 
187
  background_cleanup_thread = threading.Thread(
188
  target=perform_background_cleanup_cycle,
@@ -194,16 +499,11 @@ def start_background_cleanup_thread():
194
 
195
  def stop_background_cleanup_thread():
196
  background_cleanup_stop_event.set()
 
197
 
198
  if background_cleanup_thread is not None and background_cleanup_thread.is_alive():
199
  background_cleanup_thread.join(timeout=5)
200
 
201
- def force_garbage_collection():
202
- gc.collect()
203
-
204
- if torch.cuda.is_available():
205
- torch.cuda.empty_cache()
206
-
207
  atexit.register(stop_background_cleanup_thread)
208
 
209
  # =============================================================================
@@ -243,6 +543,8 @@ def convert_audio_to_pcm_wav(input_path):
243
  with temporary_files_lock:
244
  temporary_files_registry[output_file.name] = time.time()
245
 
 
 
246
  return output_file.name
247
 
248
  except Exception as conversion_error:
@@ -285,6 +587,23 @@ class TextToSpeechManager:
285
 
286
  self.voice_state_cache_access_timestamps = {}
287
  self.voice_state_cache_lock = threading.Lock()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
 
289
  def load_or_get_model(
290
  self,
@@ -311,6 +630,8 @@ class TextToSpeechManager:
311
  Returns:
312
  TTSModel: Loaded and configured TTS model instance
313
  """
 
 
314
  # Process and validate input parameters with defaults
315
  processed_variant = str(model_variant or DEFAULT_MODEL_VARIANT).strip()
316
  processed_temperature = float(temperature) if temperature is not None else DEFAULT_TEMPERATURE
@@ -327,15 +648,20 @@ class TextToSpeechManager:
327
  "eos_threshold": processed_eos_threshold
328
  }
329
 
330
- # Load new model if configuration changed or no model loaded
331
- if self.loaded_model is None or self.current_configuration != requested_configuration:
332
- self.clear_voice_state_cache_completely()
 
 
 
 
 
333
 
334
- self.loaded_model = TTSModel.load_model(**requested_configuration)
335
- self.current_configuration = requested_configuration
336
- self.voice_state_cache = {} # Clear cache on model change
337
 
338
- return self.loaded_model
339
 
340
  def clear_voice_state_cache_completely(self):
341
  with self.voice_state_cache_lock:
@@ -355,6 +681,23 @@ class TextToSpeechManager:
355
  with self.voice_state_cache_lock:
356
 
357
  if len(self.voice_state_cache) <= VOICE_STATE_CACHE_CLEANUP_THRESHOLD:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
  return
359
 
360
  sorted_voice_names_by_access_time = sorted(
@@ -395,9 +738,16 @@ class TextToSpeechManager:
395
  self.voice_state_cache_access_timestamps[validated_voice] = time.time()
396
  return self.voice_state_cache[validated_voice]
397
 
 
 
 
398
  if len(self.voice_state_cache) >= VOICE_STATE_CACHE_MAXIMUM_SIZE:
399
  self.evict_least_recently_used_voice_states()
400
 
 
 
 
 
401
  # Compute and cache voice state if not already cached
402
  if validated_voice not in self.voice_state_cache:
403
 
@@ -426,6 +776,9 @@ class TextToSpeechManager:
426
  Returns:
427
  Voice state tensor extracted from the audio file
428
  """
 
 
 
429
 
430
  converted_audio_path = convert_audio_to_pcm_wav(audio_file_path)
431
 
@@ -447,15 +800,23 @@ class TextToSpeechManager:
447
  Returns:
448
  torch.Tensor: Generated audio waveform
449
  """
450
- # Apply custom frames setting if enabled
451
- processed_frames = int(frames_after_eos) if enable_custom_frames else None
452
-
453
- return self.loaded_model.generate_audio(
454
- model_state=voice_state,
455
- text_to_generate=text_content,
456
- frames_after_eos=processed_frames,
457
- copy_state=True
458
- )
 
 
 
 
 
 
 
 
459
 
460
  def save_audio_to_file(self, audio_tensor):
461
  """
@@ -470,9 +831,14 @@ class TextToSpeechManager:
470
  Returns:
471
  str: Path to the saved temporary WAV file
472
  """
 
 
 
 
 
 
473
  # Convert tensor to numpy array for scipy
474
  audio_numpy_data = audio_tensor.numpy()
475
- audio_sample_rate = self.loaded_model.sample_rate
476
 
477
  # Create temporary file and write audio data
478
  output_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
@@ -482,6 +848,8 @@ class TextToSpeechManager:
482
  with temporary_files_lock:
483
  temporary_files_registry[output_file.name] = time.time()
484
 
 
 
485
  return output_file.name
486
 
487
 
@@ -625,7 +993,10 @@ def perform_speech_generation(
625
  global is_currently_generating, stop_generation_requested
626
 
627
  # Run cleanup before starting new generation
628
- cleanup_expired_temporary_files()
 
 
 
629
 
630
  # Validate text input
631
  is_valid, validation_result = validate_text_input(text_input)
@@ -701,6 +1072,9 @@ def perform_speech_generation(
701
  except gr.Error:
702
  raise
703
 
 
 
 
704
  except Exception as generation_error:
705
  raise gr.Error(f"Speech generation failed: {str(generation_error)}")
706
 
@@ -712,11 +1086,15 @@ def perform_speech_generation(
712
 
713
  if generated_audio_tensor is not None:
714
  del generated_audio_tensor
 
715
 
716
  if cloned_voice_state_tensor is not None:
717
  del cloned_voice_state_tensor
 
718
 
719
- force_garbage_collection()
 
 
720
 
721
 
722
  # =============================================================================
 
164
  import atexit
165
 
166
  BACKGROUND_CLEANUP_INTERVAL = 300
167
+ VOICE_STATE_CACHE_MAXIMUM_SIZE = 8
168
+ VOICE_STATE_CACHE_CLEANUP_THRESHOLD = 4
169
+
170
+ MAXIMUM_MEMORY_USAGE = 1 * 1024 * 1024 * 1024
171
+
172
+ MEMORY_WARNING_THRESHOLD = int(0.7 * MAXIMUM_MEMORY_USAGE)
173
+ MEMORY_CRITICAL_THRESHOLD = int(0.85 * MAXIMUM_MEMORY_USAGE)
174
+ MEMORY_CHECK_INTERVAL = 30
175
+ MEMORY_IDLE_TARGET = int(0.5 * MAXIMUM_MEMORY_USAGE)
176
 
177
  background_cleanup_thread = None
178
  background_cleanup_stop_event = threading.Event()
179
+ background_cleanup_trigger_event = threading.Event()
180
+
181
+ memory_enforcement_lock = threading.Lock()
182
+
183
+ text_to_speech_manager = None
184
+
185
+ def get_current_memory_usage():
186
+ try:
187
+ with open('/proc/self/status', 'r') as status_file:
188
+ for line in status_file:
189
+
190
+ if line.startswith('VmRSS:'):
191
+ memory_value_kb = int(line.split()[1])
192
+ return memory_value_kb * 1024
193
+
194
+ except Exception:
195
+ pass
196
+
197
+ try:
198
+ with open('/proc/self/statm', 'r') as statm_file:
199
+ statm_values = statm_file.read().split()
200
+ resident_pages = int(statm_values[1])
201
+ page_size = os.sysconf('SC_PAGE_SIZE')
202
+ return resident_pages * page_size
203
+
204
+ except Exception:
205
+ pass
206
+
207
+ try:
208
+ import resource
209
+ memory_usage_kilobytes = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
210
+
211
+ import platform
212
+ if platform.system() == "Darwin":
213
+ return memory_usage_kilobytes
214
+
215
+ else:
216
+ return memory_usage_kilobytes * 1024
217
+
218
+ except Exception:
219
+ pass
220
+
221
+ return 0
222
+
223
+ def check_if_generation_is_currently_active():
224
+ with generation_state_lock:
225
+ return is_currently_generating
226
+
227
+ def is_memory_usage_within_limit():
228
+ current_memory_usage = get_current_memory_usage()
229
+ return current_memory_usage < MAXIMUM_MEMORY_USAGE
230
+
231
+ def is_memory_usage_approaching_limit():
232
+ current_memory_usage = get_current_memory_usage()
233
+ return current_memory_usage >= MEMORY_WARNING_THRESHOLD
234
+
235
+ def is_memory_usage_critical():
236
+ current_memory_usage = get_current_memory_usage()
237
+ return current_memory_usage >= MEMORY_CRITICAL_THRESHOLD
238
+
239
+ def is_memory_above_idle_target():
240
+ current_memory_usage = get_current_memory_usage()
241
+ return current_memory_usage > MEMORY_IDLE_TARGET
242
+
243
+ def force_garbage_collection():
244
+ gc.collect(0)
245
+ gc.collect(1)
246
+ gc.collect(2)
247
+
248
+ if torch.cuda.is_available():
249
+ torch.cuda.empty_cache()
250
+ torch.cuda.synchronize()
251
+
252
+ def memory_cleanup():
253
+ force_garbage_collection()
254
+
255
+ try:
256
+ import ctypes
257
+
258
+ libc = ctypes.CDLL("libc.so.6")
259
+ libc.malloc_trim(0)
260
+
261
+ except Exception:
262
+ pass
263
+
264
+ force_garbage_collection()
265
+
266
+ def perform_memory_cleanup():
267
+ global text_to_speech_manager
268
+
269
+ force_garbage_collection()
270
+
271
+ if text_to_speech_manager is not None:
272
+ text_to_speech_manager.evict_least_recently_used_voice_states()
273
+
274
+ memory_cleanup()
275
+
276
+ def enforce_memory_limit_if_exceeded():
277
+ global text_to_speech_manager
278
+
279
+ with memory_enforcement_lock:
280
+ generation_is_active = check_if_generation_is_currently_active()
281
+
282
+ current_memory_usage = get_current_memory_usage()
283
+
284
+ if current_memory_usage < MEMORY_WARNING_THRESHOLD:
285
+ return True
286
+
287
+ force_garbage_collection()
288
+ current_memory_usage = get_current_memory_usage()
289
+
290
+ if current_memory_usage < MEMORY_WARNING_THRESHOLD:
291
+ return True
292
+
293
+ if text_to_speech_manager is not None:
294
+ text_to_speech_manager.evict_least_recently_used_voice_states()
295
+
296
+ memory_cleanup()
297
+ current_memory_usage = get_current_memory_usage()
298
+
299
+ if current_memory_usage < MEMORY_CRITICAL_THRESHOLD:
300
+ return True
301
+
302
+ if text_to_speech_manager is not None:
303
+ text_to_speech_manager.clear_voice_state_cache_completely()
304
+
305
+ cleanup_all_temporary_files_immediately()
306
+ memory_cleanup()
307
+ current_memory_usage = get_current_memory_usage()
308
+
309
+ if current_memory_usage < MAXIMUM_MEMORY_USAGE:
310
+ return True
311
+
312
+ if generation_is_active:
313
+ return current_memory_usage < MAXIMUM_MEMORY_USAGE
314
+
315
+ if text_to_speech_manager is not None:
316
+ text_to_speech_manager.unload_model_completely()
317
+
318
+ memory_cleanup()
319
+ current_memory_usage = get_current_memory_usage()
320
+
321
+ return current_memory_usage < MAXIMUM_MEMORY_USAGE
322
+
323
+ def perform_idle_memory_reduction():
324
+ global text_to_speech_manager
325
+
326
+ if check_if_generation_is_currently_active():
327
+ return
328
+
329
+ with memory_enforcement_lock:
330
+ current_memory_usage = get_current_memory_usage()
331
+
332
+ if current_memory_usage <= MEMORY_IDLE_TARGET:
333
+ return
334
+
335
+ force_garbage_collection()
336
+ current_memory_usage = get_current_memory_usage()
337
+
338
+ if current_memory_usage <= MEMORY_IDLE_TARGET:
339
+ return
340
+
341
+ if check_if_generation_is_currently_active():
342
+ return
343
+
344
+ if text_to_speech_manager is not None:
345
+ text_to_speech_manager.evict_least_recently_used_voice_states()
346
+
347
+ memory_cleanup()
348
+ current_memory_usage = get_current_memory_usage()
349
+
350
+ if current_memory_usage <= MEMORY_IDLE_TARGET:
351
+ return
352
+
353
+ if check_if_generation_is_currently_active():
354
+ return
355
+
356
+ if text_to_speech_manager is not None:
357
+ text_to_speech_manager.clear_voice_state_cache_completely()
358
+
359
+ memory_cleanup()
360
+ current_memory_usage = get_current_memory_usage()
361
+
362
+ if current_memory_usage <= MEMORY_IDLE_TARGET:
363
+ return
364
+
365
+ if check_if_generation_is_currently_active():
366
+ return
367
+
368
+ if text_to_speech_manager is not None:
369
+ text_to_speech_manager.unload_model_completely()
370
+
371
+ memory_cleanup()
372
+
373
+ def cleanup_all_temporary_files_immediately():
374
+ with temporary_files_lock:
375
+ for file_path in list(temporary_files_registry.keys()):
376
+ try:
377
+ if os.path.exists(file_path):
378
+ os.remove(file_path)
379
+ del temporary_files_registry[file_path]
380
+
381
+ except Exception:
382
+ pass
383
+
384
+ def has_temporary_files_pending_cleanup():
385
+ with temporary_files_lock:
386
+
387
+ if len(temporary_files_registry) == 0:
388
+ return False
389
+
390
+ current_timestamp = time.time()
391
+
392
+ for file_path, creation_timestamp in temporary_files_registry.items():
393
+ if current_timestamp - creation_timestamp > TEMPORARY_FILE_LIFETIME_SECONDS:
394
+ return True
395
+
396
+ return False
397
+
398
+ def has_any_temporary_files_registered():
399
+ with temporary_files_lock:
400
+ return len(temporary_files_registry) > 0
401
+
402
+ def calculate_time_until_next_file_expiration():
403
+ with temporary_files_lock:
404
+ if len(temporary_files_registry) == 0:
405
+ return None
406
+
407
+ current_timestamp = time.time()
408
+ minimum_time_until_expiration = None
409
+
410
+ for file_path, creation_timestamp in temporary_files_registry.items():
411
+ time_since_creation = current_timestamp - creation_timestamp
412
+ time_until_expiration = TEMPORARY_FILE_LIFETIME_SECONDS - time_since_creation
413
+
414
+ if time_until_expiration <= 0:
415
+ return 0
416
+
417
+ if minimum_time_until_expiration is None or time_until_expiration < minimum_time_until_expiration:
418
+ minimum_time_until_expiration = time_until_expiration
419
+
420
+ return minimum_time_until_expiration
421
 
422
  def perform_background_cleanup_cycle():
423
+ last_memory_check_timestamp = 0
424
+
425
  while not background_cleanup_stop_event.is_set():
426
+ time_until_next_expiration = calculate_time_until_next_file_expiration()
427
+ current_timestamp = time.time()
428
+ time_since_last_memory_check = current_timestamp - last_memory_check_timestamp
429
+
430
+ if time_until_next_expiration is not None:
431
+ if time_until_next_expiration <= 0:
432
+ wait_duration = 1
433
+
434
+ else:
435
+ wait_duration = min(
436
+ time_until_next_expiration + 1,
437
+ MEMORY_CHECK_INTERVAL,
438
+ BACKGROUND_CLEANUP_INTERVAL
439
+ )
440
+ else:
441
+ if is_memory_above_idle_target() and not check_if_generation_is_currently_active():
442
+ wait_duration = MEMORY_CHECK_INTERVAL
443
+
444
+ else:
445
+ background_cleanup_trigger_event.clear()
446
+ triggered = background_cleanup_trigger_event.wait(timeout=BACKGROUND_CLEANUP_INTERVAL)
447
+
448
+ if background_cleanup_stop_event.is_set():
449
+ break
450
+
451
+ if triggered:
452
+ continue
453
+
454
+ else:
455
+ if not check_if_generation_is_currently_active():
456
+ perform_idle_memory_reduction()
457
+
458
+ continue
459
 
460
+ background_cleanup_stop_event.wait(timeout=wait_duration)
461
+
462
+ if background_cleanup_stop_event.is_set():
463
+ break
464
+
465
+ if has_temporary_files_pending_cleanup():
466
  cleanup_expired_temporary_files()
467
+
468
+ current_timestamp = time.time()
469
+ time_since_last_memory_check = current_timestamp - last_memory_check_timestamp
470
+
471
+ if time_since_last_memory_check >= MEMORY_CHECK_INTERVAL:
472
+ if not check_if_generation_is_currently_active():
473
+
474
+ if is_memory_usage_critical():
475
+ enforce_memory_limit_if_exceeded()
476
+
477
+ elif is_memory_above_idle_target():
478
+ perform_idle_memory_reduction()
479
+
480
+ last_memory_check_timestamp = current_timestamp
481
+
482
+ def trigger_background_cleanup_check():
483
+ background_cleanup_trigger_event.set()
484
 
485
  def start_background_cleanup_thread():
486
  global background_cleanup_thread
487
 
488
  if background_cleanup_thread is None or not background_cleanup_thread.is_alive():
489
  background_cleanup_stop_event.clear()
490
+ background_cleanup_trigger_event.clear()
491
 
492
  background_cleanup_thread = threading.Thread(
493
  target=perform_background_cleanup_cycle,
 
499
 
500
  def stop_background_cleanup_thread():
501
  background_cleanup_stop_event.set()
502
+ background_cleanup_trigger_event.set()
503
 
504
  if background_cleanup_thread is not None and background_cleanup_thread.is_alive():
505
  background_cleanup_thread.join(timeout=5)
506
 
 
 
 
 
 
 
507
  atexit.register(stop_background_cleanup_thread)
508
 
509
  # =============================================================================
 
543
  with temporary_files_lock:
544
  temporary_files_registry[output_file.name] = time.time()
545
 
546
+ trigger_background_cleanup_check()
547
+
548
  return output_file.name
549
 
550
  except Exception as conversion_error:
 
587
 
588
  self.voice_state_cache_access_timestamps = {}
589
  self.voice_state_cache_lock = threading.Lock()
590
+ self.model_lock = threading.Lock()
591
+
592
+ def is_model_loaded(self):
593
+ with self.model_lock:
594
+ return self.loaded_model is not None
595
+
596
+ def unload_model_completely(self):
597
+ with self.model_lock:
598
+ self.clear_voice_state_cache_completely()
599
+
600
+ if self.loaded_model is not None:
601
+ del self.loaded_model
602
+ self.loaded_model = None
603
+
604
+ self.current_configuration = {}
605
+
606
+ memory_cleanup()
607
 
608
  def load_or_get_model(
609
  self,
 
630
  Returns:
631
  TTSModel: Loaded and configured TTS model instance
632
  """
633
+ perform_memory_cleanup()
634
+
635
  # Process and validate input parameters with defaults
636
  processed_variant = str(model_variant or DEFAULT_MODEL_VARIANT).strip()
637
  processed_temperature = float(temperature) if temperature is not None else DEFAULT_TEMPERATURE
 
648
  "eos_threshold": processed_eos_threshold
649
  }
650
 
651
+ with self.model_lock:
652
+ # Load new model if configuration changed or no model loaded
653
+ if self.loaded_model is None or self.current_configuration != requested_configuration:
654
+ if self.loaded_model is not None:
655
+ self.clear_voice_state_cache_completely()
656
+ del self.loaded_model
657
+ self.loaded_model = None
658
+ memory_cleanup()
659
 
660
+ self.loaded_model = TTSModel.load_model(**requested_configuration)
661
+ self.current_configuration = requested_configuration
662
+ self.voice_state_cache = {} # Clear cache on model change
663
 
664
+ return self.loaded_model
665
 
666
  def clear_voice_state_cache_completely(self):
667
  with self.voice_state_cache_lock:
 
681
  with self.voice_state_cache_lock:
682
 
683
  if len(self.voice_state_cache) <= VOICE_STATE_CACHE_CLEANUP_THRESHOLD:
684
+ if len(self.voice_state_cache) > 0:
685
+ sorted_voice_names_by_access_time = sorted(
686
+ self.voice_state_cache_access_timestamps.keys(),
687
+ key=lambda voice_name: self.voice_state_cache_access_timestamps[voice_name]
688
+ )
689
+
690
+ number_of_entries_to_remove = max(1, len(self.voice_state_cache) // 2)
691
+
692
+ for index in range(min(number_of_entries_to_remove, len(sorted_voice_names_by_access_time))):
693
+ voice_name_to_remove = sorted_voice_names_by_access_time[index]
694
+ voice_state_tensor = self.voice_state_cache.pop(voice_name_to_remove, None)
695
+ self.voice_state_cache_access_timestamps.pop(voice_name_to_remove, None)
696
+
697
+ if voice_state_tensor is not None:
698
+ del voice_state_tensor
699
+
700
+ force_garbage_collection()
701
  return
702
 
703
  sorted_voice_names_by_access_time = sorted(
 
738
  self.voice_state_cache_access_timestamps[validated_voice] = time.time()
739
  return self.voice_state_cache[validated_voice]
740
 
741
+ if is_memory_usage_approaching_limit():
742
+ self.evict_least_recently_used_voice_states()
743
+
744
  if len(self.voice_state_cache) >= VOICE_STATE_CACHE_MAXIMUM_SIZE:
745
  self.evict_least_recently_used_voice_states()
746
 
747
+ with self.model_lock:
748
+ if self.loaded_model is None:
749
+ raise RuntimeError("TTS model is not loaded. Please try again.")
750
+
751
  # Compute and cache voice state if not already cached
752
  if validated_voice not in self.voice_state_cache:
753
 
 
776
  Returns:
777
  Voice state tensor extracted from the audio file
778
  """
779
+ with self.model_lock:
780
+ if self.loaded_model is None:
781
+ raise RuntimeError("TTS model is not loaded. Please try again.")
782
 
783
  converted_audio_path = convert_audio_to_pcm_wav(audio_file_path)
784
 
 
800
  Returns:
801
  torch.Tensor: Generated audio waveform
802
  """
803
+ with self.model_lock:
804
+ if self.loaded_model is None:
805
+ raise RuntimeError("TTS model is not loaded. Please try again.")
806
+
807
+ # Apply custom frames setting if enabled
808
+ processed_frames = int(frames_after_eos) if enable_custom_frames else None
809
+
810
+ generated_audio = self.loaded_model.generate_audio(
811
+ model_state=voice_state,
812
+ text_to_generate=text_content,
813
+ frames_after_eos=processed_frames,
814
+ copy_state=True
815
+ )
816
+
817
+ force_garbage_collection()
818
+
819
+ return generated_audio
820
 
821
  def save_audio_to_file(self, audio_tensor):
822
  """
 
831
  Returns:
832
  str: Path to the saved temporary WAV file
833
  """
834
+ with self.model_lock:
835
+ if self.loaded_model is None:
836
+ raise RuntimeError("TTS model is not loaded. Cannot save audio.")
837
+
838
+ audio_sample_rate = self.loaded_model.sample_rate
839
+
840
  # Convert tensor to numpy array for scipy
841
  audio_numpy_data = audio_tensor.numpy()
 
842
 
843
  # Create temporary file and write audio data
844
  output_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
 
848
  with temporary_files_lock:
849
  temporary_files_registry[output_file.name] = time.time()
850
 
851
+ trigger_background_cleanup_check()
852
+
853
  return output_file.name
854
 
855
 
 
993
  global is_currently_generating, stop_generation_requested
994
 
995
  # Run cleanup before starting new generation
996
+ if has_temporary_files_pending_cleanup():
997
+ cleanup_expired_temporary_files()
998
+
999
+ perform_memory_cleanup()
1000
 
1001
  # Validate text input
1002
  is_valid, validation_result = validate_text_input(text_input)
 
1072
  except gr.Error:
1073
  raise
1074
 
1075
+ except RuntimeError as runtime_error:
1076
+ raise gr.Error(str(runtime_error))
1077
+
1078
  except Exception as generation_error:
1079
  raise gr.Error(f"Speech generation failed: {str(generation_error)}")
1080
 
 
1086
 
1087
  if generated_audio_tensor is not None:
1088
  del generated_audio_tensor
1089
+ generated_audio_tensor = None
1090
 
1091
  if cloned_voice_state_tensor is not None:
1092
  del cloned_voice_state_tensor
1093
+ cloned_voice_state_tensor = None
1094
 
1095
+ memory_cleanup()
1096
+
1097
+ trigger_background_cleanup_check()
1098
 
1099
 
1100
  # =============================================================================