Akjava committed on
Commit
1cee964
·
1 Parent(s): 697c000
Files changed (1) hide show
  1. app.py +22 -13
app.py CHANGED
@@ -36,6 +36,7 @@ MODEL_PATH = os.getenv("MODEL_PATH", os.path.join(MODELS_DIR, DEFAULT_MODEL))
36
  VOCODER_PATH = os.getenv("VOCODER_PATH", None)
37
  USE_GPU = os.getenv("USE_GPU", "false").lower() == "true"
38
  SAMPLE_RATE = 22050
 
39
 
40
 
41
  def get_available_models():
@@ -93,8 +94,9 @@ def process_japanese_text(text: str):
93
  phonemes = phonemes.replace(" ", "")
94
  phonemes = phonemes.replace("pau", " ")
95
 
96
- print(f"Input: {text}")
97
- print(f"Phonemes: {phonemes}")
 
98
 
99
  # Text to sequence
100
  sequence = text_to_sequence(phonemes)
@@ -136,7 +138,8 @@ class ONNXModelManager:
136
 
137
  def _load_model(self):
138
  """Load ONNX model(s)"""
139
- print(f"Loading model from {self.model_path} with providers {self.providers}")
 
140
  self.model = ort.InferenceSession(self.model_path, providers=self.providers)
141
 
142
  model_inputs = self.model.get_inputs()
@@ -145,12 +148,14 @@ class ONNXModelManager:
145
  self.is_multi_speaker = len(model_inputs) == 4
146
  self.has_vocoder_embedded = model_outputs[0].name == "wav"
147
 
148
- print(f"Model loaded: multi_speaker={self.is_multi_speaker}, "
149
- f"vocoder_embedded={self.has_vocoder_embedded}")
 
150
 
151
  # Load external vocoder if needed
152
  if not self.has_vocoder_embedded and self.vocoder_path:
153
- print(f"Loading external vocoder from {self.vocoder_path}")
 
154
  self.vocoder = ort.InferenceSession(self.vocoder_path, providers=self.providers)
155
 
156
  def synthesize(
@@ -204,7 +209,8 @@ def get_model_manager(model_name: str) -> ONNXModelManager:
204
  model_path = os.path.join(MODELS_DIR, model_name)
205
 
206
  if model_name not in model_managers:
207
- print(f"Loading new model: {model_name}")
 
208
  model_managers[model_name] = ONNXModelManager(
209
  model_path=model_path,
210
  vocoder_path=VOCODER_PATH,
@@ -216,10 +222,12 @@ def get_model_manager(model_name: str) -> ONNXModelManager:
216
 
217
 
218
  # Pre-load all available models
219
- print("Pre-loading all models for ZeroGPU...")
 
220
  for model_name in get_available_models():
221
  get_model_manager(model_name)
222
- print("All models loaded.")
 
223
 
224
  # ============================================================================
225
  # Gradio Interface Functions
@@ -274,9 +282,10 @@ def synthesise(
274
  audio_duration_sec = len(audio) / SAMPLE_RATE
275
  rtf = inference_time / audio_duration_sec
276
 
277
- print(f"Inference time: {inference_time:.3f}s, "
278
- f"Audio duration: {audio_duration_sec:.3f}s, "
279
- f"RTF: {rtf:.3f}")
 
280
 
281
  # Save to temporary file
282
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
@@ -445,4 +454,4 @@ if __name__ == "__main__":
445
  server_port=7860,
446
  share=False,
447
  show_error=True
448
- )
 
36
  VOCODER_PATH = os.getenv("VOCODER_PATH", None)
37
  USE_GPU = os.getenv("USE_GPU", "false").lower() == "true"
38
  SAMPLE_RATE = 22050
39
+ DEBUG = os.getenv("DEBUG", "false").lower() == "true"
40
 
41
 
42
  def get_available_models():
 
94
  phonemes = phonemes.replace(" ", "")
95
  phonemes = phonemes.replace("pau", " ")
96
 
97
+ if DEBUG:
98
+ print(f"Input: {text}")
99
+ print(f"Phonemes: {phonemes}")
100
 
101
  # Text to sequence
102
  sequence = text_to_sequence(phonemes)
 
138
 
139
  def _load_model(self):
140
  """Load ONNX model(s)"""
141
+ if DEBUG:
142
+ print(f"Loading model from {self.model_path} with providers {self.providers}")
143
  self.model = ort.InferenceSession(self.model_path, providers=self.providers)
144
 
145
  model_inputs = self.model.get_inputs()
 
148
  self.is_multi_speaker = len(model_inputs) == 4
149
  self.has_vocoder_embedded = model_outputs[0].name == "wav"
150
 
151
+ if DEBUG:
152
+ print(f"Model loaded: multi_speaker={self.is_multi_speaker}, "
153
+ f"vocoder_embedded={self.has_vocoder_embedded}")
154
 
155
  # Load external vocoder if needed
156
  if not self.has_vocoder_embedded and self.vocoder_path:
157
+ if DEBUG:
158
+ print(f"Loading external vocoder from {self.vocoder_path}")
159
  self.vocoder = ort.InferenceSession(self.vocoder_path, providers=self.providers)
160
 
161
  def synthesize(
 
209
  model_path = os.path.join(MODELS_DIR, model_name)
210
 
211
  if model_name not in model_managers:
212
+ if DEBUG:
213
+ print(f"Loading new model: {model_name}")
214
  model_managers[model_name] = ONNXModelManager(
215
  model_path=model_path,
216
  vocoder_path=VOCODER_PATH,
 
222
 
223
 
224
  # Pre-load all available models
225
+ if DEBUG:
226
+ print("Pre-loading all models for ZeroGPU...")
227
  for model_name in get_available_models():
228
  get_model_manager(model_name)
229
+ if DEBUG:
230
+ print("All models loaded.")
231
 
232
  # ============================================================================
233
  # Gradio Interface Functions
 
282
  audio_duration_sec = len(audio) / SAMPLE_RATE
283
  rtf = inference_time / audio_duration_sec
284
 
285
+ if DEBUG:
286
+ print(f"Inference time: {inference_time:.3f}s, "
287
+ f"Audio duration: {audio_duration_sec:.3f}s, "
288
+ f"RTF: {rtf:.3f}")
289
 
290
  # Save to temporary file
291
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
 
454
  server_port=7860,
455
  share=False,
456
  show_error=True
457
+ )