nukopy committed on
Commit
9236f5d
·
1 Parent(s): df70e48

fix: logger.info to print for HuggingFace Spaces logging

Browse files
apps/audio_cloning/cheched_vallex.py CHANGED
@@ -154,7 +154,7 @@ def infer_from_cached_prompt(
154
  timings: List[Tuple[str, float]] = []
155
  start_time = time.perf_counter()
156
  try:
157
- logger.info("Loading cached prompt from: %s", prompt_path)
158
  prompt_data = np.load(prompt_path)
159
  audio_tokens = torch.from_numpy(prompt_data["audio_tokens"]).to(
160
  dtype=torch.long
@@ -201,7 +201,7 @@ def infer_from_cached_prompt(
201
  audio_prompts = audio_prompts.unsqueeze(0)
202
 
203
  start_time = time.perf_counter()
204
- logger.info("Start inferring from cached prompt: %s", prompt_path)
205
  encoded_frames = vallex.model.inference(
206
  text_tokens.to(vallex.device),
207
  text_tokens_lens.to(vallex.device),
@@ -216,17 +216,17 @@ def infer_from_cached_prompt(
216
  best_of=5,
217
  )
218
  timings.append(("音響モデル推論", time.perf_counter() - start_time))
219
- logger.info("Inference completed")
220
 
221
  start_time = time.perf_counter()
222
- logger.info("Decoding with Vocos...")
223
  frames = encoded_frames.permute(2, 0, 1)
224
  features = vallex.vocos.codes_to_features(frames)
225
  samples = vallex.vocos.decode(
226
  features, bandwidth_id=torch.tensor([2], device=vallex.device)
227
  )
228
  timings.append(("ボコーダ復号", time.perf_counter() - start_time))
229
- logger.info("Decoding completed")
230
 
231
  message = (
232
  f"Loaded cached prompt: {prompt_filename}\n"
@@ -235,12 +235,12 @@ def infer_from_cached_prompt(
235
  )
236
 
237
  for step, duration in timings:
238
- logger.info("%s:%.4f sec", step, duration)
239
 
240
  timing_report = "\n↓\n".join(
241
  f"{step}:{duration:.4f} sec" for step, duration in timings
242
  )
243
- logger.info("推論ステップ計測結果\n%s", timing_report)
244
 
245
  return message, (24000, samples.squeeze(0).cpu().numpy())
246
 
 
154
  timings: List[Tuple[str, float]] = []
155
  start_time = time.perf_counter()
156
  try:
157
+ print("Loading cached prompt from: %s", prompt_path)
158
  prompt_data = np.load(prompt_path)
159
  audio_tokens = torch.from_numpy(prompt_data["audio_tokens"]).to(
160
  dtype=torch.long
 
201
  audio_prompts = audio_prompts.unsqueeze(0)
202
 
203
  start_time = time.perf_counter()
204
+ print("Start inferring from cached prompt: %s", prompt_path)
205
  encoded_frames = vallex.model.inference(
206
  text_tokens.to(vallex.device),
207
  text_tokens_lens.to(vallex.device),
 
216
  best_of=5,
217
  )
218
  timings.append(("音響モデル推論", time.perf_counter() - start_time))
219
+ print("Inference completed")
220
 
221
  start_time = time.perf_counter()
222
+ print("Decoding with Vocos...")
223
  frames = encoded_frames.permute(2, 0, 1)
224
  features = vallex.vocos.codes_to_features(frames)
225
  samples = vallex.vocos.decode(
226
  features, bandwidth_id=torch.tensor([2], device=vallex.device)
227
  )
228
  timings.append(("ボコーダ復号", time.perf_counter() - start_time))
229
+ print("Decoding completed")
230
 
231
  message = (
232
  f"Loaded cached prompt: {prompt_filename}\n"
 
235
  )
236
 
237
  for step, duration in timings:
238
+ print("%s:%.4f sec", step, duration)
239
 
240
  timing_report = "\n↓\n".join(
241
  f"{step}:{duration:.4f} sec" for step, duration in timings
242
  )
243
+ print("推論ステップ計測結果\n%s", timing_report)
244
 
245
  return message, (24000, samples.squeeze(0).cpu().numpy())
246
 
apps/audio_cloning/main.py CHANGED
@@ -15,7 +15,7 @@ def main():
15
  setup_logger()
16
 
17
  # gradio app
18
- logger.info("Initializing Gradio app")
19
  gr.Markdown("# Charamix Audio Cloning Prototype")
20
 
21
  # zero-shot audio cloning
 
15
  setup_logger()
16
 
17
  # gradio app
18
+ print("Initializing Gradio app")
19
  gr.Markdown("# Charamix Audio Cloning Prototype")
20
 
21
  # zero-shot audio cloning
apps/audio_cloning/vallex/main.py CHANGED
@@ -43,25 +43,25 @@ logger = logging.getLogger(__name__)
43
  # set base directory
44
  OUTPUT_BASE_DIR = os.getenv("HF_HOME", ".")
45
  PREPARED_BASE_DIR = "."
46
- logger.info("Base directory: %s", OUTPUT_BASE_DIR)
47
- logger.info("Prepared base directory: %s", PREPARED_BASE_DIR)
48
 
49
  # set languages
50
  langid.set_languages(["en", "zh", "ja"])
51
 
52
  # set nltk data path
53
  nltk.data.path = nltk.data.path + [os.path.join(os.getcwd(), "nltk_data")]
54
- logger.info("nltk_data path: %s", nltk.data.path)
55
 
56
  # get encoding
57
- logger.info(
58
  "default encoding is %s,file system encoding is %s",
59
  sys.getdefaultencoding(),
60
  sys.getfilesystemencoding(),
61
  )
62
 
63
  # check python version
64
- logger.info("You are using Python version %s", platform.python_version())
65
  if sys.version_info[0] < 3 or sys.version_info[1] < 7:
66
  logger.warning("The Python version is too low and may cause problems")
67
  if platform.system().lower() == "windows":
@@ -74,7 +74,7 @@ os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
74
 
75
  # set torch threads (guarded for hot-reload)
76
  thread_count = multiprocessing.cpu_count()
77
- logger.info("Use %d cpu cores for computing", thread_count)
78
  if not getattr(torch, "_vallex_threads_configured", False):
79
  torch.set_num_threads(thread_count)
80
  try:
@@ -88,10 +88,10 @@ if not getattr(torch, "_vallex_threads_configured", False):
88
  # gradio のリロード時に torch.set_num_iterop_threads を実行するとエラーになるので、設定済みのフラグをセット
89
  setattr(torch, "_vallex_threads_configured", True)
90
  else:
91
- logger.info("Torch threads already configured; skipping reconfiguration")
92
 
93
  # set text tokenizer and collater
94
- logger.info("Setting text tokenizer and collater...")
95
  tokenizer_path = os.path.join(
96
  PREPARED_BASE_DIR, "apps/audio_cloning/vallex/g2p/bpe_69.json"
97
  )
@@ -99,13 +99,13 @@ text_tokenizer = PhonemeBpeTokenizer(tokenizer_path=tokenizer_path)
99
  text_collater = get_text_token_collater()
100
 
101
  # set device
102
- logger.info("Setting device...")
103
  device = torch.device("cpu")
104
  if torch.cuda.is_available():
105
  device = torch.device("cuda", 0)
106
  # if torch.backends.mps.is_available():
107
  # device = torch.device("mps")
108
- logger.info("Device set to %s", device)
109
 
110
  # Download VALL-E-X model weights if not exists
111
  OUTPUT_DIR_CHECKPOINTS = os.path.join(OUTPUT_BASE_DIR, "models/checkpoints")
@@ -127,7 +127,7 @@ if not os.path.exists(OUTPUT_PATH_CHECKPOINTS):
127
  out=OUTPUT_PATH_CHECKPOINTS,
128
  bar=wget.bar_adaptive,
129
  )
130
- logger.info("Model weights downloaded successfully")
131
  except Exception as e:
132
  logger.error("Error downloading model weights: %s", e)
133
  raise Exception(
@@ -156,7 +156,7 @@ assert not missing_keys
156
  model.eval()
157
 
158
  # Encodec-based tokenizer: converts reference audio into discrete conditioning tokens for VALLE
159
- logger.info("Initializing Encodec-based tokenizer...")
160
  audio_tokenizer = AudioTokenizer(device)
161
 
162
  # Vocos vocoder: decodes VALLE's discrete acoustic codes back into a 24 kHz waveform
@@ -168,12 +168,12 @@ if not os.path.exists(OUTPUT_DIR_WHISPER):
168
  os.makedirs(OUTPUT_DIR_WHISPER, exist_ok=True)
169
 
170
  try:
171
- logger.info("Loading Whisper model...")
172
  model_name = "tiny"
173
  whisper_model = whisper.load_model(
174
  model_name, device="cpu", download_root=OUTPUT_DIR_WHISPER
175
  )
176
- logger.info("Whisper model loaded successfully")
177
  except NotImplementedError as e:
178
  logger.error("Error on loading Whisper model: %s", e)
179
  raise Exception(
@@ -188,7 +188,7 @@ except Exception as e:
188
  ) from e
189
 
190
  # Initialize Voice Presets
191
- logger.info("Initializing Voice Presets...")
192
  PRESETS_DIR = os.path.join(PREPARED_BASE_DIR, "apps/audio_cloning/vallex/presets")
193
  preset_list = os.walk(PRESETS_DIR).__next__()[2]
194
  preset_list = [preset[:-4] for preset in preset_list if preset.endswith(".npz")]
@@ -436,12 +436,12 @@ def infer_from_audio(
436
  timings.append(("ボコーダ復号", time.perf_counter() - start_time))
437
 
438
  for step, duration in timings:
439
- logger.info("%s:%.4f sec", step, duration)
440
 
441
  timing_report = "\n↓\n".join(
442
  f"{step}:{duration:.4f} sec" for step, duration in timings
443
  )
444
- logger.info("推論ステップ計測結果\n%s", timing_report)
445
 
446
  message = f"text prompt: {text_pr}\nsythesized text: {text}"
447
  return message, (24000, samples.squeeze(0).cpu().numpy())
 
43
  # set base directory
44
  OUTPUT_BASE_DIR = os.getenv("HF_HOME", ".")
45
  PREPARED_BASE_DIR = "."
46
+ print("Base directory: %s", OUTPUT_BASE_DIR)
47
+ print("Prepared base directory: %s", PREPARED_BASE_DIR)
48
 
49
  # set languages
50
  langid.set_languages(["en", "zh", "ja"])
51
 
52
  # set nltk data path
53
  nltk.data.path = nltk.data.path + [os.path.join(os.getcwd(), "nltk_data")]
54
+ print("nltk_data path: %s", nltk.data.path)
55
 
56
  # get encoding
57
+ print(
58
  "default encoding is %s,file system encoding is %s",
59
  sys.getdefaultencoding(),
60
  sys.getfilesystemencoding(),
61
  )
62
 
63
  # check python version
64
+ print("You are using Python version %s", platform.python_version())
65
  if sys.version_info[0] < 3 or sys.version_info[1] < 7:
66
  logger.warning("The Python version is too low and may cause problems")
67
  if platform.system().lower() == "windows":
 
74
 
75
  # set torch threads (guarded for hot-reload)
76
  thread_count = multiprocessing.cpu_count()
77
+ print("Use %d cpu cores for computing", thread_count)
78
  if not getattr(torch, "_vallex_threads_configured", False):
79
  torch.set_num_threads(thread_count)
80
  try:
 
88
  # gradio のリロード時に torch.set_num_iterop_threads を実行するとエラーになるので、設定済みのフラグをセット
89
  setattr(torch, "_vallex_threads_configured", True)
90
  else:
91
+ print("Torch threads already configured; skipping reconfiguration")
92
 
93
  # set text tokenizer and collater
94
+ print("Setting text tokenizer and collater...")
95
  tokenizer_path = os.path.join(
96
  PREPARED_BASE_DIR, "apps/audio_cloning/vallex/g2p/bpe_69.json"
97
  )
 
99
  text_collater = get_text_token_collater()
100
 
101
  # set device
102
+ print("Setting device...")
103
  device = torch.device("cpu")
104
  if torch.cuda.is_available():
105
  device = torch.device("cuda", 0)
106
  # if torch.backends.mps.is_available():
107
  # device = torch.device("mps")
108
+ print("Device set to %s", device)
109
 
110
  # Download VALL-E-X model weights if not exists
111
  OUTPUT_DIR_CHECKPOINTS = os.path.join(OUTPUT_BASE_DIR, "models/checkpoints")
 
127
  out=OUTPUT_PATH_CHECKPOINTS,
128
  bar=wget.bar_adaptive,
129
  )
130
+ print("Model weights downloaded successfully")
131
  except Exception as e:
132
  logger.error("Error downloading model weights: %s", e)
133
  raise Exception(
 
156
  model.eval()
157
 
158
  # Encodec-based tokenizer: converts reference audio into discrete conditioning tokens for VALLE
159
+ print("Initializing Encodec-based tokenizer...")
160
  audio_tokenizer = AudioTokenizer(device)
161
 
162
  # Vocos vocoder: decodes VALLE's discrete acoustic codes back into a 24 kHz waveform
 
168
  os.makedirs(OUTPUT_DIR_WHISPER, exist_ok=True)
169
 
170
  try:
171
+ print("Loading Whisper model...")
172
  model_name = "tiny"
173
  whisper_model = whisper.load_model(
174
  model_name, device="cpu", download_root=OUTPUT_DIR_WHISPER
175
  )
176
+ print("Whisper model loaded successfully")
177
  except NotImplementedError as e:
178
  logger.error("Error on loading Whisper model: %s", e)
179
  raise Exception(
 
188
  ) from e
189
 
190
  # Initialize Voice Presets
191
+ print("Initializing Voice Presets...")
192
  PRESETS_DIR = os.path.join(PREPARED_BASE_DIR, "apps/audio_cloning/vallex/presets")
193
  preset_list = os.walk(PRESETS_DIR).__next__()[2]
194
  preset_list = [preset[:-4] for preset in preset_list if preset.endswith(".npz")]
 
436
  timings.append(("ボコーダ復号", time.perf_counter() - start_time))
437
 
438
  for step, duration in timings:
439
+ print("%s:%.4f sec", step, duration)
440
 
441
  timing_report = "\n↓\n".join(
442
  f"{step}:{duration:.4f} sec" for step, duration in timings
443
  )
444
+ print("推論ステップ計測結果\n%s", timing_report)
445
 
446
  message = f"text prompt: {text_pr}\nsythesized text: {text}"
447
  return message, (24000, samples.squeeze(0).cpu().numpy())