Spaces:
Sleeping
Sleeping
nukopy committed on
Commit ·
9236f5d
1
Parent(s): df70e48
fix: logger.info to print for HuggingFace Spaces logging
Browse files
apps/audio_cloning/cheched_vallex.py
CHANGED
|
@@ -154,7 +154,7 @@ def infer_from_cached_prompt(
|
|
| 154 |
timings: List[Tuple[str, float]] = []
|
| 155 |
start_time = time.perf_counter()
|
| 156 |
try:
|
| 157 |
-
|
| 158 |
prompt_data = np.load(prompt_path)
|
| 159 |
audio_tokens = torch.from_numpy(prompt_data["audio_tokens"]).to(
|
| 160 |
dtype=torch.long
|
|
@@ -201,7 +201,7 @@ def infer_from_cached_prompt(
|
|
| 201 |
audio_prompts = audio_prompts.unsqueeze(0)
|
| 202 |
|
| 203 |
start_time = time.perf_counter()
|
| 204 |
-
|
| 205 |
encoded_frames = vallex.model.inference(
|
| 206 |
text_tokens.to(vallex.device),
|
| 207 |
text_tokens_lens.to(vallex.device),
|
|
@@ -216,17 +216,17 @@ def infer_from_cached_prompt(
|
|
| 216 |
best_of=5,
|
| 217 |
)
|
| 218 |
timings.append(("音響モデル推論", time.perf_counter() - start_time))
|
| 219 |
-
|
| 220 |
|
| 221 |
start_time = time.perf_counter()
|
| 222 |
-
|
| 223 |
frames = encoded_frames.permute(2, 0, 1)
|
| 224 |
features = vallex.vocos.codes_to_features(frames)
|
| 225 |
samples = vallex.vocos.decode(
|
| 226 |
features, bandwidth_id=torch.tensor([2], device=vallex.device)
|
| 227 |
)
|
| 228 |
timings.append(("ボコーダ復号", time.perf_counter() - start_time))
|
| 229 |
-
|
| 230 |
|
| 231 |
message = (
|
| 232 |
f"Loaded cached prompt: {prompt_filename}\n"
|
|
@@ -235,12 +235,12 @@ def infer_from_cached_prompt(
|
|
| 235 |
)
|
| 236 |
|
| 237 |
for step, duration in timings:
|
| 238 |
-
|
| 239 |
|
| 240 |
timing_report = "\n↓\n".join(
|
| 241 |
f"{step}:{duration:.4f} sec" for step, duration in timings
|
| 242 |
)
|
| 243 |
-
|
| 244 |
|
| 245 |
return message, (24000, samples.squeeze(0).cpu().numpy())
|
| 246 |
|
|
|
|
| 154 |
timings: List[Tuple[str, float]] = []
|
| 155 |
start_time = time.perf_counter()
|
| 156 |
try:
|
| 157 |
+
print("Loading cached prompt from: %s", prompt_path)
|
| 158 |
prompt_data = np.load(prompt_path)
|
| 159 |
audio_tokens = torch.from_numpy(prompt_data["audio_tokens"]).to(
|
| 160 |
dtype=torch.long
|
|
|
|
| 201 |
audio_prompts = audio_prompts.unsqueeze(0)
|
| 202 |
|
| 203 |
start_time = time.perf_counter()
|
| 204 |
+
print("Start inferring from cached prompt: %s", prompt_path)
|
| 205 |
encoded_frames = vallex.model.inference(
|
| 206 |
text_tokens.to(vallex.device),
|
| 207 |
text_tokens_lens.to(vallex.device),
|
|
|
|
| 216 |
best_of=5,
|
| 217 |
)
|
| 218 |
timings.append(("音響モデル推論", time.perf_counter() - start_time))
|
| 219 |
+
print("Inference completed")
|
| 220 |
|
| 221 |
start_time = time.perf_counter()
|
| 222 |
+
print("Decoding with Vocos...")
|
| 223 |
frames = encoded_frames.permute(2, 0, 1)
|
| 224 |
features = vallex.vocos.codes_to_features(frames)
|
| 225 |
samples = vallex.vocos.decode(
|
| 226 |
features, bandwidth_id=torch.tensor([2], device=vallex.device)
|
| 227 |
)
|
| 228 |
timings.append(("ボコーダ復号", time.perf_counter() - start_time))
|
| 229 |
+
print("Decoding completed")
|
| 230 |
|
| 231 |
message = (
|
| 232 |
f"Loaded cached prompt: {prompt_filename}\n"
|
|
|
|
| 235 |
)
|
| 236 |
|
| 237 |
for step, duration in timings:
|
| 238 |
+
print("%s:%.4f sec", step, duration)
|
| 239 |
|
| 240 |
timing_report = "\n↓\n".join(
|
| 241 |
f"{step}:{duration:.4f} sec" for step, duration in timings
|
| 242 |
)
|
| 243 |
+
print("推論ステップ計測結果\n%s", timing_report)
|
| 244 |
|
| 245 |
return message, (24000, samples.squeeze(0).cpu().numpy())
|
| 246 |
|
apps/audio_cloning/main.py
CHANGED
|
@@ -15,7 +15,7 @@ def main():
|
|
| 15 |
setup_logger()
|
| 16 |
|
| 17 |
# gradio app
|
| 18 |
-
|
| 19 |
gr.Markdown("# Charamix Audio Cloning Prototype")
|
| 20 |
|
| 21 |
# zero-shot audio cloning
|
|
|
|
| 15 |
setup_logger()
|
| 16 |
|
| 17 |
# gradio app
|
| 18 |
+
print("Initializing Gradio app")
|
| 19 |
gr.Markdown("# Charamix Audio Cloning Prototype")
|
| 20 |
|
| 21 |
# zero-shot audio cloning
|
apps/audio_cloning/vallex/main.py
CHANGED
|
@@ -43,25 +43,25 @@ logger = logging.getLogger(__name__)
|
|
| 43 |
# set base directory
|
| 44 |
OUTPUT_BASE_DIR = os.getenv("HF_HOME", ".")
|
| 45 |
PREPARED_BASE_DIR = "."
|
| 46 |
-
|
| 47 |
-
|
| 48 |
|
| 49 |
# set languages
|
| 50 |
langid.set_languages(["en", "zh", "ja"])
|
| 51 |
|
| 52 |
# set nltk data path
|
| 53 |
nltk.data.path = nltk.data.path + [os.path.join(os.getcwd(), "nltk_data")]
|
| 54 |
-
|
| 55 |
|
| 56 |
# get encoding
|
| 57 |
-
|
| 58 |
"default encoding is %s,file system encoding is %s",
|
| 59 |
sys.getdefaultencoding(),
|
| 60 |
sys.getfilesystemencoding(),
|
| 61 |
)
|
| 62 |
|
| 63 |
# check python version
|
| 64 |
-
|
| 65 |
if sys.version_info[0] < 3 or sys.version_info[1] < 7:
|
| 66 |
logger.warning("The Python version is too low and may cause problems")
|
| 67 |
if platform.system().lower() == "windows":
|
|
@@ -74,7 +74,7 @@ os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
|
|
| 74 |
|
| 75 |
# set torch threads (guarded for hot-reload)
|
| 76 |
thread_count = multiprocessing.cpu_count()
|
| 77 |
-
|
| 78 |
if not getattr(torch, "_vallex_threads_configured", False):
|
| 79 |
torch.set_num_threads(thread_count)
|
| 80 |
try:
|
|
@@ -88,10 +88,10 @@ if not getattr(torch, "_vallex_threads_configured", False):
|
|
| 88 |
# gradio のリロード時に torch.set_num_iterop_threads を実行するとエラーになるので、設定済みのフラグをセット
|
| 89 |
setattr(torch, "_vallex_threads_configured", True)
|
| 90 |
else:
|
| 91 |
-
|
| 92 |
|
| 93 |
# set text tokenizer and collater
|
| 94 |
-
|
| 95 |
tokenizer_path = os.path.join(
|
| 96 |
PREPARED_BASE_DIR, "apps/audio_cloning/vallex/g2p/bpe_69.json"
|
| 97 |
)
|
|
@@ -99,13 +99,13 @@ text_tokenizer = PhonemeBpeTokenizer(tokenizer_path=tokenizer_path)
|
|
| 99 |
text_collater = get_text_token_collater()
|
| 100 |
|
| 101 |
# set device
|
| 102 |
-
|
| 103 |
device = torch.device("cpu")
|
| 104 |
if torch.cuda.is_available():
|
| 105 |
device = torch.device("cuda", 0)
|
| 106 |
# if torch.backends.mps.is_available():
|
| 107 |
# device = torch.device("mps")
|
| 108 |
-
|
| 109 |
|
| 110 |
# Download VALL-E-X model weights if not exists
|
| 111 |
OUTPUT_DIR_CHECKPOINTS = os.path.join(OUTPUT_BASE_DIR, "models/checkpoints")
|
|
@@ -127,7 +127,7 @@ if not os.path.exists(OUTPUT_PATH_CHECKPOINTS):
|
|
| 127 |
out=OUTPUT_PATH_CHECKPOINTS,
|
| 128 |
bar=wget.bar_adaptive,
|
| 129 |
)
|
| 130 |
-
|
| 131 |
except Exception as e:
|
| 132 |
logger.error("Error downloading model weights: %s", e)
|
| 133 |
raise Exception(
|
|
@@ -156,7 +156,7 @@ assert not missing_keys
|
|
| 156 |
model.eval()
|
| 157 |
|
| 158 |
# Encodec-based tokenizer: converts reference audio into discrete conditioning tokens for VALLE
|
| 159 |
-
|
| 160 |
audio_tokenizer = AudioTokenizer(device)
|
| 161 |
|
| 162 |
# Vocos vocoder: decodes VALLE's discrete acoustic codes back into a 24 kHz waveform
|
|
@@ -168,12 +168,12 @@ if not os.path.exists(OUTPUT_DIR_WHISPER):
|
|
| 168 |
os.makedirs(OUTPUT_DIR_WHISPER, exist_ok=True)
|
| 169 |
|
| 170 |
try:
|
| 171 |
-
|
| 172 |
model_name = "tiny"
|
| 173 |
whisper_model = whisper.load_model(
|
| 174 |
model_name, device="cpu", download_root=OUTPUT_DIR_WHISPER
|
| 175 |
)
|
| 176 |
-
|
| 177 |
except NotImplementedError as e:
|
| 178 |
logger.error("Error on loading Whisper model: %s", e)
|
| 179 |
raise Exception(
|
|
@@ -188,7 +188,7 @@ except Exception as e:
|
|
| 188 |
) from e
|
| 189 |
|
| 190 |
# Initialize Voice Presets
|
| 191 |
-
|
| 192 |
PRESETS_DIR = os.path.join(PREPARED_BASE_DIR, "apps/audio_cloning/vallex/presets")
|
| 193 |
preset_list = os.walk(PRESETS_DIR).__next__()[2]
|
| 194 |
preset_list = [preset[:-4] for preset in preset_list if preset.endswith(".npz")]
|
|
@@ -436,12 +436,12 @@ def infer_from_audio(
|
|
| 436 |
timings.append(("ボコーダ復号", time.perf_counter() - start_time))
|
| 437 |
|
| 438 |
for step, duration in timings:
|
| 439 |
-
|
| 440 |
|
| 441 |
timing_report = "\n↓\n".join(
|
| 442 |
f"{step}:{duration:.4f} sec" for step, duration in timings
|
| 443 |
)
|
| 444 |
-
|
| 445 |
|
| 446 |
message = f"text prompt: {text_pr}\nsythesized text: {text}"
|
| 447 |
return message, (24000, samples.squeeze(0).cpu().numpy())
|
|
|
|
| 43 |
# set base directory
|
| 44 |
OUTPUT_BASE_DIR = os.getenv("HF_HOME", ".")
|
| 45 |
PREPARED_BASE_DIR = "."
|
| 46 |
+
print("Base directory: %s", OUTPUT_BASE_DIR)
|
| 47 |
+
print("Prepared base directory: %s", PREPARED_BASE_DIR)
|
| 48 |
|
| 49 |
# set languages
|
| 50 |
langid.set_languages(["en", "zh", "ja"])
|
| 51 |
|
| 52 |
# set nltk data path
|
| 53 |
nltk.data.path = nltk.data.path + [os.path.join(os.getcwd(), "nltk_data")]
|
| 54 |
+
print("nltk_data path: %s", nltk.data.path)
|
| 55 |
|
| 56 |
# get encoding
|
| 57 |
+
print(
|
| 58 |
"default encoding is %s,file system encoding is %s",
|
| 59 |
sys.getdefaultencoding(),
|
| 60 |
sys.getfilesystemencoding(),
|
| 61 |
)
|
| 62 |
|
| 63 |
# check python version
|
| 64 |
+
print("You are using Python version %s", platform.python_version())
|
| 65 |
if sys.version_info[0] < 3 or sys.version_info[1] < 7:
|
| 66 |
logger.warning("The Python version is too low and may cause problems")
|
| 67 |
if platform.system().lower() == "windows":
|
|
|
|
| 74 |
|
| 75 |
# set torch threads (guarded for hot-reload)
|
| 76 |
thread_count = multiprocessing.cpu_count()
|
| 77 |
+
print("Use %d cpu cores for computing", thread_count)
|
| 78 |
if not getattr(torch, "_vallex_threads_configured", False):
|
| 79 |
torch.set_num_threads(thread_count)
|
| 80 |
try:
|
|
|
|
| 88 |
# gradio のリロード時に torch.set_num_iterop_threads を実行するとエラーになるので、設定済みのフラグをセット
|
| 89 |
setattr(torch, "_vallex_threads_configured", True)
|
| 90 |
else:
|
| 91 |
+
print("Torch threads already configured; skipping reconfiguration")
|
| 92 |
|
| 93 |
# set text tokenizer and collater
|
| 94 |
+
print("Setting text tokenizer and collater...")
|
| 95 |
tokenizer_path = os.path.join(
|
| 96 |
PREPARED_BASE_DIR, "apps/audio_cloning/vallex/g2p/bpe_69.json"
|
| 97 |
)
|
|
|
|
| 99 |
text_collater = get_text_token_collater()
|
| 100 |
|
| 101 |
# set device
|
| 102 |
+
print("Setting device...")
|
| 103 |
device = torch.device("cpu")
|
| 104 |
if torch.cuda.is_available():
|
| 105 |
device = torch.device("cuda", 0)
|
| 106 |
# if torch.backends.mps.is_available():
|
| 107 |
# device = torch.device("mps")
|
| 108 |
+
print("Device set to %s", device)
|
| 109 |
|
| 110 |
# Download VALL-E-X model weights if not exists
|
| 111 |
OUTPUT_DIR_CHECKPOINTS = os.path.join(OUTPUT_BASE_DIR, "models/checkpoints")
|
|
|
|
| 127 |
out=OUTPUT_PATH_CHECKPOINTS,
|
| 128 |
bar=wget.bar_adaptive,
|
| 129 |
)
|
| 130 |
+
print("Model weights downloaded successfully")
|
| 131 |
except Exception as e:
|
| 132 |
logger.error("Error downloading model weights: %s", e)
|
| 133 |
raise Exception(
|
|
|
|
| 156 |
model.eval()
|
| 157 |
|
| 158 |
# Encodec-based tokenizer: converts reference audio into discrete conditioning tokens for VALLE
|
| 159 |
+
print("Initializing Encodec-based tokenizer...")
|
| 160 |
audio_tokenizer = AudioTokenizer(device)
|
| 161 |
|
| 162 |
# Vocos vocoder: decodes VALLE's discrete acoustic codes back into a 24 kHz waveform
|
|
|
|
| 168 |
os.makedirs(OUTPUT_DIR_WHISPER, exist_ok=True)
|
| 169 |
|
| 170 |
try:
|
| 171 |
+
print("Loading Whisper model...")
|
| 172 |
model_name = "tiny"
|
| 173 |
whisper_model = whisper.load_model(
|
| 174 |
model_name, device="cpu", download_root=OUTPUT_DIR_WHISPER
|
| 175 |
)
|
| 176 |
+
print("Whisper model loaded successfully")
|
| 177 |
except NotImplementedError as e:
|
| 178 |
logger.error("Error on loading Whisper model: %s", e)
|
| 179 |
raise Exception(
|
|
|
|
| 188 |
) from e
|
| 189 |
|
| 190 |
# Initialize Voice Presets
|
| 191 |
+
print("Initializing Voice Presets...")
|
| 192 |
PRESETS_DIR = os.path.join(PREPARED_BASE_DIR, "apps/audio_cloning/vallex/presets")
|
| 193 |
preset_list = os.walk(PRESETS_DIR).__next__()[2]
|
| 194 |
preset_list = [preset[:-4] for preset in preset_list if preset.endswith(".npz")]
|
|
|
|
| 436 |
timings.append(("ボコーダ復号", time.perf_counter() - start_time))
|
| 437 |
|
| 438 |
for step, duration in timings:
|
| 439 |
+
print("%s:%.4f sec", step, duration)
|
| 440 |
|
| 441 |
timing_report = "\n↓\n".join(
|
| 442 |
f"{step}:{duration:.4f} sec" for step, duration in timings
|
| 443 |
)
|
| 444 |
+
print("推論ステップ計測結果\n%s", timing_report)
|
| 445 |
|
| 446 |
message = f"text prompt: {text_pr}\nsythesized text: {text}"
|
| 447 |
return message, (24000, samples.squeeze(0).cpu().numpy())
|