from __future__ import annotations

import threading
from functools import cached_property
from pathlib import Path
from types import SimpleNamespace
from typing import Any

import numpy as np


class Miner:
    """Wrapper around a local qwen3-tts Hugging Face checkpoint.

    Validates the checkpoint directory on construction, eagerly loads the
    YAML settings and the model (so misconfiguration fails fast), and
    exposes text-to-speech via :meth:`generate_wav` plus a timeout-guarded
    :meth:`warmup`.
    """

    # File that must exist at the repo root for it to count as a checkpoint.
    REPO_SENTINEL = "config.json"
    # Optional YAML file with runtime / generation / limit overrides.
    SETTINGS_FILE = "vocence_config.yaml"
    # Seconds the trial warmup generation may take before we give up.
    WARMUP_TIMEOUT = 180.0

    def __init__(self, path_hf_repo: Path) -> None:
        """Resolve and validate the repo path, then eagerly build settings + model.

        Args:
            path_hf_repo: Directory holding the qwen3-tts checkpoint.

        Raises:
            FileNotFoundError: if ``REPO_SENTINEL`` is missing from the repo root.
        """
        self.root = Path(path_hf_repo).resolve()
        if not (self.root / self.REPO_SENTINEL).is_file():
            raise FileNotFoundError(f"{self.REPO_SENTINEL} not present in {self.root}")
        # Touch the cached properties now so construction fails fast instead of
        # deferring config/model errors to the first generate call.
        _ = self.settings
        _ = self.model

    def __repr__(self) -> str:
        # BUG FIX: the original body was `return f""` — an empty f-string that
        # made every instance print as "", which is useless for debugging.
        return f"{type(self).__name__}(root={str(self.root)!r})"

    @cached_property
    def settings(self) -> SimpleNamespace:
        """Parse ``SETTINGS_FILE`` (if present) into a flat settings namespace.

        Missing file or missing keys fall back to the defaults shown below.
        """
        raw = self._load_yaml(self.root / self.SETTINGS_FILE)
        rt = raw.get("runtime") or {}
        gen = raw.get("generation") or {}
        lim = raw.get("limits") or {}
        return SimpleNamespace(
            # `limits.default_language` wins over `runtime.default_language`.
            language=str(lim.get("default_language") or rt.get("default_language") or "English"),
            sample_rate=int(gen.get("sample_rate", 24000)),
            max_instruction_chars=int(lim.get("max_instruction_chars", 600)),
            max_text_chars=int(lim.get("max_text_chars", 2000)),
            prefer_cuda=str(rt.get("device_preference", "cuda")).lower() == "cuda",
            prefer_bf16=str(rt.get("dtype", "bfloat16")).lower() == "bfloat16",
            prefer_flash=bool(rt.get("use_flash_attention_2", False)),
        )

    @cached_property
    def model(self) -> Any:
        """The loaded qwen3-tts engine (constructed once, then cached)."""
        return self._instantiate_engine()

    def warmup(self) -> None:
        """Run one throwaway generation in a worker thread, bounded by ``WARMUP_TIMEOUT``.

        Raises:
            RuntimeError: if the trial generation does not succeed within
                ``WARMUP_TIMEOUT`` seconds (the underlying error, if any, is
                included in the message).
        """
        outcome: dict[str, Any] = {"done": False, "err": None}

        def _trial() -> None:
            try:
                self.generate_wav(instruction="Neutral voice.", text="Warming up.")
                outcome["done"] = True
            except Exception as exc:  # broad on purpose: report any failure upward
                outcome["err"] = repr(exc)

        # Daemon thread so a hung generation cannot block interpreter exit.
        worker = threading.Thread(target=_trial, daemon=True)
        worker.start()
        worker.join(timeout=self.WARMUP_TIMEOUT)
        if not outcome["done"]:
            raise RuntimeError(
                f"warmup did not complete within {self.WARMUP_TIMEOUT}s: {outcome['err'] or 'no completion signal'}"
            )

    def generate_wav(self, instruction: str, text: str) -> tuple[np.ndarray, int]:
        """Synthesize ``text`` spoken per ``instruction``.

        Both inputs are truncated to the configured character limits; a limit
        of 0 or less disables truncation for that field.

        Args:
            instruction: Voice-design prompt (style, tone, speaker traits).
            text: The content to speak.

        Returns:
            ``(wave, sample_rate)`` where ``wave`` is mono float32.

        Raises:
            ValueError: if the model returns no audio.
        """
        s = self.settings
        prompt = instruction[: s.max_instruction_chars] if s.max_instruction_chars > 0 else instruction
        body = text[: s.max_text_chars] if s.max_text_chars > 0 else text
        wavs, sample_rate = self.model.generate_voice_design(
            text=body,
            instruct=prompt,
            language=s.language,
        )
        if not wavs or wavs[0] is None:
            raise ValueError("qwen3-tts produced no audio")
        wave = np.asarray(wavs[0], dtype=np.float32)
        # Collapse multi-channel output to mono by averaging the channels.
        if wave.ndim > 1:
            wave = wave.mean(axis=1)
        return wave, int(sample_rate)

    def _instantiate_engine(self) -> Any:
        """Load the qwen3-tts model, honoring device/dtype/attention preferences.

        Tries ``flash_attention_2`` first when requested, falling back to
        ``sdpa``; the first implementation that loads wins.

        Raises:
            RuntimeError: if every attention implementation fails to load
                (the last underlying failure is included in the message).
        """
        # Imported lazily so merely constructing settings doesn't require torch.
        import torch
        from qwen_tts import Qwen3TTSModel

        s = self.settings
        cuda_ready = bool(torch.cuda.is_available())
        device_map = "cuda:0" if (s.prefer_cuda and cuda_ready) else "cpu"
        # bf16 only when actually on GPU; the CPU path stays in fp32.
        torch_dtype = torch.bfloat16 if (s.prefer_bf16 and cuda_ready) else torch.float32
        attempts = ("flash_attention_2", "sdpa") if s.prefer_flash else ("sdpa",)
        last_failure: BaseException | None = None
        for attn in attempts:
            try:
                engine = Qwen3TTSModel.from_pretrained(
                    pretrained_model_name_or_path=str(self.root),
                    device_map=device_map,
                    dtype=torch_dtype,
                    attn_implementation=attn,
                )
                dtype_tag = "bf16" if torch_dtype is torch.bfloat16 else "fp32"
                print(f"[Miner] qwen3-tts ready :: device={device_map} dtype={dtype_tag} attn={attn}")
                return engine
            except Exception as exc:
                last_failure = exc
        raise RuntimeError(f"qwen3-tts failed to load :: {last_failure!r}")

    @staticmethod
    def _load_yaml(path: Path) -> dict[str, Any]:
        """Read a YAML mapping from ``path``; return ``{}`` if absent or not a mapping."""
        if not path.is_file():
            return {}
        from yaml import safe_load

        with path.open("r", encoding="utf-8") as fh:
            data = safe_load(fh) or {}
        # FIX: honor the annotated dict contract — a YAML file whose top level
        # is a list or scalar would otherwise leak a non-dict into `settings`
        # and crash later on `.get` with an AttributeError.
        return data if isinstance(data, dict) else {}