| from __future__ import annotations |
|
|
| import threading |
| from functools import cached_property |
| from pathlib import Path |
| from types import SimpleNamespace |
| from typing import Any |
|
|
| import numpy as np |
|
|
|
|
class Miner:
    """Qwen3-TTS voice-design engine loaded from a local model repository.

    The repository directory must contain ``config.json``. Optional tuning is
    read from ``vocence_config.yaml`` in the same directory; when that file is
    absent every setting falls back to its built-in default.
    """

    REPO_SENTINEL = "config.json"
    SETTINGS_FILE = "vocence_config.yaml"
    WARMUP_TIMEOUT = 180.0

    def __init__(self, path_hf_repo: Path) -> None:
        """Validate the repository directory and eagerly load settings and model.

        Args:
            path_hf_repo: Directory holding the model files.

        Raises:
            FileNotFoundError: ``config.json`` is not present in the directory.
            RuntimeError: The engine could not be loaded.
        """
        self.root = Path(path_hf_repo).resolve()
        if not (self.root / self.REPO_SENTINEL).is_file():
            raise FileNotFoundError(f"{self.REPO_SENTINEL} not present in {self.root}")
        # Touch both cached properties so configuration and load errors surface
        # at construction time rather than on the first request.
        _ = self.settings
        _ = self.model

    def __repr__(self) -> str:
        return f"<Miner root={self.root.name} language={self.settings.language!r}>"

    @cached_property
    def settings(self) -> SimpleNamespace:
        """Parsed settings from ``SETTINGS_FILE``, with defaults applied.

        Reads the ``runtime``, ``generation`` and ``limits`` sections. A key
        that is missing or explicitly null falls back to its default.

        Raises:
            ValueError: A section (or the whole file) is not a mapping, or a
                numeric key holds a non-numeric string.
            TypeError: A numeric key holds a value ``int()`` cannot convert.
        """
        raw = self._load_yaml(self.root / self.SETTINGS_FILE)
        rt = self._section(raw, "runtime")
        gen = self._section(raw, "generation")
        lim = self._section(raw, "limits")
        return SimpleNamespace(
            # ``limits`` takes precedence over ``runtime`` for the language.
            language=str(lim.get("default_language") or rt.get("default_language") or "English"),
            sample_rate=self._as_int(gen.get("sample_rate"), 24000),
            max_instruction_chars=self._as_int(lim.get("max_instruction_chars"), 600),
            max_text_chars=self._as_int(lim.get("max_text_chars"), 2000),
            prefer_cuda=str(rt.get("device_preference") or "cuda").lower() == "cuda",
            prefer_bf16=str(rt.get("dtype") or "bfloat16").lower() == "bfloat16",
            prefer_flash=bool(rt.get("use_flash_attention_2", False)),
        )

    @cached_property
    def model(self) -> Any:
        """The loaded engine; created on first access and then cached."""
        return self._instantiate_engine()

    def warmup(self) -> None:
        """Run one short synthesis on a worker thread, bounded by ``WARMUP_TIMEOUT``.

        Raises:
            RuntimeError: The synthesis raised (the original exception is
                chained as ``__cause__``), or it did not finish within
                ``WARMUP_TIMEOUT`` seconds.
        """
        outcome: dict[str, Any] = {"done": False, "err": None}

        def _trial() -> None:
            try:
                self.generate_wav(instruction="Neutral voice.", text="Warming up.")
            except Exception as exc:
                # Keep the exception object so the caller can chain it.
                outcome["err"] = exc
            else:
                outcome["done"] = True

        # Daemon thread: a hung warmup must not block interpreter shutdown.
        worker = threading.Thread(target=_trial, daemon=True)
        worker.start()
        worker.join(timeout=self.WARMUP_TIMEOUT)
        if outcome["done"]:
            return
        failure = outcome["err"]
        if failure is not None:
            # A real failure is not a timeout; report it as what it is.
            raise RuntimeError(f"warmup failed: {failure!r}") from failure
        raise RuntimeError(
            f"warmup did not complete within {self.WARMUP_TIMEOUT}s: no completion signal"
        )

    def generate_wav(self, instruction: str, text: str) -> tuple[np.ndarray, int]:
        """Synthesize ``text`` in the voice described by ``instruction``.

        Both strings are truncated to the configured character limits; a limit
        of zero or less disables truncation.

        Args:
            instruction: Voice description passed to the engine as ``instruct``.
            text: Text to speak.

        Returns:
            ``(wave, sample_rate)`` where ``wave`` is a float32 array and
            ``sample_rate`` is an ``int``.

        Raises:
            ValueError: The engine returned no audio or an empty waveform.
        """
        s = self.settings
        prompt = instruction[: s.max_instruction_chars] if s.max_instruction_chars > 0 else instruction
        body = text[: s.max_text_chars] if s.max_text_chars > 0 else text
        wavs, sample_rate = self.model.generate_voice_design(
            text=body,
            instruct=prompt,
            language=s.language,
        )
        # Explicit checks instead of ``not wavs``: the truth value of a
        # multi-element ndarray is ambiguous and would raise.
        if wavs is None or len(wavs) == 0 or wavs[0] is None:
            raise ValueError("qwen3-tts produced no audio")
        wave = np.asarray(wavs[0], dtype=np.float32)
        if wave.size == 0:
            raise ValueError("qwen3-tts produced no audio")
        return self._to_mono(wave), int(sample_rate)

    @staticmethod
    def _to_mono(wave: np.ndarray) -> np.ndarray:
        """Down-mix a multi-channel waveform; 1-D input is returned unchanged."""
        if wave.ndim == 2:
            # The channel axis is taken to be the shorter one, so both
            # (frames, channels) and (channels, frames) layouts collapse to
            # one value per frame. Ties keep the historical axis=1 behaviour.
            frames_first = wave.shape[0] >= wave.shape[1]
            return wave.mean(axis=1 if frames_first else 0)
        if wave.ndim > 2:
            # Layout unknown for higher ranks; preserve the historical reduction.
            return wave.mean(axis=1)
        return wave

    @staticmethod
    def _plan_engine(settings: SimpleNamespace, cuda_ready: bool) -> tuple[str, str, tuple[str, ...]]:
        """Choose device, dtype name and attention backends to try, in order.

        bfloat16 is selected only when the model is actually placed on the GPU,
        and FlashAttention-2 is attempted only for that GPU + bfloat16 case.
        """
        on_gpu = bool(settings.prefer_cuda and cuda_ready)
        device_map = "cuda:0" if on_gpu else "cpu"
        dtype_name = "bfloat16" if (on_gpu and settings.prefer_bf16) else "float32"
        if settings.prefer_flash and dtype_name == "bfloat16":
            attempts: tuple[str, ...] = ("flash_attention_2", "sdpa")
        else:
            attempts = ("sdpa",)
        return device_map, dtype_name, attempts

    def _instantiate_engine(self) -> Any:
        """Load the engine from ``self.root``, trying each attention backend in turn.

        Raises:
            RuntimeError: Every attempt failed; the last failure is chained.
        """
        # Heavy dependencies are imported lazily so importing this module stays cheap.
        import torch
        from qwen_tts import Qwen3TTSModel

        device_map, dtype_name, attempts = self._plan_engine(
            self.settings, bool(torch.cuda.is_available())
        )
        torch_dtype = getattr(torch, dtype_name)
        dtype_tag = "bf16" if dtype_name == "bfloat16" else "fp32"
        model_name = str(self.root)
        last_failure: BaseException | None = None
        for attn in attempts:
            try:
                engine = Qwen3TTSModel.from_pretrained(
                    pretrained_model_name_or_path=model_name,
                    device_map=device_map,
                    dtype=torch_dtype,
                    attn_implementation=attn,
                )
            except Exception as exc:
                # Remember the failure and fall through to the next backend.
                last_failure = exc
                continue
            print(f"[Miner] qwen3-tts ready :: device={device_map} dtype={dtype_tag} attn={attn}")
            return engine
        raise RuntimeError(f"qwen3-tts failed to load :: {last_failure!r}") from last_failure

    @staticmethod
    def _as_int(value: Any, default: int) -> int:
        """Return ``int(value)``, or ``default`` when ``value`` is ``None``."""
        return default if value is None else int(value)

    @staticmethod
    def _section(raw: dict[str, Any], name: str) -> dict[str, Any]:
        """Return the mapping stored under ``name``; missing or empty yields ``{}``.

        Raises:
            ValueError: The section holds a non-empty value that is not a mapping.
        """
        value = raw.get(name)
        if not value:
            return {}
        if not isinstance(value, dict):
            raise ValueError(
                f"settings section {name!r} must be a mapping, got {type(value).__name__}"
            )
        return value

    @staticmethod
    def _load_yaml(path: Path) -> dict[str, Any]:
        """Parse ``path`` as YAML; a missing or empty file yields ``{}``.

        Raises:
            ValueError: The document's top level is not a mapping.
        """
        if not path.is_file():
            return {}
        # Imported lazily so PyYAML is only needed when a settings file exists.
        from yaml import safe_load

        with path.open("r", encoding="utf-8") as fh:
            data = safe_load(fh) or {}
        if not isinstance(data, dict):
            raise ValueError(
                f"{path.name} must contain a top-level mapping, got {type(data).__name__}"
            )
        return data