michael-chan-000
/

tts-instruct-e1

@@ -1,118 +1,114 @@
 from __future__ import annotations
-from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeout
 from pathlib import Path
 from typing import Any
 import numpy as np
# File name of the optional settings YAML looked up inside the snapshot directory.
VOCENCE_CONFIG = "vocence_config.yaml"
# File that must exist in the snapshot directory for it to be accepted.
QWEN_ANCHOR = "config.json"
# Seconds the warmup synthesis may run before it is reported as a failure.
WARMUP_SECONDS = 180.0
-def _load_yaml(path: Path) -> dict[str, Any]:
-    if not path.is_file():
-        return {}
-    from yaml import safe_load
-    with path.open("r", encoding="utf-8") as fh:
-        return safe_load(fh) or {}
-def _select_device(prefer_cuda: bool):
-    import torch
-    has_cuda = torch.cuda.is_available()
-    device = "cuda:0" if (prefer_cuda and has_cuda) else "cpu"
-    return device, torch, has_cuda
-def _select_dtype(torch_mod, want_bf16: bool, has_cuda: bool):
-    return torch_mod.bfloat16 if (want_bf16 and has_cuda) else torch_mod.float32
def _build_qwen(snapshot: Path, device: str, dtype: Any, attn: str) -> Any:
    """Load a ``Qwen3TTSModel`` from a local snapshot directory.

    Args:
        snapshot: Directory passed (as a string) as the pretrained model path.
        device: Value forwarded as ``device_map``.
        dtype: Value forwarded as ``dtype``.
        attn: Value forwarded as ``attn_implementation``.

    Returns:
        Whatever ``Qwen3TTSModel.from_pretrained`` returns.
    """
    # Imported here so the module can be imported without qwen_tts installed.
    from qwen_tts import Qwen3TTSModel

    return Qwen3TTSModel.from_pretrained(
        pretrained_model_name_or_path=str(snapshot),
        device_map=device,
        dtype=dtype,
        attn_implementation=attn,
    )
-def _attn_order(prefer_flash: bool) -> tuple[str, ...]:
-    return ("flash_attention_2", "sdpa") if prefer_flash else ("sdpa",)
-def _mono_pcm(arr: Any) -> np.ndarray:
-    wave = np.asarray(arr, dtype=np.float32)
-    return wave.mean(axis=1) if wave.ndim > 1 else wave
def _settings(snapshot: Path) -> dict[str, Any]:
    """Build the runtime settings dict from the snapshot's settings YAML.

    Values are read from the ``runtime``, ``generation`` and ``limits``
    sections of the file named by ``VOCENCE_CONFIG``; any missing section or
    key falls back to the default shown below.

    Args:
        snapshot: Directory that may contain the settings file.

    Returns:
        A dict with the keys ``language``, ``sample_rate``, ``cap_instruct``,
        ``cap_text``, ``prefer_cuda``, ``prefer_bf16`` and ``prefer_flash``.
    """
    raw = _load_yaml(snapshot / VOCENCE_CONFIG)
    rt = raw.get("runtime") or {}
    gen = raw.get("generation") or {}
    lim = raw.get("limits") or {}

    def _int(section: dict[str, Any], key: str, default: int) -> int:
        # A key that is present but null would otherwise reach int(None) and
        # raise TypeError; treat it the same as a missing key. An explicit 0
        # is kept as 0.
        value = section.get(key)
        return default if value is None else int(value)

    return {
        # "limits" takes precedence over "runtime" for the language.
        "language": str(lim.get("default_language") or rt.get("default_language") or "English"),
        "sample_rate": _int(gen, "sample_rate", 24000),
        "cap_instruct": _int(lim, "max_instruction_chars", 600),
        "cap_text": _int(lim, "max_text_chars", 2000),
        "prefer_cuda": str(rt.get("device_preference", "cuda")).lower() == "cuda",
        "prefer_bf16": str(rt.get("dtype", "bfloat16")).lower() == "bfloat16",
        "prefer_flash": bool(rt.get("use_flash_attention_2", False)),
    }
class Miner:
    """Text-to-speech front end around a Qwen3-TTS model loaded from disk.

    Construction validates the snapshot directory, reads its settings and
    loads the model; ``generate_wav`` then synthesises audio for an
    instruction/text pair.
    """

    def __init__(self, path_hf_repo: Path) -> None:
        """Load settings and the model from *path_hf_repo*.

        Raises:
            FileNotFoundError: If the snapshot lacks the ``QWEN_ANCHOR`` file.
            RuntimeError: If the model fails to load with every attention
                implementation that was tried.
        """
        snapshot = Path(path_hf_repo).resolve()
        if not (snapshot / QWEN_ANCHOR).is_file():
            raise FileNotFoundError(f"snapshot missing {QWEN_ANCHOR}: {snapshot}")
        self.snapshot = snapshot
        self.cfg = _settings(snapshot)
        device, torch_mod, has_cuda = _select_device(self.cfg["prefer_cuda"])
        dtype = _select_dtype(torch_mod, self.cfg["prefer_bf16"], has_cuda)
        last_err: BaseException | None = None
        engine = None
        for attn in _attn_order(self.cfg["prefer_flash"]):
            # Only the load itself is guarded; a failure moves on to the next
            # attention implementation.
            try:
                engine = _build_qwen(snapshot, device, dtype, attn)
            except Exception as exc:
                last_err = exc
                continue
            tag = "bf16" if self.cfg["prefer_bf16"] and has_cuda else "fp32"
            print(f"[Miner] qwen3-tts ready: device={device} dtype={tag} attn={attn}")
            break
        if engine is None:
            # Chain the last failure so its traceback is not lost.
            raise RuntimeError(f"qwen3-tts load failed: {last_err!r}") from last_err
        self.engine = engine

    def __repr__(self) -> str:
        return f"<Miner snapshot={self.snapshot.name} lang={self.cfg['language']!r}>"

    def warmup(self) -> None:
        """Run one short synthesis, failing if it exceeds ``WARMUP_SECONDS``.

        Raises:
            RuntimeError: If the synthesis does not finish in time.
        """
        # The executor is deliberately not used as a context manager: leaving
        # a ``with`` block calls shutdown(wait=True), which would block until
        # the stuck synthesis finished and so defeat the timeout.
        pool = ThreadPoolExecutor(max_workers=1)
        try:
            future = pool.submit(self.generate_wav, "Neutral voice.", "Warmup phrase.")
            try:
                future.result(timeout=WARMUP_SECONDS)
            except FutureTimeout:
                raise RuntimeError(f"Miner warmup exceeded {WARMUP_SECONDS}s") from None
        finally:
            # NOTE(review): the worker thread keeps running after a timeout;
            # this only stops warmup() itself from waiting on it.
            pool.shutdown(wait=False)

    def generate_wav(self, instruction: str, text: str) -> tuple[np.ndarray, int]:
        """Synthesise *text* in the voice described by *instruction*.

        Both strings are truncated to the configured character caps; a cap of
        zero or less disables truncation for that string.

        Returns:
            ``(waveform, sample_rate)`` using the first waveform the engine
            returns.

        Raises:
            ValueError: If the engine returns no audio.
        """
        cap_i = self.cfg["cap_instruct"]
        cap_t = self.cfg["cap_text"]
        prompt = instruction[:cap_i] if cap_i > 0 else instruction
        body = text[:cap_t] if cap_t > 0 else text
        wavs, sr = self.engine.generate_voice_design(
            text=body,
            instruct=prompt,
            language=self.cfg["language"],
        )
        if not wavs or wavs[0] is None:
            raise ValueError("qwen3-tts returned no audio")
        return _mono_pcm(wavs[0]), int(sr)

 from __future__ import annotations
+import threading
+from functools import cached_property
 from pathlib import Path
+from types import SimpleNamespace
 from typing import Any
 import numpy as np
class Miner:
    """Text-to-speech front end around a Qwen3-TTS model loaded from disk.

    Settings and the model are exposed as cached properties; the constructor
    touches both so they are loaded during construction.
    """

    # File that must exist in the repo directory for it to be accepted.
    REPO_SENTINEL = "config.json"
    # Optional settings YAML looked up inside the repo directory.
    SETTINGS_FILE = "vocence_config.yaml"
    # Seconds warmup() waits for its trial synthesis.
    WARMUP_TIMEOUT = 180.0

    def __init__(self, path_hf_repo: Path) -> None:
        """Validate *path_hf_repo* and load settings and the model.

        Raises:
            FileNotFoundError: If ``REPO_SENTINEL`` is not a file in the repo.
            RuntimeError: If the model cannot be loaded.
        """
        self.root = Path(path_hf_repo).resolve()
        if not (self.root / self.REPO_SENTINEL).is_file():
            raise FileNotFoundError(f"{self.REPO_SENTINEL} not present in {self.root}")
        # Touch both cached properties now so loading happens here rather
        # than on first use.
        _ = self.settings
        _ = self.model

    def __repr__(self) -> str:
        return f"<Miner root={self.root.name} language={self.settings.language!r}>"

    @cached_property
    def settings(self) -> SimpleNamespace:
        """Settings read from ``SETTINGS_FILE``, with defaults for anything missing."""
        raw = self._load_yaml(self.root / self.SETTINGS_FILE)
        rt = raw.get("runtime") or {}
        gen = raw.get("generation") or {}
        lim = raw.get("limits") or {}
        return SimpleNamespace(
            # "limits" takes precedence over "runtime" for the language.
            language=str(lim.get("default_language") or rt.get("default_language") or "English"),
            sample_rate=self._int_setting(gen, "sample_rate", 24000),
            max_instruction_chars=self._int_setting(lim, "max_instruction_chars", 600),
            max_text_chars=self._int_setting(lim, "max_text_chars", 2000),
            prefer_cuda=str(rt.get("device_preference", "cuda")).lower() == "cuda",
            prefer_bf16=str(rt.get("dtype", "bfloat16")).lower() == "bfloat16",
            prefer_flash=bool(rt.get("use_flash_attention_2", False)),
        )

    @cached_property
    def model(self) -> Any:
        """The loaded engine; created on first access."""
        return self._instantiate_engine()

    def warmup(self) -> None:
        """Run one short synthesis in a daemon thread, bounded by ``WARMUP_TIMEOUT``.

        Raises:
            RuntimeError: If the synthesis raises (chained to the original
                exception) or does not finish within the timeout.
        """
        outcome: dict[str, Any] = {"done": False, "err": None}

        def _trial() -> None:
            try:
                self.generate_wav(instruction="Neutral voice.", text="Warming up.")
            except Exception as exc:
                # Keep the exception object, not its repr, so it can be chained.
                outcome["err"] = exc
            else:
                outcome["done"] = True

        limit = self.WARMUP_TIMEOUT
        worker = threading.Thread(target=_trial, daemon=True)
        worker.start()
        worker.join(timeout=limit)
        if worker.is_alive():
            # Genuine timeout: the synthesis is still running.
            raise RuntimeError(
                f"warmup did not complete within {limit}s: no completion signal"
            )
        failure = outcome["err"]
        if failure is not None:
            # The synthesis failed on its own; do not report it as a timeout.
            raise RuntimeError(f"warmup failed: {failure!r}") from failure
        if not outcome["done"]:
            # Reached only if the thread died from a non-Exception BaseException.
            raise RuntimeError("warmup thread exited without a completion signal")

    def generate_wav(self, instruction: str, text: str) -> tuple[np.ndarray, int]:
        """Synthesise *text* in the voice described by *instruction*.

        Both strings are truncated to the configured character caps; a cap of
        zero or less disables truncation for that string.

        Returns:
            ``(waveform, sample_rate)`` where the waveform is the first one
            returned by the engine, converted to float32.

        Raises:
            ValueError: If the engine returns no audio.
        """
        s = self.settings
        prompt = instruction[: s.max_instruction_chars] if s.max_instruction_chars > 0 else instruction
        body = text[: s.max_text_chars] if s.max_text_chars > 0 else text
        wavs, sample_rate = self.model.generate_voice_design(
            text=body,
            instruct=prompt,
            language=s.language,
        )
        if not wavs or wavs[0] is None:
            raise ValueError("qwen3-tts produced no audio")
        wave = np.asarray(wavs[0], dtype=np.float32)
        if wave.ndim > 1:
            # NOTE(review): assumes a (samples, channels) layout — confirm
            # against what the engine actually returns.
            wave = wave.mean(axis=1)
        return wave, int(sample_rate)

    def _instantiate_engine(self) -> Any:
        """Load ``Qwen3TTSModel`` from ``self.root``, trying each attention backend in turn.

        Raises:
            RuntimeError: If every attempt fails; chained to the last failure.
        """
        import torch
        from qwen_tts import Qwen3TTSModel

        s = self.settings
        cuda_ready = bool(torch.cuda.is_available())
        device_map = "cuda:0" if (s.prefer_cuda and cuda_ready) else "cpu"
        # bfloat16 is only selected together with CUDA; otherwise float32.
        torch_dtype = torch.bfloat16 if (s.prefer_bf16 and cuda_ready) else torch.float32
        attempts = ("flash_attention_2", "sdpa") if s.prefer_flash else ("sdpa",)
        model_name = str(self.root)
        last_failure: BaseException | None = None
        for attn in attempts:
            try:
                engine = Qwen3TTSModel.from_pretrained(
                    pretrained_model_name_or_path=model_name,
                    device_map=device_map,
                    dtype=torch_dtype,
                    attn_implementation=attn,
                )
                dtype_tag = "bf16" if torch_dtype is torch.bfloat16 else "fp32"
                print(f"[Miner] qwen3-tts ready :: device={device_map} dtype={dtype_tag} attn={attn}")
                return engine
            except Exception as exc:
                last_failure = exc
        # Chain the last failure so its traceback is not lost.
        raise RuntimeError(f"qwen3-tts failed to load :: {last_failure!r}") from last_failure

    @staticmethod
    def _int_setting(section: dict[str, Any], key: str, default: int) -> int:
        """Return ``section[key]`` as an int, or *default* when it is missing or null."""
        # A key that is present but null would otherwise reach int(None) and
        # raise TypeError. An explicit 0 is kept as 0.
        value = section.get(key)
        return default if value is None else int(value)

    @staticmethod
    def _load_yaml(path: Path) -> dict[str, Any]:
        """Read *path* as YAML and return its top-level mapping.

        Returns an empty dict when *path* is not a regular file or the
        document parses to a falsy value.

        Raises:
            ValueError: If the document's top level is not a mapping.
        """
        if not path.is_file():
            return {}
        # Imported after the early return so a missing file never needs PyYAML.
        from yaml import safe_load

        with path.open("r", encoding="utf-8") as fh:
            data = safe_load(fh) or {}
        if not isinstance(data, dict):
            raise ValueError(
                f"expected a mapping at the top level of {path}, got {type(data).__name__}"
            )
        return data