Instructions to use arwin0727/tts_engine_model with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use arwin0727/tts_engine_model with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline
pipe = pipeline("text-to-speech", model="arwin0727/tts_engine_model")

# Load model directly
from transformers import AutoModelForSeq2SeqLM
model = AutoModelForSeq2SeqLM.from_pretrained("arwin0727/tts_engine_model", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
from __future__ import annotations

import threading
from functools import cached_property
from pathlib import Path
from types import SimpleNamespace
from typing import Any

import numpy as np
class Miner:
    """Load a local qwen3-tts checkpoint and synthesize speech from text.

    Construction validates the repository directory, then eagerly resolves
    the YAML-backed settings and instantiates the model so that any load
    failure surfaces immediately rather than on first generation.
    """

    # File that must exist at the repo root for the directory to be accepted.
    REPO_SENTINEL = "config.json"
    # Optional YAML file with runtime / generation / limit overrides.
    SETTINGS_FILE = "vocence_config.yaml"
    # Seconds to wait for the first trial generation before declaring failure.
    WARMUP_TIMEOUT = 180.0

    def __init__(self, path_hf_repo: Path) -> None:
        """Validate *path_hf_repo* and eagerly load settings and model.

        Raises:
            FileNotFoundError: if REPO_SENTINEL is missing from the repo root.
        """
        self.root = Path(path_hf_repo).resolve()
        if not (self.root / self.REPO_SENTINEL).is_file():
            raise FileNotFoundError(f"{self.REPO_SENTINEL} not present in {self.root}")
        # Touch the cached properties so configuration/model load errors
        # surface at construction time, not on the first generate_wav() call.
        _ = self.settings
        _ = self.model

    def __repr__(self) -> str:
        return f"<Miner root={self.root.name} language={self.settings.language!r}>"

    # NOTE(fix): `settings` and `model` are consumed as attributes throughout
    # this class (`self.settings.language`, `self.model.generate_voice_design`),
    # and `cached_property` is imported at the top of the file — the decorators
    # were evidently lost; without them every access returns a bound method.
    @cached_property
    def settings(self) -> SimpleNamespace:
        """Parse SETTINGS_FILE into a flat namespace (computed once, cached)."""
        raw = self._load_yaml(self.root / self.SETTINGS_FILE)
        rt = raw.get("runtime") or {}
        gen = raw.get("generation") or {}
        lim = raw.get("limits") or {}
        return SimpleNamespace(
            # "limits" wins over "runtime" for the default language.
            language=str(lim.get("default_language") or rt.get("default_language") or "English"),
            sample_rate=int(gen.get("sample_rate", 24000)),
            max_instruction_chars=int(lim.get("max_instruction_chars", 600)),
            max_text_chars=int(lim.get("max_text_chars", 2000)),
            prefer_cuda=str(rt.get("device_preference", "cuda")).lower() == "cuda",
            prefer_bf16=str(rt.get("dtype", "bfloat16")).lower() == "bfloat16",
            prefer_flash=bool(rt.get("use_flash_attention_2", False)),
        )

    @cached_property
    def model(self) -> Any:
        """Instantiate the TTS engine once and cache it for the instance lifetime."""
        return self._instantiate_engine()

    def warmup(self) -> None:
        """Run one short generation in a background thread to prime the model.

        Raises:
            RuntimeError: if the trial generation does not succeed within
                WARMUP_TIMEOUT seconds; includes the captured error, if any.
        """
        outcome: dict[str, Any] = {"done": False, "err": None}

        def _trial() -> None:
            try:
                self.generate_wav(instruction="Neutral voice.", text="Warming up.")
                outcome["done"] = True
            except Exception as exc:
                outcome["err"] = repr(exc)

        # Daemon thread: a hung generation must not block interpreter exit.
        worker = threading.Thread(target=_trial, daemon=True)
        worker.start()
        worker.join(timeout=self.WARMUP_TIMEOUT)
        if not outcome["done"]:
            raise RuntimeError(
                f"warmup did not complete within {self.WARMUP_TIMEOUT}s: {outcome['err'] or 'no completion signal'}"
            )

    def generate_wav(self, instruction: str, text: str) -> tuple[np.ndarray, int]:
        """Synthesize *text* in the style of *instruction*.

        Inputs are truncated to the configured character limits; a limit of
        zero or less disables truncation for that input.

        Returns:
            A (mono float32 waveform, sample rate) tuple.

        Raises:
            ValueError: if the engine returns no audio.
        """
        s = self.settings
        prompt = instruction[: s.max_instruction_chars] if s.max_instruction_chars > 0 else instruction
        body = text[: s.max_text_chars] if s.max_text_chars > 0 else text
        wavs, sample_rate = self.model.generate_voice_design(
            text=body,
            instruct=prompt,
            language=s.language,
        )
        if not wavs or wavs[0] is None:
            raise ValueError("qwen3-tts produced no audio")
        wave = np.asarray(wavs[0], dtype=np.float32)
        # Downmix multi-channel output to mono by averaging across channels.
        if wave.ndim > 1:
            wave = wave.mean(axis=1)
        return wave, int(sample_rate)

    def _instantiate_engine(self) -> Any:
        """Load Qwen3TTSModel from self.root, trying flash attention first if requested.

        Falls back to "sdpa" when flash attention fails to load.

        Raises:
            RuntimeError: if every attention implementation fails, wrapping
                the last captured failure.
        """
        import torch
        from qwen_tts import Qwen3TTSModel

        s = self.settings
        cuda_ready = bool(torch.cuda.is_available())
        # Only honor CUDA / bf16 preferences when a GPU is actually present.
        device_map = "cuda:0" if (s.prefer_cuda and cuda_ready) else "cpu"
        torch_dtype = torch.bfloat16 if (s.prefer_bf16 and cuda_ready) else torch.float32
        attempts = ("flash_attention_2", "sdpa") if s.prefer_flash else ("sdpa",)
        last_failure: BaseException | None = None
        for attn in attempts:
            try:
                engine = Qwen3TTSModel.from_pretrained(
                    pretrained_model_name_or_path=str(self.root),
                    device_map=device_map,
                    dtype=torch_dtype,
                    attn_implementation=attn,
                )
                dtype_tag = "bf16" if torch_dtype is torch.bfloat16 else "fp32"
                print(f"[Miner] qwen3-tts ready :: device={device_map} dtype={dtype_tag} attn={attn}")
                return engine
            except Exception as exc:
                last_failure = exc
        raise RuntimeError(f"qwen3-tts failed to load :: {last_failure!r}")

    # NOTE(fix): declared without `self` yet invoked as `self._load_yaml(...)`
    # — it must be a @staticmethod for that call to bind correctly.
    @staticmethod
    def _load_yaml(path: Path) -> dict[str, Any]:
        """Read a YAML mapping from *path*; a missing file yields an empty dict."""
        if not path.is_file():
            return {}
        from yaml import safe_load  # deferred: yaml only needed when a config exists

        with path.open("r", encoding="utf-8") as fh:
            return safe_load(fh) or {}