# tts_engine_model / miner.py
# Uploaded by arwin0727 via huggingface_hub (commit 79a876c, verified)
from __future__ import annotations
import threading
from functools import cached_property
from pathlib import Path
from types import SimpleNamespace
from typing import Any
import numpy as np
class Miner:
    """Wraps a locally-downloaded qwen3-tts checkpoint and exposes
    text-to-speech generation as float32 mono waveforms.
    """

    # File that must exist at the repo root for it to count as a checkpoint.
    REPO_SENTINEL = "config.json"
    # Optional runtime/generation/limits settings file inside the repo.
    SETTINGS_FILE = "vocence_config.yaml"
    # Hard ceiling (seconds) on the first trial generation in warmup().
    WARMUP_TIMEOUT = 180.0

    def __init__(self, path_hf_repo: Path) -> None:
        """Validate the repo folder and eagerly build settings and model.

        Raises:
            FileNotFoundError: if REPO_SENTINEL is missing from the repo.
        """
        self.root = Path(path_hf_repo).resolve()
        if not (self.root / self.REPO_SENTINEL).is_file():
            raise FileNotFoundError(f"{self.REPO_SENTINEL} not present in {self.root}")
        # Touch the cached properties now so config/load failures surface at
        # construction time instead of on the first generation request.
        _ = self.settings
        _ = self.model

    def __repr__(self) -> str:
        return f"<Miner root={self.root.name} language={self.settings.language!r}>"

    @cached_property
    def settings(self) -> SimpleNamespace:
        """Parse SETTINGS_FILE (if present) into a flat options namespace.

        A missing file or missing keys fall back to the defaults below.
        """
        raw = self._load_yaml(self.root / self.SETTINGS_FILE)
        rt = raw.get("runtime") or {}
        gen = raw.get("generation") or {}
        lim = raw.get("limits") or {}
        return SimpleNamespace(
            # NOTE(review): "limits" takes precedence over "runtime" for the
            # default language — confirm that ordering is intentional.
            language=str(lim.get("default_language") or rt.get("default_language") or "English"),
            sample_rate=int(gen.get("sample_rate", 24000)),
            max_instruction_chars=int(lim.get("max_instruction_chars", 600)),
            max_text_chars=int(lim.get("max_text_chars", 2000)),
            prefer_cuda=str(rt.get("device_preference", "cuda")).lower() == "cuda",
            prefer_bf16=str(rt.get("dtype", "bfloat16")).lower() == "bfloat16",
            prefer_flash=bool(rt.get("use_flash_attention_2", False)),
        )

    @cached_property
    def model(self) -> Any:
        """Lazily-constructed qwen3-tts engine (see _instantiate_engine)."""
        return self._instantiate_engine()

    def warmup(self) -> None:
        """Run one short generation to prime the model, with a hard timeout.

        The trial runs in a daemon thread so a hung backend cannot block the
        caller forever. If the timeout elapses the thread is abandoned (not
        killed) and may still be running in the background.

        Raises:
            RuntimeError: if the trial did not finish within WARMUP_TIMEOUT.
        """
        outcome: dict[str, Any] = {"done": False, "err": None}

        def _trial() -> None:
            try:
                self.generate_wav(instruction="Neutral voice.", text="Warming up.")
                outcome["done"] = True
            except Exception as exc:
                outcome["err"] = repr(exc)

        worker = threading.Thread(target=_trial, daemon=True)
        worker.start()
        worker.join(timeout=self.WARMUP_TIMEOUT)
        if not outcome["done"]:
            raise RuntimeError(
                f"warmup did not complete within {self.WARMUP_TIMEOUT}s: {outcome['err'] or 'no completion signal'}"
            )

    def generate_wav(self, instruction: str, text: str) -> tuple[np.ndarray, int]:
        """Synthesize speech for *text* styled by *instruction*.

        Both inputs are truncated to the configured character limits (a
        limit <= 0 disables truncation). Returns a float32 mono waveform and
        its sample rate; multi-channel output is averaged down to mono.

        Raises:
            ValueError: if the engine returns no audio.
        """
        s = self.settings
        prompt = instruction[: s.max_instruction_chars] if s.max_instruction_chars > 0 else instruction
        body = text[: s.max_text_chars] if s.max_text_chars > 0 else text
        wavs, sample_rate = self.model.generate_voice_design(
            text=body,
            instruct=prompt,
            language=s.language,
        )
        # len()-based emptiness check also works when the backend hands back
        # a numpy array, where bare truthiness (`not wavs`) would raise.
        if wavs is None or len(wavs) == 0 or wavs[0] is None:
            raise ValueError("qwen3-tts produced no audio")
        wave = np.asarray(wavs[0], dtype=np.float32)
        if wave.ndim > 1:
            wave = wave.mean(axis=1)  # collapse channels -> mono
        return wave, int(sample_rate)

    def _instantiate_engine(self) -> Any:
        """Load the qwen3-tts model.

        Prefers CUDA and bf16 when configured AND a GPU is available; tries
        flash_attention_2 first when requested, falling back to sdpa.

        Raises:
            RuntimeError: if every attention-implementation attempt fails;
                the last underlying failure is chained as the cause.
        """
        import torch
        from qwen_tts import Qwen3TTSModel

        s = self.settings
        cuda_ready = bool(torch.cuda.is_available())
        device_map = "cuda:0" if (s.prefer_cuda and cuda_ready) else "cpu"
        # bf16 is only used on GPU here; CPU inference stays in fp32.
        torch_dtype = torch.bfloat16 if (s.prefer_bf16 and cuda_ready) else torch.float32
        attempts = ("flash_attention_2", "sdpa") if s.prefer_flash else ("sdpa",)
        last_failure: BaseException | None = None
        for attn in attempts:
            try:
                engine = Qwen3TTSModel.from_pretrained(
                    pretrained_model_name_or_path=str(self.root),
                    device_map=device_map,
                    dtype=torch_dtype,
                    attn_implementation=attn,
                )
            except Exception as exc:
                last_failure = exc
                continue
            dtype_tag = "bf16" if torch_dtype is torch.bfloat16 else "fp32"
            print(f"[Miner] qwen3-tts ready :: device={device_map} dtype={dtype_tag} attn={attn}")
            return engine
        # Chain the underlying exception so the real cause is not lost.
        raise RuntimeError(f"qwen3-tts failed to load :: {last_failure!r}") from last_failure

    @staticmethod
    def _load_yaml(path: Path) -> dict[str, Any]:
        """Read a YAML mapping; return {} when the file is absent or empty.

        Raises:
            TypeError: if the file parses to something other than a mapping
                (previously this surfaced later as an opaque AttributeError).
        """
        if not path.is_file():
            return {}
        from yaml import safe_load  # deferred: pyyaml only needed if a config exists
        with path.open("r", encoding="utf-8") as fh:
            data = safe_load(fh)
        if data is None:
            return {}
        if not isinstance(data, dict):
            raise TypeError(f"{path} must contain a YAML mapping, got {type(data).__name__}")
        return data