Text-to-Speech
Transformers
Safetensors
Qwen3-TTS
English
text-generation
tts
prompttts
qwen3-tts
voice-design
vocence
Instructions to use michael-chan-000/tts-v21 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use michael-chan-000/tts-v21 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-to-speech", model="michael-chan-000/tts-v21")

# Load model directly
from transformers import AutoModelForSeq2SeqLM

model = AutoModelForSeq2SeqLM.from_pretrained("michael-chan-000/tts-v21", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| """demo.py — quick smoke test for vocence_miner_v1. | |
| Reads the merged checkpoint either from a local path or from the Hugging Face Hub, | |
| then generates a small set of preset clips that exercise the prompt-following range. | |
| pip install qwen-tts transformers torch soundfile | |
| python demo.py # uses the current directory | |
| python demo.py --source magma90909/vocence_miner_v8 # pull from HF | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import dataclasses | |
| import sys | |
| from pathlib import Path | |
| import soundfile as sf | |
| import torch | |
| from qwen_tts import Qwen3TTSModel | |
@dataclasses.dataclass(frozen=True)
class Sample:
    """One preset clip: the text to speak and the voice description to follow.

    The ``dataclass`` decorator generates the keyword ``__init__`` that the
    ``Sample(slug=..., say=..., voice=...)`` call sites in this file rely on.
    Without it the class only carries bare annotations and cannot be
    constructed with arguments. Instances are frozen because the presets are
    treated as constants.
    """

    # Base name (without extension) of the wav file written for this preset.
    slug: str
    # Text that is synthesized.
    say: str
    # Natural-language voice/style instruction handed to the model.
    voice: str
# Preset clips rendered by the demo, one wav file per entry.
# Each row is (slug, text to speak, voice instruction).
SAMPLES: tuple[Sample, ...] = tuple(
    Sample(slug=slug, say=say, voice=voice)
    for slug, say, voice in (
        (
            "warm_male_storyteller",
            "Long ago, in a kingdom by the sea, a young girl made a remarkable discovery.",
            "An older male narrator reads a bedtime story slowly, with warmth.",
        ),
        (
            "whisper_female",
            "Don't say a word. Just listen carefully.",
            "A young woman whispers, conspiratorial, low energy, very quiet.",
        ),
        (
            "projecting_announcer",
            "And he scores in the final second of the match!",
            "A high-pitched announcer projects an exciting headline at a fast pace.",
        ),
    )
)
# Sampling settings unpacked as keyword arguments into each generation call.
SAMPLER = {
    "temperature": 0.85,
    "top_k": 50,
    "top_p": 0.95,
    "repetition_penalty": 1.05,
    "max_new_tokens": 600,
    "do_sample": True,
}
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the demo's command-line options.

    Args:
        argv: Argument list to parse. ``None`` lets argparse fall back to
            ``sys.argv[1:]``.

    Returns:
        Namespace with ``source``, ``out``, ``precision`` and ``device``.
    """
    # The first line of the module docstring doubles as the --help description.
    # ``__doc__`` is None when docstrings are stripped (``python -OO``) or the
    # module has none, so fall back to an empty string instead of failing on
    # ``None.split``.
    summary = (__doc__ or "").split("\n", 1)[0]
    parser = argparse.ArgumentParser(description=summary or None)
    parser.add_argument("--source", default=".", help="HF repo id or local checkpoint dir")
    parser.add_argument("--out", default="./demo_out", help="output dir for wav files")
    parser.add_argument(
        "--precision",
        default="bfloat16",
        choices=("bfloat16", "float16", "float32"),
    )
    # Prefer the first CUDA device when one is visible; otherwise run on CPU.
    parser.add_argument("--device", default="cuda:0" if torch.cuda.is_available() else "cpu")
    return parser.parse_args(argv)
def load(source: str, device: str, precision: str) -> Qwen3TTSModel:
    """Load the checkpoint at ``source`` onto ``device`` in the given precision.

    Args:
        source: Value handed to ``Qwen3TTSModel.from_pretrained`` as the
            checkpoint location.
        device: Value passed as ``device_map``.
        precision: One of ``"bfloat16"``, ``"float16"`` or ``"float32"``.

    Returns:
        The object returned by ``Qwen3TTSModel.from_pretrained``.

    Raises:
        KeyError: If ``precision`` is not one of the three supported names.
    """
    torch_dtypes = {
        "bfloat16": torch.bfloat16,
        "float16": torch.float16,
        "float32": torch.float32,
    }
    # Resolve the dtype first so an unknown precision fails before any loading.
    selected_dtype = torch_dtypes[precision]
    print(f"[demo] loading {source!r} -> {device} ({precision})", flush=True)
    loaded = Qwen3TTSModel.from_pretrained(source, device_map=device, dtype=selected_dtype)
    return loaded
def synth_one(model: Qwen3TTSModel, sample: Sample, out_dir: Path) -> Path:
    """Generate one preset clip and write it to ``out_dir`` as a wav file.

    Args:
        model: Model whose ``generate_voice_design`` method is called.
        sample: Preset supplying the text (``say``), the voice instruction
            (``voice``) and the output file stem (``slug``).
        out_dir: Directory the wav file is written into.

    Returns:
        Path of the written ``<slug>.wav`` file.
    """
    waveforms, sample_rate = model.generate_voice_design(
        text=sample.say,
        instruct=sample.voice,
        language="english",
        **SAMPLER,
    )
    # Only the first returned waveform is saved and reported.
    clip = waveforms[0]
    destination = out_dir.joinpath(f"{sample.slug}.wav")
    sf.write(destination, clip, sample_rate)
    seconds = len(clip) / sample_rate
    print(f" -> {destination.name} ({seconds:.2f}s @ {sample_rate} Hz)")
    return destination
def run(args: argparse.Namespace) -> int:
    """Render every preset in ``SAMPLES`` and return the process exit code.

    Args:
        args: Parsed options; reads ``out``, ``source``, ``device`` and
            ``precision``.

    Returns:
        Always ``0`` once every clip has been written.
    """
    destination = Path(args.out)
    # Create the output directory (and any parents) before the model is loaded.
    destination.mkdir(parents=True, exist_ok=True)

    tts = load(args.source, args.device, args.precision)
    for preset in SAMPLES:
        synth_one(tts, preset, destination)

    print(f"[demo] {len(SAMPLES)} clips written to {destination}/", flush=True)
    return 0
# Script entry point: parse the CLI options, run the demo, and exit with its
# return code.
if __name__ == "__main__":
    raise SystemExit(run(parse_args()))