Text-to-Speech
Transformers
Safetensors
Qwen3-TTS
English
text-generation
tts
qwen
qwen3
qwen3-tts
voice-design
lora
fine-tuned
audio
Instructions to use ShinyUser/vocence-miner02 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ShinyUser/vocence-miner02 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-to-speech", model="ShinyUser/vocence-miner02")

# Load model directly
from transformers import AutoModelForSeq2SeqLM

model = AutoModelForSeq2SeqLM.from_pretrained("ShinyUser/vocence-miner02", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| """Minimal inference example for qwen3_voice_design_t1. | |
| Install: | |
| pip install qwen-tts transformers torch soundfile | |
| Run: | |
| python example_inference.py # loads from local dir (./) | |
| python example_inference.py --repo macminix/qwen3_voice_design_t1 # or pull from HF | |
| The model is self-contained. No base model download is required. | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| from pathlib import Path | |
| import soundfile as sf | |
| import torch | |
| from qwen_tts import Qwen3TTSModel | |
# Demo prompts rendered by main(). For each entry, `text` and `instruct` are
# passed to generate_voice_design(), and `name` is used as the stem of the
# output WAV file.
PROMPTS = [
    {
        "name": "happy_male",
        "text": "Come and look at this, you are not going to believe it.",
        "instruct": "A male speaker delivers his happy speech at a moderate pace with standard energy.",
    },
    {
        "name": "sad_female_slow",
        "text": "I'm sorry. I tried everything I could think of.",
        "instruct": "A female voice speaks slowly with a sad, quiet tone.",
    },
    {
        "name": "angry_male_low_fast",
        "text": "You were warned, and you did it anyway.",
        "instruct": "A low-pitched male speaker, angry and forceful, speaking at a fast pace.",
    },
]
# Generation settings shared by every prompt; main() forwards them unchanged
# as keyword arguments to generate_voice_design().
GEN_KWARGS = {
    "language": "english",
    "temperature": 0.9,
    "top_k": 50,
    "top_p": 1.0,
    "repetition_penalty": 1.05,
    "max_new_tokens": 600,
    "do_sample": True,
}
def main(argv: list[str] | None = None) -> None:
    """Load the model and write one WAV file per entry in ``PROMPTS``.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, so running the script from a
            shell behaves as before; passing a list lets other code or
            tests drive the CLI directly.

    Raises:
        SystemExit: Raised by argparse for ``--help`` or invalid arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--repo", default=".", help="HF repo id or local path (default: current dir)")
    parser.add_argument("--out-dir", default="./out", help="where to write wavs")
    parser.add_argument("--dtype", default="bfloat16", choices=["bfloat16", "float16", "float32"])
    parser.add_argument(
        "--device",
        default="cuda:0" if torch.cuda.is_available() else "cpu",
        help="torch device (default: cuda:0 if available, else cpu)",
    )
    args = parser.parse_args(argv)

    # `choices` above restricts --dtype to exactly these keys, so the lookup
    # cannot raise KeyError.
    dtype = {
        "bfloat16": torch.bfloat16,
        "float16": torch.float16,
        "float32": torch.float32,
    }[args.dtype]

    print(f"loading model from {args.repo} (device={args.device}, dtype={args.dtype})")
    model = Qwen3TTSModel.from_pretrained(args.repo, device_map=args.device, dtype=dtype)

    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    for prompt in PROMPTS:
        wavs, sr = model.generate_voice_design(
            text=prompt["text"],
            instruct=prompt["instruct"],
            **GEN_KWARGS,
        )
        # Only the first returned waveform is saved; one text is submitted
        # per call.
        path = out_dir / f"{prompt['name']}.wav"
        sf.write(path, wavs[0], sr)
        print(f" {path} ({len(wavs[0]) / sr:.1f} s @ {sr} Hz)")
    print("done")


if __name__ == "__main__":
    main()