Text-to-Speech
Transformers
Safetensors
Qwen3-TTS
English
text-generation
tts
qwen
qwen3
qwen3-tts
voice-design
lora
fine-tuned
audio
Instructions to use Danieli1021/vocence_1.0 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Danieli1021/vocence_1.0 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-to-speech", model="Danieli1021/vocence_1.0")

# Load model directly
from transformers import AutoModelForSeq2SeqLM

model = AutoModelForSeq2SeqLM.from_pretrained("Danieli1021/vocence_1.0", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
File size: 2,524 Bytes
df368b4
"""Minimal inference example for qwen3_voice_design_t1.
Install:
pip install qwen-tts transformers torch soundfile
Run:
python example_inference.py # loads from local dir (./)
python example_inference.py --repo macminix/qwen3_voice_design_t1 # or pull from HF
The model is self-contained. No base model download is required.
"""
from __future__ import annotations
import argparse
from pathlib import Path
import soundfile as sf
import torch
from qwen_tts import Qwen3TTSModel
# Demo utterances. Each entry carries:
#   name     -> stem of the output WAV file
#   text     -> the sentence to be spoken
#   instruct -> natural-language description of the desired voice
PROMPTS = [
    {
        "name": "happy_male",
        "text": "Come and look at this, you are not going to believe it.",
        "instruct": "A male speaker delivers his happy speech at a moderate pace with standard energy.",
    },
    {
        "name": "sad_female_slow",
        "text": "I'm sorry. I tried everything I could think of.",
        "instruct": "A female voice speaks slowly with a sad, quiet tone.",
    },
    {
        "name": "angry_male_low_fast",
        "text": "You were warned, and you did it anyway.",
        "instruct": "A low-pitched male speaker, angry and forceful, speaking at a fast pace.",
    },
]
# Generation settings shared by every prompt; main() forwards them
# unchanged as keyword arguments to generate_voice_design().
GEN_KWARGS = {
    "language": "english",
    "temperature": 0.9,
    "top_k": 50,
    "top_p": 1.0,
    "repetition_penalty": 1.05,
    "max_new_tokens": 600,
    "do_sample": True,
}
def main() -> None:
    """Synthesize every entry of PROMPTS and write one WAV file per prompt.

    Command-line options:
      --repo     HF repo id or local path handed to Qwen3TTSModel.from_pretrained.
      --out-dir  directory that receives the ``<name>.wav`` files; it is
                 created (including parents) when it does not exist.
      --dtype    bfloat16, float16 or float32; resolved to the torch dtype.
      --device   torch device string; cuda:0 when CUDA is available, else cpu.
    """
    default_device = "cuda:0" if torch.cuda.is_available() else "cpu"

    parser = argparse.ArgumentParser()
    parser.add_argument("--repo", default=".", help="HF repo id or local path (default: current dir)")
    parser.add_argument("--out-dir", default="./out", help="where to write wavs")
    parser.add_argument("--dtype", default="bfloat16", choices=["bfloat16", "float16", "float32"])
    parser.add_argument(
        "--device",
        default=default_device,
        help="torch device (default: cuda:0 if available, else cpu)",
    )
    opts = parser.parse_args()

    # `choices` limits --dtype to names that are attributes of the torch
    # module, so a plain attribute lookup resolves the dtype object.
    torch_dtype = getattr(torch, opts.dtype)

    print(f"loading model from {opts.repo} (device={opts.device}, dtype={opts.dtype})")
    tts = Qwen3TTSModel.from_pretrained(opts.repo, device_map=opts.device, dtype=torch_dtype)

    target_dir = Path(opts.out_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    for prompt in PROMPTS:
        wavs, sample_rate = tts.generate_voice_design(
            text=prompt["text"],
            instruct=prompt["instruct"],
            **GEN_KWARGS,
        )
        # Only the first returned waveform is saved for each prompt.
        waveform = wavs[0]
        wav_path = target_dir / f"{prompt['name']}.wav"
        sf.write(wav_path, waveform, sample_rate)
        seconds = len(waveform) / sample_rate
        print(f" {wav_path} ({seconds:.1f} s @ {sample_rate} Hz)")

    print("done")


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|