Spaces:

build-small-hackathon
/

tiny-army

Running

App Files Files Community

tiny-army / tts_server.py

polats

Add Qwen3-TTS local bridge: ?tts= override + tts_server.py

3bc1b31 5 days ago

raw

history blame contribute delete

4.25 kB

	"""Local Qwen3-TTS Voice Design server — the LeLab-style bridge.

	Runs the OPEN WEIGHTS on YOUR machine's GPU; the hosted Tiny Army UI calls it via a
	`?tts=` override, so voices are designed locally and off the grid (no DashScope key/cost).

	Quick start (on a CUDA box; MPS/CPU work but are slow):

	pip install qwen-tts soundfile "fastapi[standard]" uvicorn torch
	python tts_server.py # serves http://localhost:8800/qwen-tts

	Then open the app pointed at this server:

	http://localhost:7860/?tts=http://localhost:8800 # local UI + local TTS
	https://tinyarmy.noods.cc/?tts=http://localhost:8800 # hosted UI + your GPU
	(browsers block https→http://localhost by default; run Chrome with
	--unsafely-treat-insecure-origin-as-secure=http://localhost:8800 or serve TLS)

	Smoke-test the bridge WITHOUT a GPU (returns a short tone instead of speech):

	QWEN_TTS_STUB=1 python tts_server.py

	Env (defaults in parentheses): PORT (8800), QWEN_TTS_MODEL (Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign),
	QWEN_TTS_STUB (unset; any value other than empty, 0, false or False enables stub mode).
	"""
	import asyncio
	import io
	import math
	import os
	import struct

	from fastapi import FastAPI, Request
	from fastapi.responses import Response
	from fastapi.middleware.cors import CORSMiddleware

	# Checkpoint identifier handed to Qwen3TTSModel.from_pretrained(); override with QWEN_TTS_MODEL.
	MODEL_ID = os.getenv("QWEN_TTS_MODEL", "Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign")
	# Stub mode is on for any QWEN_TTS_STUB value except the empty string, "0", "false" or "False".
	STUB = os.getenv("QWEN_TTS_STUB", "") not in {"", "0", "false", "False"}
	# TCP port the server listens on; a non-integer PORT raises ValueError at import time.
	PORT = int(os.getenv("PORT", "8800"))

	app = FastAPI()
	# The hosted UI is served from a different origin than this bridge, so the browser
	# only lets it call us if CORS permits it. Allow every origin (which covers the hosted
	# UI and any localhost dev port), every method and every header. Empty-string entries
	# match nothing and would reject all cross-origin calls; "*" is the wildcard.
	# Credentials stay disabled.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_methods=["*"],
	    allow_headers=["*"],
	    allow_credentials=False,
	)

	# Lazily-created model instance; stays None until _load_model() has loaded it.
	_model = None
	# Despite its name, this lock is held around the whole non-stub synthesis call in
	# qwen_tts() (which also triggers the first model load), so requests are decoded
	# one at a time.
	_load_lock = asyncio.Lock()


	def _load_model():
	    """Return the shared Qwen3-TTS model, loading it on the first call.

	    torch and qwen_tts are imported inside the function, so they are only needed
	    once a real (non-cached) load happens. The device is picked in order of
	    preference: CUDA (bfloat16), then MPS (float32), then CPU (float32).
	    The loaded model is cached in the module-level ``_model``.
	    """
	    global _model
	    if _model is None:
	        import torch
	        from qwen_tts import Qwen3TTSModel

	        if torch.cuda.is_available():
	            target = ("cuda:0", torch.bfloat16)
	        elif getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
	            target = ("mps", torch.float32)
	        else:
	            target = ("cpu", torch.float32)
	        device, precision = target

	        print(f"[tts] loading {MODEL_ID} on {device} ({precision})…", flush=True)
	        _model = Qwen3TTSModel.from_pretrained(MODEL_ID, device_map=device, dtype=precision)
	        print("[tts] model ready", flush=True)
	    return _model


	def _stub_wav(text, sr=24000):
	"""A short A4 tone — proves the bridge end-to-end without loading the model."""
	secs = min(4.0, max(0.6, len(text) / 18.0))
	n = int(sr * secs)
	buf = io.BytesIO()
	data = b"".join(struct.pack("<h", int(0.25 * 32767 * math.sin(2 * math.pi * 440 * i / sr))) for i in range(n))
	buf.write(b"RIFF"); buf.write(struct.pack("<I", 36 + len(data))); buf.write(b"WAVE")
	buf.write(b"fmt "); buf.write(struct.pack("<IHHIIHH", 16, 1, 1, sr, sr * 2, 2, 16))
	buf.write(b"data"); buf.write(struct.pack("<I", len(data))); buf.write(data)
	return buf.getvalue()


	def _synth(text, instruct, language):
	    """Render ``text`` to WAV bytes.

	    In stub mode this returns the test tone from _stub_wav() and ignores
	    ``instruct`` and ``language``. Otherwise it calls the model's
	    generate_voice_design() and encodes the first returned waveform as WAV
	    with soundfile. This call blocks until synthesis finishes.
	    """
	    if not STUB:
	        # Imported lazily so stub mode works without soundfile installed.
	        import soundfile as sf

	        model = _load_model()
	        wavs, sr = model.generate_voice_design(text=text, language=language, instruct=instruct)
	        sink = io.BytesIO()
	        sf.write(sink, wavs[0], sr, format="WAV")
	        return sink.getvalue()
	    return _stub_wav(text)


	@app.get("/health")
	def health():
	    """Status probe: report the configured model, stub mode and whether the model is loaded."""
	    status = {"ok": True, "model": MODEL_ID, "stub": STUB}
	    status["loaded"] = _model is not None
	    return status


	@app.post("/qwen-tts")
	async def qwen_tts(request: Request):
	    """Synthesize speech for a JSON request and return it as ``audio/wav``.

	    Expected body: a JSON object with ``text`` (required), plus optional
	    ``instruct`` (voice description; a neutral default is used when empty) and
	    ``language`` (defaults to "English").

	    Returns 400 with a plain-text reason when the body is not valid JSON, is not
	    a JSON object, has a non-string ``text``/``instruct``, or has empty ``text``.
	    Errors raised during synthesis are not caught here.
	    """
	    try:
	        body = await request.json()
	    except ValueError:
	        # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
	        return Response("invalid JSON body", status_code=400)
	    if not isinstance(body, dict):
	        return Response("JSON object required", status_code=400)

	    text = body.get("text") or ""
	    instruct = body.get("instruct") or ""
	    if not isinstance(text, str) or not isinstance(instruct, str):
	        # Reject client mistakes up front instead of failing on .strip() below.
	        return Response("text and instruct must be strings", status_code=400)

	    text = text.strip()
	    instruct = instruct.strip() or "A clear, natural voice at a moderate pace."
	    language = body.get("language") or "English"
	    if not text:
	        return Response("text required", status_code=400)

	    if not STUB:
	        async with _load_lock:  # one CPU/GPU model can't decode in parallel
	            # Run the blocking synthesis in a worker thread so the event loop
	            # keeps serving other requests (e.g. /health) meanwhile.
	            wav = await asyncio.to_thread(_synth, text, instruct, language)
	    else:
	        wav = _synth(text, instruct, language)
	    return Response(wav, media_type="audio/wav", headers={"Cache-Control": "no-store"})


	if __name__ == "__main__":
	    # Imported here so uvicorn is only required when the file is run as a script.
	    import uvicorn

	    banner = f"[tts] http://localhost:{PORT}/qwen-tts (stub={STUB}, model={MODEL_ID})"
	    print(banner, flush=True)
	    # host 0.0.0.0 listens on every network interface, not only loopback.
	    uvicorn.run(app, host="0.0.0.0", port=PORT)