Soprano-TTS

Sleeping

Soprano-TTS / soprano /server.py

Upload 17 files

5d9390a verified about 1 month ago

1.48 kB

	import base64
	import io
	import json
	from typing import Generator

	import numpy as np
	from fastapi import FastAPI, HTTPException
	from fastapi.responses import Response
	from scipy.io.wavfile import write
	from torch import Tensor

	from soprano.tts import SopranoTTS

	# Load model at startup: the instance is created once when this module is
	# imported and is reused by the request handler below.
	# NOTE(review): cache_size_mb is presumably a cache budget in megabytes —
	# confirm against SopranoTTS.
	tts = SopranoTTS(cache_size_mb = 100)

	# FastAPI application object; routes are attached to it via decorators.
	app = FastAPI(title="Soprano TTS API")

	def _tensor_to_wav_bytes(tensor: Tensor, sample_rate: int = 32000) -> bytes:
	    """
	    Convert a 1D fp32 torch tensor to a WAV byte stream.

	    Args:
	        tensor: Audio samples, nominally in [-1.0, 1.0]. Values outside
	            that range are clipped before quantization.
	        sample_rate: Sample rate in Hz written to the WAV header.
	            Defaults to 32000 (32kHz).

	    Returns:
	        The complete WAV file contents (header plus int16 samples).
	    """
	    # Tensor.numpy() raises for tensors that require grad or that live on a
	    # non-CPU device, so detach and copy to host memory first.
	    samples = tensor.detach().cpu().numpy()

	    # convert to int16
	    audio_int16 = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)

	    wav_io = io.BytesIO()
	    write(wav_io, sample_rate, audio_int16)
	    # getvalue() returns the whole buffer regardless of the stream position.
	    return wav_io.getvalue()


	@app.post("/v1/audio/speech")
	async def create_speech(payload: dict):
	    """
	    Minimal implementation of OpenAI's Speech endpoint.

	    Recognised request fields:
	    - input: string - the text to synthesize (required, non-blank).
	    - model, voice, etc. are accepted but ignored.
	    - response_format: str - ignored; the reply is always WAV.

	    Returns a WAV attachment named "speech.wav". Responds with HTTP 400
	    when `input` is missing, is not a string, or is only whitespace.
	    """
	    prompt = payload.get("input")
	    has_text = isinstance(prompt, str) and bool(prompt.strip())
	    if not has_text:
	        raise HTTPException(
	            status_code=400,
	            detail="`input` field must be a non-empty string.",
	        )

	    # Synthesize, then serialise the waveform into an in-memory WAV file.
	    waveform = tts.infer(prompt)
	    body = _tensor_to_wav_bytes(waveform)
	    disposition = {"Content-Disposition": 'attachment; filename="speech.wav"'}
	    return Response(
	        content=body,
	        media_type="audio/wav",
	        headers=disposition,
	    )