Spaces:

Rajhuggingface4253
/

neu

Paused

App Files Files Community

neu / app.py

Rajhuggingface4253

Update app.py

ff7d020 verified 2 months ago

raw

history blame

2 kB

	import tempfile
	import soundfile as sf
	from fastapi import FastAPI, HTTPException
	from fastapi.responses import FileResponse
	from pydantic import BaseModel
	from neuttsair.neutts import NeuTTSAir

	# Application object that the route decorators further down attach to.
	app = FastAPI(
	    title="NeuTTS-Air API",
	    description="A FastAPI service for the NeuTTS-Air model.",
	)

	# Backbone location handed to NeuTTSAir; the path is relative to the
	# working directory in the Docker container.
	MODEL_PATH = "neutts-air-q4-gguf"

	# Start from "no model" and only replace it when construction succeeds, so
	# the app can still be imported and report the failure per request.
	tts = None
	try:
	    tts = NeuTTSAir(backbone_repo=MODEL_PATH, backbone_device="cpu")
	except Exception as load_error:
	    print(f"Error loading model: {load_error}")

	# Request body schema for POST /tts.
	class TTSRequest(BaseModel):
	    # Text to synthesize; forwarded to tts.infer as its first argument.
	    text: str
	    # Path of the reference audio; forwarded as-is to tts.encode_reference.
	    ref_audio_path: str
	    # Transcript forwarded to tts.infer together with the encoded reference
	    # (presumably the words spoken in the reference audio -- confirm).
	    ref_text: str

	@app.get("/")
	def read_root():
	    """Simple health check endpoint."""
	    # Fixed payload: this route never touches the model, so it answers even
	    # when model loading failed at import time.
	    status = {"message": "NeuTTS-Air FastAPI is running."}
	    return status

	def _discard_temp_file(path):
	    """Delete the temporary file at ``path``; a file that cannot be removed is ignored."""
	    # Function-scope import: `os` is not among the module's top-level imports.
	    import os

	    try:
	        os.remove(path)
	    except OSError:
	        # Best-effort cleanup only: a missing or locked file must not turn
	        # into a failure for the request that triggered the cleanup.
	        pass


	@app.post("/tts", summary="Generate speech from text")
	async def tts_endpoint(request: TTSRequest):
	    """
	    Generates a WAV audio file from text using a reference audio and transcript.

	    The reference audio at ``ref_audio_path`` is encoded, speech for ``text``
	    is synthesized from that encoding and ``ref_text``, and the result is
	    returned as an ``audio/wav`` download named ``generated_speech.wav``.

	    Responds with 503 when the model failed to load at startup and with 500
	    when encoding, inference, or writing the audio fails.
	    """
	    # Function-scope import so this block is self-contained; starlette is the
	    # package that fastapi.responses.FileResponse comes from.
	    from starlette.background import BackgroundTask

	    if tts is None:
	        raise HTTPException(status_code=503, detail="Model is not loaded.")

	    # NOTE(review): ref_audio_path comes straight from the request body and is
	    # used as a server-side path without any restriction -- consider limiting
	    # it to a known directory of reference clips.
	    # NOTE(review): the calls below are synchronous inside an async handler, so
	    # other requests wait while one is being synthesized. Left unchanged
	    # because it is not visible here whether the model tolerates concurrent use.
	    filepath = None
	    try:
	        # Encode the reference audio, then run inference with its transcript.
	        ref_codes = tts.encode_reference(request.ref_audio_path)
	        wav_audio = tts.infer(request.text, ref_codes, request.ref_text)

	        # Reserve a unique path, and close the handle before writing so the
	        # audio writer is not opening a file that is still held open here.
	        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	            filepath = tmp.name
	        sf.write(filepath, wav_audio, tts.codec.sampling_rate)

	        # The background task runs after the response has been sent, so the
	        # temporary file no longer accumulates on disk across requests.
	        return FileResponse(
	            filepath,
	            media_type="audio/wav",
	            filename="generated_speech.wav",
	            background=BackgroundTask(_discard_temp_file, filepath),
	        )
	    except Exception as exc:
	        # Nothing will be streamed on this path, so remove the file right away.
	        if filepath is not None:
	            _discard_temp_file(filepath)
	        raise HTTPException(status_code=500, detail=f"Internal Server Error: {exc}") from exc