Spaces:

Rajhuggingface4253
/

neu

Paused

App Files Files Community

neu / app.py

Rajhuggingface4253

Create app.py

3b32b80 verified 2 months ago

raw

history blame

5.12 kB

	import os
	import sys
	import uuid
	from typing import Optional
	import numpy as np
	from fastapi import FastAPI, HTTPException, UploadFile, File, Form
	from fastapi.responses import FileResponse, JSONResponse
	from fastapi.middleware.cors import CORSMiddleware
	import soundfile as sf
	import io

	# Make the neutts-air checkout importable. The entry is a relative path, so
	# it is resolved against the process's current working directory.
	sys.path.append("neutts-air")

	try:
	    from neuttsair.neutts import NeuTTSAir
	except ImportError as e:
	    # Fail fast with an actionable message, and chain the original
	    # ImportError so the traceback still shows which module was missing.
	    raise RuntimeError(f"Failed to import NeuTTS Air: {e}. Make sure neutts-air submodule is initialized.") from e

	# Application object, carrying the title/description/version metadata.
	app = FastAPI(
	    version="1.0.0",
	    title="NeuTTS Air Production API",
	    description="Production-ready Text-to-Speech with Voice Cloning",
	)

	# CORS: accept requests from any origin, with any method and any header.
	app.add_middleware(
	    CORSMiddleware,
	    allow_headers=["*"],
	    allow_methods=["*"],
	    allow_origins=["*"],
	)

	# Single module-level model instance, built once at import time and shared
	# by every endpoint. Both the backbone and the codec are placed on the CPU.
	tts = NeuTTSAir(
	    backbone_repo="neuphonic/neutts-air",
	    codec_repo="neuphonic/neucodec",
	    backbone_device="cpu",
	    codec_device="cpu",
	)

	# Working directories for uploaded reference clips and generated audio.
	for _workdir in ("uploads", "outputs"):
	    os.makedirs(_workdir, exist_ok=True)

	@app.get("/")
	async def root():
	    """Return a static payload identifying the service as online."""
	    payload = dict(status="online", service="NeuTTS Air API")
	    return payload

	@app.get("/health")
	async def health_check():
	    """Return the health status and whether the module-level model is set."""
	    model_loaded = tts is not None
	    return dict(status="healthy", model_loaded=model_loaded)

	@app.post("/api/v1/synthesize")
	async def synthesize_speech(
	    ref_text: str = Form(..., description="Reference audio transcript"),
	    gen_text: str = Form(..., description="Text to synthesize"),
	    ref_audio: UploadFile = File(..., description="Reference audio file (WAV)")
	):
	    """
	    Synthesize speech using voice cloning.

	    Args:
	        ref_text: Transcript of the reference audio.
	        gen_text: Text to synthesize.
	        ref_audio: Uploaded reference recording; its filename must end in ".wav".

	    Returns:
	        A FileResponse streaming the generated WAV (written at 24000 Hz).

	    Raises:
	        HTTPException: 400 if the upload is not named as a WAV file,
	            500 if saving, encoding, inference or writing the output fails.
	    """
	    # Validate before entering the try block so the 400 is not rewritten
	    # into a 500 by the generic handler. The filename may be absent.
	    if not (ref_audio.filename or "").lower().endswith(".wav"):
	        raise HTTPException(400, "Only WAV files are supported as reference audio")

	    # The stored name is generated server-side; the client-supplied filename
	    # is never used as a path component (it could contain "../").
	    upload_path = f"uploads/{uuid.uuid4()}.wav"

	    try:
	        content = await ref_audio.read()
	        with open(upload_path, "wb") as f:
	            f.write(content)

	        # Encode the reference voice, then generate audio for gen_text.
	        ref_codes = tts.encode_reference(upload_path)
	        wav = tts.infer(gen_text, ref_codes, ref_text)

	        output_path = f"outputs/{uuid.uuid4()}.wav"
	        sf.write(output_path, wav, 24000)
	    except HTTPException:
	        # Preserve deliberate HTTP errors and their status codes.
	        raise
	    except Exception as e:
	        raise HTTPException(500, f"Synthesis failed: {str(e)}") from e
	    finally:
	        # The reference clip is only needed during this request. Cleanup is
	        # best-effort and must not mask the real outcome.
	        try:
	            os.remove(upload_path)
	        except OSError:
	            pass

	    # NOTE(review): the generated file stays in outputs/ after the response
	    # is sent; nothing in this module removes it.
	    return FileResponse(
	        output_path,
	        media_type="audio/wav",
	        filename="synthesized_speech.wav"
	    )

	@app.post("/api/v1/synthesize/b64")
	async def synthesize_speech_base64(
	    ref_text: str = Form(...),
	    gen_text: str = Form(...),
	    ref_audio: UploadFile = File(...)
	):
	    """
	    Synthesize speech and return it as base64 encoded audio.

	    Args:
	        ref_text: Transcript of the reference audio.
	        gen_text: Text to synthesize.
	        ref_audio: Uploaded reference recording.

	    Returns:
	        A JSONResponse with "audio_data" (base64 of a WAV file),
	        "sample_rate" (24000) and "format" ("wav").

	    Raises:
	        HTTPException: 500 if saving, encoding, inference or serialization fails.
	    """
	    import base64

	    # Keep only the extension of the client-supplied name; the rest of the
	    # stored name is generated server-side so "../" cannot escape uploads/.
	    suffix = os.path.splitext(os.path.basename(ref_audio.filename or ""))[1]
	    upload_path = f"uploads/{uuid.uuid4()}{suffix}"

	    try:
	        content = await ref_audio.read()
	        with open(upload_path, "wb") as f:
	            f.write(content)

	        # Encode the reference voice, then generate audio for gen_text.
	        ref_codes = tts.encode_reference(upload_path)
	        wav = tts.infer(gen_text, ref_codes, ref_text)

	        # Serialize to an in-memory WAV; the format must be given explicitly
	        # because there is no file name to infer it from.
	        buffer = io.BytesIO()
	        sf.write(buffer, wav, 24000, format='WAV')
	        audio_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
	    except HTTPException:
	        raise
	    except Exception as e:
	        raise HTTPException(500, f"Synthesis failed: {str(e)}") from e
	    finally:
	        # The reference clip is only needed during this request. Cleanup is
	        # best-effort and must not mask the real outcome.
	        try:
	            os.remove(upload_path)
	        except OSError:
	            pass

	    return JSONResponse({
	        "audio_data": audio_b64,
	        "sample_rate": 24000,
	        "format": "wav"
	    })

	# Batch processing endpoint
	@app.post("/api/v1/batch-synthesize")
	async def batch_synthesize(
	    ref_text: str = Form(...),
	    ref_audio: UploadFile = File(...),
	    texts: str = Form(..., description="JSON array of texts to synthesize")
	):
	    """
	    Synthesize multiple texts with the same voice.

	    Args:
	        ref_text: Transcript of the reference audio.
	        ref_audio: Uploaded reference recording, encoded once and reused.
	        texts: JSON-encoded array of strings, one per utterance.

	    Returns:
	        {"generated_files": [...]} listing the server-side paths of the
	        WAV files written to outputs/, in the same order as the input texts.

	    Raises:
	        HTTPException: 400 if "texts" is not a JSON array of strings,
	            500 if saving, encoding, inference or writing an output fails.
	    """
	    import json

	    # Reject malformed input as a client error. Without the type check a
	    # bare JSON string would be iterated character by character.
	    try:
	        text_list = json.loads(texts)
	    except ValueError as e:
	        raise HTTPException(400, f"'texts' must be a valid JSON array: {e}") from e
	    if not isinstance(text_list, list) or not all(isinstance(t, str) for t in text_list):
	        raise HTTPException(400, "'texts' must be a JSON array of strings")

	    # Keep only the extension of the client-supplied name; the rest of the
	    # stored name is generated server-side so "../" cannot escape uploads/.
	    suffix = os.path.splitext(os.path.basename(ref_audio.filename or ""))[1]
	    upload_path = f"uploads/{uuid.uuid4()}{suffix}"

	    try:
	        content = await ref_audio.read()
	        with open(upload_path, "wb") as f:
	            f.write(content)

	        # Encode reference once and reuse the codes for every text.
	        ref_codes = tts.encode_reference(upload_path)

	        results = []
	        for text in text_list:
	            wav = tts.infer(text, ref_codes, ref_text)
	            output_path = f"outputs/{uuid.uuid4()}.wav"
	            sf.write(output_path, wav, 24000)
	            results.append(output_path)
	    except HTTPException:
	        raise
	    except Exception as e:
	        raise HTTPException(500, f"Batch synthesis failed: {str(e)}") from e
	    finally:
	        # The reference clip is only needed during this request. Cleanup is
	        # best-effort and must not mask the real outcome.
	        try:
	            os.remove(upload_path)
	        except OSError:
	            pass

	    return {"generated_files": results}

	if __name__ == "__main__":
	    # Script entry point: serve the app on all interfaces, port 7860.
	    import uvicorn

	    uvicorn.run(app, port=7860, host="0.0.0.0")