Spaces:

GexSay
/

gexsaytts

Runtime error

App Files Files Community

gexsaytts / app.py

GexSay

Update app.py

cffa8ac verified 6 months ago

raw

history blame contribute delete

3.76 kB

	from fastapi import FastAPI, HTTPException
	from fastapi.responses import FileResponse
	from fastapi.middleware.cors import CORSMiddleware
	import uvicorn
	import os
	import tempfile
	import pickle
	from vinorm import TTSnorm
	from f5_tts.model import DiT
	from f5_tts.infer.utils_infer import load_vocoder, load_model, infer_process
	from huggingface_hub import hf_hub_download, snapshot_download
	import soundfile as sf

	# Import-time start-up: load the vocoder and the TTS model once for the whole process.
	# Voices are NOT loaded here; generate_tts fetches each voice pickle on first use.
	# Hub access token read from the HF_TOKEN environment variable (None when unset).
	hf_token = os.environ.get("HF_TOKEN")

	print("🔄 Đang tải models và voice...")

	# 1. Load the TTS model
	vocoder = load_vocoder()
	# The checkpoint and the vocabulary file both come from the GexSay/stt1beta model repo.
	model_ckpt = hf_hub_download(repo_id="GexSay/stt1beta", filename="model_last.pt", repo_type="model", token=hf_token)
	# NOTE(review): the vocab file is downloaded from "config.json" - confirm that this
	# file really holds the vocabulary that load_model expects.
	vocab_file = hf_hub_download(repo_id="GexSay/stt1beta", filename="config.json", repo_type="model", token=hf_token)
	model = load_model(DiT, dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4), ckpt_path=model_ckpt, vocab_file=vocab_file)

	# Cache of voice name -> local path of its downloaded pickle; filled by generate_tts.
	pkl_dict = {}

	# Application object that the route decorators in this file attach to.
	app = FastAPI(title="Bankme TTS API")

	# CORS is left fully open: any origin, any method and any header is accepted.
	app.add_middleware(
	    CORSMiddleware,
	    allow_headers=["*"],
	    allow_methods=["*"],
	    allow_origins=["*"],
	)

	def post_process(text: str):
	    """Tidy normalized text before it is handed to the synthesizer.

	    The text is padded with one space on each side so the space-delimited
	    patterns also match at the very start and end. Doubled full stops and
	    doubled commas are then collapsed (one pass per pattern), double-quote
	    characters are dropped, and every run of whitespace is squeezed to a
	    single space with the outer padding removed.

	    Args:
	        text: The text to clean.

	    Returns:
	        The cleaned, single-spaced string.
	    """
	    # Applied in this exact order; each pattern gets a single replace pass.
	    substitutions = (
	        (" . . ", " . "),
	        (" .. ", " . "),
	        (" , , ", " , "),
	        (" ,, ", " , "),
	        ('"', ""),
	    )
	    padded = f" {text} "
	    for pattern, replacement in substitutions:
	        padded = padded.replace(pattern, replacement)
	    return " ".join(padded.split())

	@app.get("/")
	async def root():
	    """Return a small status payload showing that the service is up."""
	    payload = dict(message="Bankme TTS", status="running")
	    return payload

	def _resolve_voice_path(voice: str) -> str:
	    """Return the local path of the pickle that holds *voice*'s reference data.

	    Paths already present in ``pkl_dict`` are reused; otherwise
	    ``voice/<voice>.pkl`` is downloaded from the GexSay/stt1beta model repo
	    and the resulting path is cached in ``pkl_dict``.

	    Raises:
	        HTTPException: 404 when the voice file cannot be downloaded.
	    """
	    if voice in pkl_dict:
	        return pkl_dict[voice]

	    print(f"🔄 Voice '{voice}' chưa có local, thử tải từ HF Hub...")
	    try:
	        # NOTE(review): `voice` comes straight from the request and is interpolated
	        # into the repo file name - confirm whether names containing "/" or ".."
	        # should be rejected.
	        pkl_path = hf_hub_download(
	            repo_id="GexSay/stt1beta",
	            filename=f"voice/{voice}.pkl",
	            repo_type="model",
	            token=hf_token,
	        )
	    except Exception as e:
	        print(f"❌ Không thể tải voice '{voice}' từ HF Hub: {e}")
	        # Only voices that were already downloaded are known locally, so those
	        # are what gets reported. (The original referenced an undefined name
	        # here, which turned this 404 into a 500.)
	        raise HTTPException(
	            status_code=404,
	            detail=f"Voice '{voice}' not found and cannot be downloaded. Available voice: {sorted(pkl_dict)}",
	        ) from e

	    pkl_dict[voice] = pkl_path
	    print(f"✅ Đã tải voice '{voice}' thành công")
	    return pkl_path


	@app.post("/tts")
	async def generate_tts(voice: str, text: str, speed: float = 1.0):
	    """Synthesize *text* with the requested *voice* and return it as a WAV file.

	    Args:
	        voice: Name of the voice; its reference data is read from
	            ``voice/<voice>.pkl`` in the GexSay/stt1beta repo (cached after
	            the first download).
	        text: Text to speak. It is normalized with ``TTSnorm``, cleaned with
	            ``post_process`` and lower-cased before inference.
	        speed: Passed unchanged to ``infer_process`` as ``speed``.

	    Returns:
	        A ``FileResponse`` with media type ``audio/wav`` and download name
	        ``tts_<voice>.wav``. The temporary file behind it is deleted once the
	        response has been sent.

	    Raises:
	        HTTPException: 400 when ``voice`` is empty or ``text`` is blank,
	            404 when the voice cannot be downloaded, 500 for any other
	            failure during generation.
	    """
	    # Function-scope import keeps this block self-contained; fastapi is already
	    # a dependency of this module.
	    from fastapi import BackgroundTasks

	    try:
	        # Validate input
	        if not voice:
	            raise HTTPException(status_code=400, detail="Voice is required")
	        if not text.strip():
	            raise HTTPException(status_code=400, detail="Text is required")

	        pkl_path = _resolve_voice_path(voice)

	        # Load the reference audio, its sample rate and its transcript.
	        # NOTE(review): pickle.load can execute arbitrary code from the file; this
	        # is only safe while the model repo's contents are fully trusted.
	        with open(pkl_path, "rb") as f:
	            audio, sr, ref_text = pickle.load(f)

	        # Normalize and clean the text to be spoken.
	        processed_text = post_process(TTSnorm(text, punc=True)).lower()

	        # Generate audio.
	        # NOTE(review): infer_process is called synchronously inside this
	        # coroutine - presumably it blocks the event loop while it runs; confirm
	        # whether it should be moved to a worker thread.
	        final_wave, final_sr, _ = infer_process(
	            audio,
	            sr,
	            ref_text.lower(),
	            processed_text,
	            model,
	            vocoder,
	            nfe_step=8,
	            speed=speed,
	        )

	        # Reserve a temp file name, then write the audio after the handle is closed.
	        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
	            temp_path = tmp_file.name
	        try:
	            sf.write(temp_path, final_wave, final_sr)
	        except Exception:
	            # Do not leave a half-written file behind when encoding fails.
	            os.remove(temp_path)
	            raise

	        # delete=False means nothing else removes the file, so schedule removal
	        # for after the response body has been streamed to the client.
	        cleanup = BackgroundTasks()
	        cleanup.add_task(os.remove, temp_path)

	        return FileResponse(
	            temp_path,
	            media_type="audio/wav",
	            filename=f"tts_{voice}.wav",
	            background=cleanup,
	        )

	    except HTTPException:
	        # Deliberate 4xx responses pass through untouched.
	        raise
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=f"Generation error: {str(e)}") from e

	if __name__ == "__main__":
	    # Run as a script: serve the app on all interfaces, port 7860.
	    uvicorn.run(app, port=7860, host="0.0.0.0")