Spaces:

Hexa09
/

gg-space

Sleeping

App Files Files Community

gg-space / app.py

Hexa06

Deploy TTS service

42e66bb 3 months ago

raw

history blame contribute delete

6.04 kB

	from fastapi import FastAPI, HTTPException, Form, BackgroundTasks
	from fastapi.responses import FileResponse
	from kokoro_onnx import Kokoro
	import tempfile
	import os
	from datetime import datetime
	import soundfile as sf

	# ============== CONFIG ==============
	# Longest accepted input. At roughly 900 characters per minute of speech
	# this corresponds to about five minutes of audio.
	MAX_CHARS = 4_500
	# Shortest accepted input.
	MIN_CHARS = 5
	# Five minutes of audio, expressed in seconds.
	MAX_AUDIO_DURATION = 5 * 60

	# ============== KOKORO TTS MODEL ==============
	# Try to load the model eagerly at import time. If that fails for any reason
	# (typically: the files have not been downloaded yet) `kokoro` is left as
	# None and the startup hook downloads the files and loads the model instead.
	print("🎤 Loading Kokoro TTS model...")
	try:
	    # Use the same voices *file* that startup() downloads and loads from;
	    # "voices" on its own is only the directory that file lives in.
	    kokoro = Kokoro("kokoro-v0_19.onnx", "voices/voices.bin")
	    print("✅ Kokoro TTS loaded successfully!")
	except Exception as e:
	    # Surface the real cause instead of silently discarding it.
	    print(f"⚠️ Kokoro not found locally ({e}). Will download on startup.")
	    kokoro = None

	# The ASGI application; route handlers below register themselves on it.
	app = FastAPI(
	    title="Kokoro TTS API - Fast & Simple",
	    description="High-speed text-to-speech with emotional voices",
	    version="2.0",
	)

	def _fetch_atomically(url: str, destination: str) -> None:
	    """Download *url* to *destination* without leaving a partial file there.

	    The payload is written to ``destination + ".part"`` and renamed into
	    place only after the download finished, so an interrupted download can
	    never be mistaken for a complete file by a later ``os.path.exists`` check.
	    """
	    import urllib.request

	    partial_path = destination + ".part"
	    try:
	        urllib.request.urlretrieve(url, partial_path)
	        os.replace(partial_path, destination)
	    finally:
	        # Still present only when the download or the rename failed.
	        if os.path.exists(partial_path):
	            os.unlink(partial_path)


	# NOTE(review): recent FastAPI releases deprecate on_event in favour of
	# lifespan handlers; kept as-is because `app` is constructed elsewhere.
	@app.on_event("startup")
	def startup():
	    """Make sure the Kokoro model is loaded when the application starts.

	    Does nothing if the module-level load already succeeded. Otherwise it
	    creates the ``voices`` directory, downloads ``voices/voices.bin`` and
	    ``kokoro-v0_19.onnx`` when they are missing, and assigns a new ``Kokoro``
	    instance to the module-level ``kokoro``.

	    Download or model-initialisation errors are not caught here and
	    propagate to the caller.
	    """
	    global kokoro
	    if kokoro is not None:
	        return

	    print("📥 Downloading Kokoro TTS model files...")

	    # Create directory for voices
	    os.makedirs("voices", exist_ok=True)

	    # Download voices file
	    voices_file = "voices/voices.bin"
	    if not os.path.exists(voices_file):
	        print("Downloading voices.bin...")
	        _fetch_atomically(
	            "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.bin",
	            voices_file,
	        )
	        print("✅ Voices downloaded!")

	    # Download ONNX model
	    model_file = "kokoro-v0_19.onnx"
	    if not os.path.exists(model_file):
	        print("Downloading kokoro-v0_19.onnx...")
	        _fetch_atomically(
	            "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/kokoro-v0_19.onnx",
	            model_file,
	        )
	        print("✅ Model downloaded!")

	    print("🎤 Initializing Kokoro TTS...")
	    kokoro = Kokoro(model_file, voices_file)
	    print("✅ Kokoro TTS loaded!")

	# ============== HELPERS ==============
	def cleanup_file(path: str) -> None:
	    """Delete a temporary file after the response has been sent.

	    Best-effort: a file that is already gone is ignored, and any other
	    filesystem error is reported on stdout instead of being raised, so a
	    failed cleanup never breaks the background task that runs it.

	    Args:
	        path: Filesystem path of the file to remove.
	    """
	    try:
	        os.unlink(path)
	    except FileNotFoundError:
	        # Already removed; nothing left to clean up.
	        pass
	    except OSError as e:
	        # Only filesystem errors are swallowed; unlike a bare `except`,
	        # KeyboardInterrupt / SystemExit and programming errors propagate.
	        print(f"⚠️ Could not delete temporary file {path}: {e}")

	def generate_speech(text: str, voice: str = "bf_isabella", speed: float = 1.0) -> str:
	    """
	    Generate speech using Kokoro TTS and save it as a temporary WAV file.

	    Available voices: af_heart, af_bella, am_adam, am_michael, bf_emma, bf_isabella

	    Args:
	        text: Text to synthesise; its length must lie within
	            MIN_CHARS..MAX_CHARS (inclusive).
	        voice: Voice identifier passed through to Kokoro.
	        speed: Speech speed passed through to Kokoro.

	    Returns:
	        Path of the WAV file that was written. The file is not deleted
	        automatically; the caller is responsible for removing it.

	    Raises:
	        ValueError: If the text is shorter than MIN_CHARS or longer than
	            MAX_CHARS.
	        RuntimeError: If the Kokoro model has not been loaded.
	    """
	    if len(text) < MIN_CHARS:
	        raise ValueError(f"Text too short. Minimum {MIN_CHARS} characters.")
	    if len(text) > MAX_CHARS:
	        raise ValueError(f"Text too long. Maximum {MAX_CHARS} characters (~5 min audio).")
	    if kokoro is None:
	        # Fail with a clear message instead of an AttributeError on None.
	        raise RuntimeError("Kokoro TTS model is not loaded.")

	    # Generate audio samples
	    samples, sample_rate = kokoro.create(
	        text=text,
	        voice=voice,
	        speed=speed,
	        lang="en-us",
	    )

	    # Reserve a unique path, then close the handle before soundfile opens the
	    # same path itself, so the file is never held open twice.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        output_path = tmp.name

	    try:
	        sf.write(output_path, samples, sample_rate)
	    except Exception:
	        # Do not leak the reserved temp file when encoding fails.
	        if os.path.exists(output_path):
	            os.unlink(output_path)
	        raise
	    return output_path

	# ============== API ENDPOINTS ==============
	@app.get("/")
	def root():
	    # Landing endpoint: identifies the service and advertises its headline
	    # features and the routes a client can call.
	    feature_summary = {
	        "speed": "10x faster than XTTS",
	        "voices": 6,
	        "max_chars": MAX_CHARS,
	        "emotional": True,
	    }
	    endpoint_index = {
	        "health": "/health",
	        "generate": "/api/generate (POST)",
	        "docs": "/docs",
	    }
	    service_info = {
	        "service": "Kokoro TTS API",
	        "status": "running",
	        "model": "Kokoro-82M",
	        "version": "2.0",
	    }
	    service_info["features"] = feature_summary
	    service_info["endpoints"] = endpoint_index
	    return service_info

	@app.get("/health")
	def health():
	    # Health probe: a static status payload plus the advertised voice list
	    # and the configured input-length limit.
	    advertised_voices = [
	        "af_heart",
	        "af_bella",
	        "am_adam",
	        "am_michael",
	        "bf_emma",
	        "bf_isabella",
	    ]
	    report = dict(
	        status="healthy",
	        model="Kokoro TTS 82M",
	        speed="10x faster than XTTS",
	        max_chars=MAX_CHARS,
	        voices=advertised_voices,
	    )
	    return report

	@app.post("/api/generate")
	async def generate_tts(
	    background_tasks: BackgroundTasks,
	    text: str = Form(..., description="Text to convert to speech"),
	    voice: str = Form("bf_isabella", description="Voice to use"),
	    speed: float = Form(1.0, description="Speech speed (0.5-2.0)")
	):
	    """
	    Generate TTS with Kokoro (Fast & Emotional)

	    Performance:
	    - Max audio: 5 minutes (4500 chars)
	    - Generation: ~20-30 seconds on CPU
	    - Speech rate: ~900 chars/minute

	    Available Voices:
	    - `af_heart`: American Female (warm)
	    - `af_bella`: American Female (professional)
	    - `am_adam`: American Male (confident)
	    - `am_michael`: American Male (friendly)
	    - `bf_emma`: British Female (elegant)
	    - `bf_isabella`: British Female (storytelling) ⭐ Best for long content

	    Example:
	    ```bash
	    curl -X POST https://your-space.hf.space/api/generate \\
	    -F "text=Hello world, this is Kokoro TTS!" \\
	    -F "voice=bf_isabella" \\
	    -F "speed=1.0" \\
	    --output audio.wav
	    ```
	    """
	    # Local import keeps this handler self-contained; fastapi is already a
	    # dependency of this module.
	    from fastapi.concurrency import run_in_threadpool

	    # Validate speed *outside* the try block: HTTPException is an Exception,
	    # so raising it inside would be caught by the generic handler below and
	    # turned into a 500. The chained comparison also rejects NaN.
	    if not 0.5 <= speed <= 2.0:
	        raise HTTPException(status_code=400, detail="Speed must be between 0.5 and 2.0")

	    # The text that is actually synthesised (and counted in the header).
	    clean_text = text.strip()

	    try:
	        # Synthesis is blocking work; run it in the thread pool so the event
	        # loop stays free to serve other requests (e.g. /health).
	        output_path = await run_in_threadpool(generate_speech, clean_text, voice, speed)

	        # Schedule cleanup after response is sent
	        background_tasks.add_task(cleanup_file, output_path)

	        # Return audio file
	        response = FileResponse(
	            output_path,
	            media_type="audio/wav",
	            filename=f"kokoro_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
	        )
	        response.headers["X-Character-Count"] = str(len(clean_text))
	        response.headers["X-Voice-Used"] = voice

	        return response

	    except ValueError as e:
	        # generate_speech reports invalid text length as ValueError.
	        raise HTTPException(status_code=400, detail=str(e)) from e
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}") from e

	if __name__ == "__main__":
	    # Script entry point: serve the app on all interfaces, port 7860.
	    import uvicorn

	    server_options = {"host": "0.0.0.0", "port": 7860}
	    uvicorn.run(app, **server_options)