Spaces:

DP27
/

tts

No application file

App Files Files Community

tts / app.py

DP27

upload fullcode

c402391 verified about 1 year ago

raw

history blame contribute delete

4 kB

	from fastapi import FastAPI, HTTPException, Query
	from fastapi.responses import FileResponse
	from fastapi.staticfiles import StaticFiles
	from fastapi.middleware.cors import CORSMiddleware
	from pydantic import BaseModel
	import os
	import uuid
	import tempfile
	from typing import Optional
	import soundfile as sf

	# Import your TTS dependencies
	from kokoro import KPipeline

	# Initialize the TTS pipeline once at import time; every request reuses it.
	pipeline = KPipeline(lang_code='a')  # Make sure lang_code matches voice

	# Initialize FastAPI app
	app = FastAPI(title="Kokoro TTS API Service")

	# Add CORS middleware to allow frontend requests.
	# Wildcard origins/methods/headers must not be combined with
	# allow_credentials=True (see the FastAPI CORS documentation). Nothing in
	# this service reads cookies or authentication headers, so credentials are
	# disabled and the wildcards are kept.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],  # In production, replace with your domains
	    allow_credentials=False,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# Mount static files directory (relative to the process working directory)
	app.mount("/static", StaticFiles(directory="static"), name="static")

	# Directory used for the generated audio files: the system temp directory.
	TEMP_DIR = tempfile.gettempdir()
	os.makedirs(TEMP_DIR, exist_ok=True)

	def tts(text, file_name, voice='af_bella', speed=0.9):
	    """
	    Generate speech from text using Kokoro TTS and save it as one audio file.

	    Args:
	        text (str): Text to convert to speech
	        file_name (str): Path to save the output .wav file
	        voice (str): Voice to use for TTS
	        speed (float): Speed of speech

	    Returns:
	        str: Path to the generated audio file

	    Raises:
	        RuntimeError: If the pipeline fails, yields no audio, or the file
	            cannot be written. The original error is chained as the cause.
	    """
	    # Local import: numpy is only needed here, to join several segments.
	    import numpy as np

	    try:
	        generator = pipeline(
	            text, voice=voice,
	            speed=speed, split_pattern=None
	        )

	        # The pipeline is a generator and may yield more than one segment.
	        # Writing each segment to the same path would keep only the last
	        # one, so gather them all and write the file exactly once.
	        segments = [audio for _graphemes, _phonemes, audio in generator]
	        if not segments:
	            raise ValueError("no audio was produced for the given text")

	        if len(segments) == 1:
	            # Common case: pass the audio through untouched.
	            samples = segments[0]
	        else:
	            samples = np.concatenate([np.asarray(part) for part in segments])

	        sf.write(file_name, samples, 24000)  # save audio file (24 kHz)
	        return file_name
	    except Exception as e:
	        raise RuntimeError(f"TTS generation failed: {e}") from e

	class TTSRequest(BaseModel):
	    # JSON request body accepted by the POST /tts/ endpoint.
	    # `text` is required; `voice` and `speed` fall back to the same
	    # defaults that tts() and the GET endpoint use.
	    text: str  # text to convert to speech
	    voice: str = "af_bella"  # voice identifier forwarded to tts()
	    speed: float = 0.9  # speech speed forwarded to tts()

	def _remove_file(path):
	    """Delete *path*; a file that is already gone is not an error."""
	    try:
	        os.remove(path)
	    except OSError:
	        # Best-effort cleanup of a temporary file: nothing useful can be
	        # done if removal fails, and it must never break a response.
	        pass


	def _synthesize_response(text, voice, speed):
	    """
	    Run TTS for one request and wrap the resulting .wav in a FileResponse.

	    Shared by the POST and GET endpoints so both behave identically.

	    Args:
	        text (str): Text to convert to speech
	        voice (str): Voice to use for TTS
	        speed (float): Speed of speech

	    Returns:
	        FileResponse: The generated audio; the temporary file is deleted
	        once the response has been sent.

	    Raises:
	        HTTPException: 400 if the text is blank, 500 if generation fails.
	    """
	    # Local import keeps this block self-contained.
	    from fastapi import BackgroundTasks

	    # Checked outside the try block so the 400 is not turned into a 500.
	    if not text or not text.strip():
	        raise HTTPException(status_code=400, detail="Text must not be empty")

	    # Generate a unique filename
	    filename = f"{uuid.uuid4()}.wav"
	    output_path = os.path.join(TEMP_DIR, filename)

	    try:
	        # NOTE(review): tts() is a blocking call made from an async handler.
	        tts(text, output_path, voice, speed)
	        if not os.path.isfile(output_path):
	            raise RuntimeError("no audio file was produced")
	    except Exception as e:
	        # Do not leave a partially written file behind.
	        _remove_file(output_path)
	        raise HTTPException(
	            status_code=500,
	            detail=f"TTS generation failed: {str(e)}",
	        ) from e

	    # Delete the temporary file after the response has been sent, so the
	    # temp directory does not grow by one file per request.
	    cleanup = BackgroundTasks()
	    cleanup.add_task(_remove_file, output_path)

	    return FileResponse(
	        path=output_path,
	        filename=filename,
	        media_type="audio/wav",
	        background=cleanup,
	    )


	@app.post("/tts/")
	async def text_to_speech(request: TTSRequest):
	    """
	    Convert text to speech and return a .wav file

	    Raises:
	        HTTPException: 400 if the text is blank, 500 if generation fails.
	    """
	    return _synthesize_response(request.text, request.voice, request.speed)


	@app.get("/tts-get/")
	async def text_to_speech_get(
	    text: str = Query(..., description="Text to convert to speech"),
	    voice: str = Query("af_bella", description="Voice to use for TTS"),
	    speed: float = Query(0.9, description="Speed of speech (0.5-1.5)")
	):
	    """
	    GET endpoint for text-to-speech conversion

	    Raises:
	        HTTPException: 400 if the text is blank, 500 if generation fails.
	    """
	    return _synthesize_response(text, voice, speed)

	@app.get("/voices/")
	async def available_voices():
	    """
	    List the voices this service offers and which one is the default.

	    The list is a hard-coded placeholder; extend it with the voices that
	    the kokoro library actually provides.
	    """
	    default_voice = "af_bella"
	    voice_names = [default_voice]  # Add other available voices here
	    return {"voices": voice_names, "default": default_voice}

	@app.get("/")
	async def root():
	    """
	    Return the frontend page (static/index.html) for the site root.
	    """
	    index_page = 'static/index.html'
	    return FileResponse(index_page)