Spaces:

aankitdas
/

tts-eval-framework

Sleeping

tts-eval-framework / src /kokoro_client.py

first commit - working app locally

a3419b6 about 1 month ago

1.32 kB

	# src/kokoro_client.py
	# TTS client for Kokoro — neural TTS, runs locally on GPU/CPU.
	# Significantly more natural than pyttsx3 baseline.

	import time
	import numpy as np
	import soundfile as sf
	from kokoro import KPipeline

	# Build the Kokoro pipeline once, at module import time, and share it across
	# all synthesize() calls (it is expensive to reload). Note that merely
	# importing this module therefore constructs the pipeline.
	# lang_code "a" = American English
	_pipeline = KPipeline(lang_code="a")


	def synthesize(text: str, output_path: str, voice: str = "af_heart", speed: float = 1.0) -> dict:
	    """Synthesize text to a .wav file using Kokoro neural TTS.

	    The shared module-level pipeline is called with the text; every audio
	    chunk it yields is collected, the chunks are joined end to end, and the
	    result is written to ``output_path`` in a single write.

	    Args:
	        text: the string to synthesize
	        output_path: where to save the .wav file
	        voice: kokoro voice ID (default af_heart — warm American female)
	        speed: speaking rate multiplier (default 1.0)

	    Returns:
	        dict with keys: output_path, latency_seconds, engine, voice.
	        ``latency_seconds`` covers generation plus the file write, rounded
	        to milliseconds.

	    Raises:
	        ValueError: if the pipeline yields no audio at all (presumably what
	            happens for empty or whitespace-only text). No file is written
	            in that case.
	    """
	    # Sample rate handed to soundfile. Presumably Kokoro's native output
	    # rate — confirm against the kokoro docs before changing it.
	    sample_rate_hz = 24000

	    # perf_counter is monotonic, so the measured latency cannot be skewed
	    # by system clock adjustments the way time.time() can.
	    start = time.perf_counter()

	    # The pipeline yields 3-tuples; only the third element (audio) is used
	    # here, and it may be None for some items.
	    chunks = [
	        audio
	        for _, _, audio in _pipeline(text, voice=voice, speed=speed)
	        if audio is not None
	    ]

	    # np.concatenate([]) fails with an unhelpful "need at least one array"
	    # message; raise the same exception type with an actionable message.
	    if not chunks:
	        raise ValueError(
	            f"Kokoro produced no audio for the given text (voice={voice!r}); "
	            "nothing to write."
	        )

	    audio_out = np.concatenate(chunks)
	    sf.write(output_path, audio_out, sample_rate_hz)

	    latency = time.perf_counter() - start

	    return {
	        "output_path": output_path,
	        "latency_seconds": round(latency, 3),
	        "engine": "kokoro",
	        "voice": voice,
	    }