voicecloneapi

Runtime error

App Files Files Community

voicecloneapi / main.py

Arafath10

Update main.py

88fc5fe verified 11 months ago

raw

history blame

2.18 kB

	from fastapi import FastAPI, Query, HTTPException
	from fastapi.responses import StreamingResponse
	from TTS.api import TTS
	import os
	from io import BytesIO
	from typing import Generator

	# ASGI application object; the route handlers in this file register on it.
	app = FastAPI()
	# NOTE: `os` is already imported at the top of the file; this repeat is
	# redundant but harmless.
	import os

	# By using XTTS you agree to CPML license https://coqui.ai/cpml
	# The flag is set before the model is constructed below; presumably the TTS
	# library consults it when fetching the model -- keep this ordering.
	os.environ["COQUI_TOS_AGREED"] = "1"

	# Initialize the TTS model once at import time; this single module-level
	# instance is shared by every request handled by the process.
	tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=False) # Set gpu=True if you have GPU support

	# Predefined path to the sample voice clone. The path is relative, so it is
	# resolved against the process's current working directory.
	FIXED_SPEAKER_WAV = "Bible Verses About Community.wav"

	# Function to split text into chunks
	def split_text(text: str, words_per_chunk: int = 20) -> list:
	    """Split *text* into consecutive chunks of at most *words_per_chunk* words.

	    Words are delimited by any run of whitespace, and the words of each chunk
	    are re-joined with a single space, so the original spacing is not kept.

	    Args:
	        text: The text to split. Empty or whitespace-only text yields ``[]``.
	        words_per_chunk: Maximum number of words per chunk; must be >= 1.

	    Returns:
	        A list of strings, in input order; only the last chunk may hold
	        fewer than *words_per_chunk* words.

	    Raises:
	        ValueError: If *words_per_chunk* is smaller than 1.
	    """
	    # A negative step would make range() empty and silently drop all of the
	    # text, and a zero step fails with an unrelated-looking range() error,
	    # so reject both up front with a clear message.
	    if words_per_chunk < 1:
	        raise ValueError("words_per_chunk must be a positive integer")

	    words = text.split()
	    return [
	        " ".join(words[start:start + words_per_chunk])
	        for start in range(0, len(words), words_per_chunk)
	    ]



	@app.post("/generate-audio/")
	async def generate_audio(
	    text: str = Query(..., description="The input text to convert to speech."),
	    language: str = Query("en", description="Language code for TTS (e.g., 'en' for English).")
	):
	    """Synthesize *text* in the fixed cloned voice and stream the audio back.

	    The text is split into 20-word chunks; each chunk is synthesized to a
	    temporary WAV file whose bytes are streamed to the client as soon as the
	    chunk is ready.

	    Args:
	        text: The input text to convert to speech.
	        language: Language code passed to the TTS model.

	    Returns:
	        A ``StreamingResponse`` with media type ``audio/wav``.

	    Raises:
	        HTTPException: 400 if the speaker sample file is missing, or if the
	            model is multi-lingual and *language* is empty.
	    """
	    # Local import: only this handler needs it.
	    import tempfile

	    if not os.path.exists(FIXED_SPEAKER_WAV):
	        raise HTTPException(status_code=400, detail="Fixed speaker WAV file not found.")

	    # Validate before the response starts. Raising inside the generator would
	    # happen after the 200 status line was sent, leaving the client with a
	    # truncated stream instead of a proper error.
	    if tts.is_multi_lingual and not language:
	        raise HTTPException(
	            status_code=400,
	            detail="Language must be specified for multi-lingual models.",
	        )

	    text_chunks = split_text(text, 20)

	    def audio_stream() -> Generator[bytes, None, None]:
	        # Each request gets a private scratch directory so concurrent requests
	        # cannot overwrite or delete each other's chunk files. The context
	        # manager removes it on every exit path, including a synthesis error
	        # or the generator being closed early.
	        with tempfile.TemporaryDirectory(prefix="tts_") as work_dir:
	            for idx, chunk in enumerate(text_chunks):
	                output_file = os.path.join(work_dir, f"out_{idx}.wav")
	                tts.tts_to_file(
	                    text=chunk,
	                    file_path=output_file,
	                    speaker_wav=FIXED_SPEAKER_WAV,
	                    language=language,
	                )
	                with open(output_file, "rb") as audio_file:
	                    payload = audio_file.read()
	                # Delete right away so at most one chunk file is on disk.
	                os.remove(output_file)
	                # NOTE(review): each payload is presumably a complete WAV file
	                # with its own header, so the concatenated stream may not be a
	                # single valid WAV -- confirm how clients consume it.
	                yield payload

	    return StreamingResponse(audio_stream(), media_type="audio/wav")