Spaces:

luis-poe
/

warmshower_edgetts

Sleeping

App Files Files Community

warmshower_edgetts / compliment.py

luis-poe

Update compliment.py

7e5f39d verified 11 months ago

raw

history blame contribute delete

2.66 kB

	import asyncio
	from groq import Groq
	import edge_tts
	import tempfile
	import os

	# Shared Groq client, built once when this module is imported and reused by
	# the chat-completion call below instead of constructing a client per request.
	# NOTE(review): Groq() is called with no arguments, so credentials presumably
	# come from the environment (e.g. GROQ_API_KEY) - confirm against deployment.
	client = Groq()

	async def text_to_speech(text, language):
	    """Synthesize ``text`` to an MP3 file with Edge TTS and return its path.

	    Args:
	        text: The text to speak.
	        language: Language code selecting the voice. ``'de'`` (compared
	            case-insensitively) selects the German voice; any other value,
	            including ``None``, falls back to the English voice.

	    Returns:
	        Path of a temporary ``.mp3`` file holding the audio. The file is
	        created with ``delete=False`` and is not removed here on success,
	        so the caller is responsible for deleting it once it is done.

	    Raises:
	        Whatever is raised while building or saving the audio. The temporary
	        file is removed before the exception propagates.
	    """
	    # Map language to Edge TTS voice; a missing language means English.
	    if (language or '').lower() == 'de':
	        voice = 'de-DE-KatjaNeural'  # German female voice
	    else:
	        voice = 'en-US-AriaNeural'  # English female voice

	    rate = "+10%"
	    pitch = "+0Hz"

	    # Only reserve a unique path here; the handle is closed immediately and
	    # edge_tts writes to the path on its own.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
	        tmp_path = tmp_file.name

	    try:
	        communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
	        await communicate.save(tmp_path)
	    except BaseException:
	        # Synthesis failed or the task was cancelled (CancelledError is a
	        # BaseException): the path is never handed to anyone, so remove the
	        # file instead of leaking it, then let the error propagate.
	        try:
	            os.remove(tmp_path)
	        except OSError:
	            pass
	        raise

	    # Do not delete the file on success; Gradio needs to access it
	    return tmp_path  # Return the path to the audio file

	async def generate_compliment_and_audio(base64_image, compliment_prompt, model="llama-3.2-90b-vision-preview", max_tokens=300, temperature=0.5, tts_language='en'):
	    """
	    Request a compliment about an image from the chat model, then voice it.

	    The chat completion runs first; text-to-speech starts once the
	    compliment text is available.

	    Args:
	    - base64_image (str): The base64 encoded image, embedded as a JPEG data URL.
	    - compliment_prompt (str): The prompt text sent alongside the image.
	    - model (str, optional): The model to use for the chat completion.
	    - max_tokens (int, optional): The maximum number of tokens to generate.
	    - temperature (float, optional): The sampling temperature.
	    - tts_language (str, optional): The language code passed to text_to_speech.

	    Returns:
	    - Tuple[str, str]: The generated compliment and the audio file path.
	    """
	    # A single user turn: the prompt text followed by the image as a data URL
	    prompt_part = {"type": "text", "text": compliment_prompt}
	    image_part = {
	        "type": "image_url",
	        "image_url": {
	            "url": f"data:image/jpeg;base64,{base64_image}",
	        },
	    }
	    conversation = [{"role": "user", "content": [prompt_part, image_part]}]

	    # The client call is synchronous, so run it in a worker thread to keep
	    # the event loop free while waiting for the response
	    response = await asyncio.to_thread(
	        client.chat.completions.create,
	        max_tokens=max_tokens,
	        temperature=temperature,
	        messages=conversation,
	        model=model,
	    )

	    # The first choice carries the compliment text
	    spoken_text = response.choices[0].message.content

	    # Turn the compliment into an audio file
	    audio_path = await text_to_speech(spoken_text, tts_language)

	    return spoken_text, audio_path