warmshower_edgetts / compliment.py
luis-poe's picture
Update compliment.py
7e5f39d verified
import asyncio
from groq import Groq
import edge_tts
import tempfile
import os
# Shared Groq API client, constructed once at import time so that every call in
# this module reuses the same instance instead of building one per request.
# NOTE(review): no credentials are passed here, so the client presumably reads
# them from the environment (e.g. GROQ_API_KEY) — confirm against the deployment.
client = Groq()
async def text_to_speech(text, language):
    """Synthesize *text* to an MP3 file with Edge TTS and return its path.

    Args:
        text (str): The text to speak.
        language (str | None): Language code selecting the voice. Any code
            whose primary subtag is ``de`` (``"de"``, ``"DE"``, ``"de-DE"``,
            ``"de_AT"``) selects the German voice; everything else, including
            ``None`` or an empty string, falls back to the English voice.

    Returns:
        str: Path of a newly created temporary ``.mp3`` file. The file is
        intentionally NOT deleted here because the caller (e.g. a Gradio audio
        component) still has to read it; the caller owns its cleanup.

    Raises:
        Exception: Whatever ``edge_tts`` raises when synthesis fails. The
        temporary file is removed before the exception propagates.
    """
    # Reduce the code to its primary subtag so region variants such as
    # "de-DE" or "de_AT" pick the German voice instead of silently falling
    # through to English. ``or ""`` keeps a ``None`` language from crashing.
    primary_subtag = (language or "").strip().lower().replace("_", "-").split("-")[0]
    if primary_subtag == "de":
        voice = "de-DE-KatjaNeural"  # German female voice
    else:
        voice = "en-US-AriaNeural"  # English female voice

    rate = "+10%"
    pitch = "+0Hz"

    # Only reserve a unique path here; the handle is closed before edge-tts
    # opens the same path for writing.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    try:
        communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
        await communicate.save(tmp_path)
    except BaseException:
        # delete=False means nobody else will clean this up. BaseException is
        # used so task cancellation does not leave an orphaned file either.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise

    return tmp_path
async def generate_compliment_and_audio(
    base64_image,
    compliment_prompt,
    model="llama-3.2-90b-vision-preview",
    max_tokens=300,
    temperature=0.5,
    tts_language='en',
):
    """Generate a compliment for an image and synthesize it to audio.

    The chat completion is requested first; speech synthesis starts only once
    the full compliment text is available, so the two steps run sequentially.

    Args:
        base64_image (str): The base64 encoded image. It is embedded in a
            ``data:image/jpeg;base64,...`` URL regardless of its real format.
        compliment_prompt (str): The prompt for generating the compliment.
        model (str, optional): The model to use for the chat completion.
        max_tokens (int, optional): The maximum number of tokens to generate.
        temperature (float, optional): The sampling temperature.
        tts_language (str, optional): The language code for TTS.

    Returns:
        Tuple[str, str]: The generated compliment and the audio file path.

    Raises:
        ValueError: If the completion contains no choices or the first choice
            has no (or only blank) text, since there is nothing to speak.
    """
    # Single user turn carrying both the text prompt and the inline image.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": compliment_prompt},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}",
                    },
                },
            ],
        }
    ]

    # The shared module-level client is synchronous, so the blocking HTTP call
    # is pushed to a worker thread to keep the event loop responsive.
    # asyncio.to_thread forwards keyword arguments, so no wrapper is needed.
    chat_completion = await asyncio.to_thread(
        client.chat.completions.create,
        max_tokens=max_tokens,
        temperature=temperature,
        messages=messages,
        model=model,
    )

    # Fail early with a clear message instead of an IndexError on an empty
    # choice list, or an opaque TTS failure on a None/blank message.
    choices = chat_completion.choices
    compliment = choices[0].message.content if choices else None
    if not compliment or not compliment.strip():
        raise ValueError("The model returned no compliment text to synthesize.")

    audio_file_path = await text_to_speech(compliment, tts_language)
    return compliment, audio_file_path