Spaces:
Sleeping
Sleeping
| import asyncio | |
| from groq import Groq | |
| import edge_tts | |
| import tempfile | |
| import os | |
# Create a Groq client once at the module level to reuse across function calls.
# NOTE(review): it is constructed with no arguments at import time, so any
# credentials are presumably picked up from the environment -- confirm against
# the Groq SDK configuration before importing this module without them set.
client = Groq()
async def text_to_speech(text, language):
    """Synthesize ``text`` into an MP3 file with Edge TTS and return its path.

    Args:
        text (str): The text to speak.
        language (str | None): Language code. ``'de'`` (any letter case,
            optionally region-qualified such as ``'de-DE'`` or ``'de_AT'``)
            selects the German voice; every other value, including ``None``
            or an empty string, selects the English voice.

    Returns:
        str: Path of a temporary ``.mp3`` file. The file is deliberately not
        deleted here because the caller (Gradio) still needs to read it; the
        caller is responsible for removing it afterwards.

    Raises:
        Exception: Any error raised by ``edge_tts`` is propagated unchanged,
        after the unfinished temporary file has been removed.
    """
    # Reduce the code to its primary subtag so that None, stray whitespace and
    # region suffixes neither crash nor silently fall back to English.
    primary_tag = (language or "").strip().lower().replace("_", "-").split("-")[0]

    # Map language to Edge TTS voice
    if primary_tag == "de":
        voice = "de-DE-KatjaNeural"  # German female voice
    else:
        voice = "en-US-AriaNeural"  # English female voice

    rate = "+10%"
    pitch = "+0Hz"

    # Only the unique path is needed: reserve it, then close our own handle
    # before edge_tts opens the file itself to write the audio.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    try:
        communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
        await communicate.save(tmp_path)
    except BaseException:
        # Synthesis failed or the task was cancelled: the file was created
        # with delete=False, so it would otherwise be leaked on disk.
        try:
            os.remove(tmp_path)
        except OSError:
            pass  # Already gone or not removable; the original error matters more.
        raise

    # Do not delete the file on success; Gradio needs to access it.
    return tmp_path  # Return the path to the audio file
async def generate_compliment_and_audio(
    base64_image,
    compliment_prompt,
    model="llama-3.2-90b-vision-preview",
    max_tokens=300,
    temperature=0.5,
    tts_language='en',
):
    """
    Ask the vision model for a compliment about an image, then voice it.

    The chat completion is requested first; as soon as its text is available
    it is handed to ``text_to_speech`` to produce the audio file.

    Args:
    - base64_image (str): The base64 encoded image, embedded as a JPEG data URL.
    - compliment_prompt (str): The prompt sent alongside the image.
    - model (str, optional): The model used for the chat completion.
    - max_tokens (int, optional): Upper bound on the number of generated tokens.
    - temperature (float, optional): The sampling temperature.
    - tts_language (str, optional): The language code forwarded to the TTS step.

    Returns:
    - Tuple[str, str]: The compliment text and the path of its audio file.
    """
    # One user turn: the prompt text followed by the image as a data URL.
    text_part = {"type": "text", "text": compliment_prompt}
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
    }
    conversation = [{"role": "user", "content": [text_part, image_part]}]

    # The shared client call is synchronous, so run it on a worker thread
    # instead of blocking the event loop while waiting for the response.
    response = await asyncio.to_thread(
        client.chat.completions.create,
        max_tokens=max_tokens,
        temperature=temperature,
        messages=conversation,
        model=model,
    )

    # The first choice carries the generated compliment.
    spoken_text = response.choices[0].message.content

    # Turn the compliment into speech and hand both results back.
    speech_path = await text_to_speech(spoken_text, tts_language)
    return spoken_text, speech_path