Update compliment.py

compliment.py  CHANGED  (+79 -153)

@@ -1,153 +1,79 @@
-import asyncio
-from groq import Groq
-import edge_tts
-import tempfile
-import os
-
-# Create a Groq client once at the module level to reuse across function calls
-client = Groq()
-
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
-        tmp_path = tmp_file.name
-
-async def
-    """
-    Generate a
-
-    Args:
-    - base64_image (str): The base64 encoded image.
-    - compliment_prompt (str): The prompt
-    - model (str, optional): The model to use for the chat completion.
-    - max_tokens (int, optional): The maximum number of tokens to generate.
-    - temperature (float, optional): The sampling temperature.
-
-    "
-
-    {
-
-    "
-
-    return compliment
-
-async def generate_compliment_and_audio(base64_image, compliment_prompt, model="llama-3.2-90b-vision-preview", max_tokens=300, temperature=0.5, tts_language='en'):
-    """
-    Generate a compliment and its audio, starting TTS processing as soon as possible to reduce latency.
-
-    Args:
-    - base64_image (str): The base64 encoded image.
-    - compliment_prompt (str): The prompt for generating the compliment.
-    - model (str, optional): The model to use for the chat completion.
-    - max_tokens (int, optional): The maximum number of tokens to generate.
-    - temperature (float, optional): The sampling temperature.
-    - tts_language (str, optional): The language code for TTS.
-
-    Returns:
-    - Tuple[str, bytes]: The generated compliment and the audio data.
-    """
-
-    # Prepare the messages payload
-    messages = [
-        {
-            "role": "user",
-            "content": [
-                {"type": "text", "text": compliment_prompt},
-                {
-                    "type": "image_url",
-                    "image_url": {
-                        "url": f"data:image/jpeg;base64,{base64_image}",
-                    },
-                },
-            ],
-        }
-    ]
-
-    # Variables to store the text and TTS task
-    compliment_parts = []
-    tts_task = None
-
-    # Start the chat completion with streaming
-    chat_stream = await client.chat.completions.create_async(
-        max_tokens=max_tokens,
-        temperature=temperature,
-        messages=messages,
-        model=model,
-        stream=True
-    )
-
-    # Process the stream and start TTS as soon as possible
-    async for chunk in chat_stream:
-        if 'choices' in chunk:
-            for choice in chunk['choices']:
-                if 'delta' in choice and 'content' in choice['delta']:
-                    content = choice['delta']['content']
-                    compliment_parts.append(content)
-
-                    # Start TTS processing once we have enough content
-                    if tts_task is None and len(''.join(compliment_parts)) > 50:
-                        # Start the TTS processing asynchronously
-                        tts_task = asyncio.create_task(
-                            text_to_speech(''.join(compliment_parts), tts_language)
-                        )
-                    await asyncio.sleep(0)  # Yield control to the event loop
-
-    # If TTS hasn't started yet, start it now with the full compliment
-    if tts_task is None:
-        full_compliment = ''.join(compliment_parts)
-        audio_data = await text_to_speech(full_compliment, tts_language)
-    else:
-        # Wait for the TTS task to complete
-        audio_data = await tts_task
-        full_compliment = ''.join(compliment_parts)
-
-    return full_compliment, audio_data

+import asyncio
+from groq import Groq
+import edge_tts
+import tempfile
+import os
+
+# Create a Groq client once at the module level to reuse across function calls
+client = Groq()
+
+async def text_to_speech(text, language):
+    # Map language to Edge TTS voice
+    if language.lower() == 'de':
+        voice = 'de-DE-KatjaNeural'  # German female voice
+    else:
+        voice = 'en-US-AriaNeural'  # English female voice
+
+    rate = "+10%"
+    pitch = "+0Hz"
+
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
+        tmp_path = tmp_file.name
+
+    communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
+    await communicate.save(tmp_path)
+
+    # Do not delete the file yet; Gradio needs to access it
+    return tmp_path  # Return the path to the audio file
+
+async def generate_compliment_and_audio(base64_image, compliment_prompt, model="llama-3.2-90b-vision-preview", max_tokens=300, temperature=0.5, tts_language='en'):
+    """
+    Generate a compliment for the image, then synthesize it to audio.
+
+    Args:
+    - base64_image (str): The base64 encoded image.
+    - compliment_prompt (str): The prompt for generating the compliment.
+    - model (str, optional): The model to use for the chat completion.
+    - max_tokens (int, optional): The maximum number of tokens to generate.
+    - temperature (float, optional): The sampling temperature.
+    - tts_language (str, optional): The language code for TTS.
+
+    Returns:
+    - Tuple[str, str]: The generated compliment and the audio file path.
+    """
+
+    # Prepare the messages payload
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": compliment_prompt},
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/jpeg;base64,{base64_image}",
+                    },
+                },
+            ],
+        }
+    ]
+
+    # Since the Groq client does not support async methods, use asyncio.to_thread
+    def fetch_compliment():
+        return client.chat.completions.create(
+            max_tokens=max_tokens,
+            temperature=temperature,
+            messages=messages,
+            model=model,
+        )
+
+    # Call the synchronous function in a separate thread
+    chat_completion = await asyncio.to_thread(fetch_compliment)
+
+    # Extract the compliment
+    compliment = chat_completion.choices[0].message.content
+
+    # Synthesize the compliment to an audio file
+    audio_file_path = await text_to_speech(compliment, tts_language)
+
+    return compliment, audio_file_path
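
For reference, a minimal sketch of how the updated generate_compliment_and_audio could be called outside the Space. The image filename, the prompt text, and the standalone asyncio.run entry point are assumptions for illustration only; the module-level Groq() client expects GROQ_API_KEY in the environment, and the returned path is the temporary MP3 written by text_to_speech (suitable, for example, as a Gradio audio output).

import asyncio
import base64

from compliment import generate_compliment_and_audio

async def main():
    # Hypothetical input image; any JPEG works since it is sent as a base64 data URL
    with open("selfie.jpg", "rb") as f:
        base64_image = base64.b64encode(f.read()).decode("utf-8")

    compliment, audio_path = await generate_compliment_and_audio(
        base64_image,
        compliment_prompt="Give the person in this photo a short, warm compliment.",
        tts_language="de",  # 'de' selects the German Edge TTS voice; anything else falls back to English
    )
    print(compliment)
    print(f"Audio saved to {audio_path}")  # e.g. pass this path to a gr.Audio output in the Gradio UI

if __name__ == "__main__":
    asyncio.run(main())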