# app.py
# Groq Voice Chatbot — uses Groq chat + Groq TTS (GroqCloud only)
"""Gradio front end for a text chat whose replies are spoken with Groq TTS.

The user types a message, the Groq chat-completions endpoint produces a reply,
and the Groq speech endpoint turns that reply into an audio file that is
played back in the browser.
"""

import os
import tempfile
from typing import Dict, List, Optional, Tuple

import gradio as gr
import requests
from dotenv import load_dotenv

load_dotenv()

# --- Config ---
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
GROQ_CHAT_ENDPOINT = "https://api.groq.com/openai/v1/chat/completions"
GROQ_TTS_ENDPOINT = "https://api.groq.com/openai/v1/audio/speech"

# Change these to models you have access to.
# NOTE(review): model availability changes over time — confirm both model IDs
# against the current Groq model list before deploying.
DEFAULT_CHAT_MODEL = "llama-3.1-70b-versatile"  # example chat model from Groq docs
DEFAULT_TTS_MODEL = "playai-tts"
EXAMPLE_VOICES = [
    "Emma-PlayAI",
    "Fritz-PlayAI",
    "Alloy-PlayAI",
]


def _auth_headers() -> Dict[str, str]:
    """Return the JSON request headers carrying the Groq bearer token."""
    return {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }


def _error_detail(resp: "requests.Response"):
    """Return the decoded JSON error body of *resp*, or its raw text."""
    try:
        return resp.json()
    except ValueError:
        return resp.text


# Utility to call Groq chat
def groq_chat_generate(
    messages: List[Dict],
    model: str = DEFAULT_CHAT_MODEL,
    temperature: float = 0.7,
) -> Tuple[Optional[str], Optional[str]]:
    """Request a chat completion from Groq.

    Args:
        messages: Conversation so far, as ``{"role": ..., "content": ...}``
            dicts, sent as the ``messages`` field of the request.
        model: Chat model identifier.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        ``(content, None)`` on success or ``(None, error_message)`` on any
        failure. This function reports errors through the return value
        rather than raising.
    """
    if not GROQ_API_KEY:
        return None, "Missing GROQ_API_KEY environment variable."

    payload = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
    }
    try:
        resp = requests.post(
            GROQ_CHAT_ENDPOINT,
            json=payload,
            headers=_auth_headers(),
            timeout=60,
        )
    except requests.RequestException as e:
        return None, f"Chat request failed: {e}"

    if resp.status_code != 200:
        return None, f"Groq chat error ({resp.status_code}): {_error_detail(resp)}"

    # Expecting an OpenAI-compatible response. A 200 with a non-JSON body must
    # not crash the UI handler, so the decode is guarded as well.
    try:
        data = resp.json()
        content = data["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        return None, f"Unexpected chat response format: {e}"
    if not isinstance(content, str):
        return None, "Unexpected chat response format: missing message content"
    return content, None


# Utility to call Groq TTS
def groq_tts_synthesize(
    text: str,
    voice: Optional[str] = None,
    model: str = DEFAULT_TTS_MODEL,
    response_format: str = "wav",
) -> Tuple[Optional[str], Optional[str]]:
    """Synthesize *text* to speech with Groq and save it to a temp file.

    Args:
        text: Text to speak; empty or whitespace-only text is rejected.
        voice: Voice identifier; omitted from the request when falsy.
        model: TTS model identifier.
        response_format: Audio container requested from the API; also used
            as the temp file's extension.

    Returns:
        ``(path, None)`` on success, where *path* is a temporary file the
        caller owns (it is not deleted here because the UI still has to
        serve it), or ``(None, error_message)`` on failure.
    """
    if not GROQ_API_KEY:
        return None, "Missing GROQ_API_KEY environment variable."
    if not text or not text.strip():
        return None, "Nothing to synthesize."

    payload = {
        "model": model,
        "input": text,
        "response_format": response_format,
    }
    if voice:
        payload["voice"] = voice

    try:
        resp = requests.post(
            GROQ_TTS_ENDPOINT,
            json=payload,
            headers=_auth_headers(),
            stream=True,
            timeout=90,
        )
    except requests.RequestException as e:
        return None, f"TTS request failed: {e}"

    # With stream=True the connection stays checked out until the response is
    # closed, so the whole read happens inside the response's context.
    with resp:
        if resp.status_code != 200:
            return None, f"TTS error ({resp.status_code}): {_error_detail(resp)}"

        tmp_path = None
        try:
            with tempfile.NamedTemporaryFile(
                delete=False, suffix=f".{response_format}"
            ) as tmp:
                tmp_path = tmp.name
                for chunk in resp.iter_content(chunk_size=8192):
                    if chunk:
                        tmp.write(chunk)
        except (OSError, requests.RequestException) as e:
            # Do not leave a truncated audio file behind.
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    pass  # best-effort cleanup; the original error is reported
            return None, f"Failed saving TTS audio: {e}"

    return tmp_path, None


def _to_chat_pairs(conv: List[Dict]) -> List[List[Optional[str]]]:
    """Convert role/content messages into (user, assistant) display pairs."""
    pairs: List[List[Optional[str]]] = []
    for msg in conv:
        role = msg.get("role")
        content = msg.get("content")
        if role == "user":
            pairs.append([content, None])
        elif role == "assistant":
            if pairs and pairs[-1][1] is None:
                pairs[-1][1] = content
            else:
                pairs.append([None, content])
    return pairs


# Gradio UI
with gr.Blocks(title="Groq Voice Chatbot") as demo:
    gr.Markdown(
        "# 🤖 Groq Voice Chatbot\nType a message and the Groq chat model will reply and speak the response using Groq TTS."
    )
    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="Conversation")
            user_input = gr.Textbox(
                placeholder="Type your message here...",
                label="Your message",
            )
            with gr.Row():
                send_btn = gr.Button("Send")
                clear_btn = gr.Button("Clear")
        with gr.Column(scale=2):
            gr.Markdown("**Voice & Settings**")
            voice_dropdown = gr.Dropdown(
                EXAMPLE_VOICES,
                value=EXAMPLE_VOICES[0],
                label="Assistant voice",
            )
            tts_format = gr.Radio(
                choices=["wav", "mp3"], value="wav", label="TTS format"
            )
            temperature = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.7,
                step=0.05,
                label="Chat temperature",
            )
            status = gr.Textbox(label="Status", interactive=False)

    # A single audio component shared by both handlers, so that "Clear"
    # resets the same player that "Send" fills.
    audio_out = gr.Audio(label="Assistant audio", type="filepath")

    state = gr.State([])  # conversation state for chat API

    def send_wrapper(user_text, conv_state, voice, tts_fmt, temp):
        """Handle "Send": get a chat reply, speak it, and refresh the UI.

        Returns a 4-tuple matching the click outputs:
        (chatbot value, conversation state, status text, audio file path).
        """
        if not user_text or not user_text.strip():
            return gr.update(), conv_state, "Please type a message.", None

        # Work on a copy so a failed request leaves the stored history intact
        # (no dangling user turn to be resent with the next message).
        conv = list(conv_state or [])
        conv.append({"role": "user", "content": user_text})

        reply_text, chat_err = groq_chat_generate(
            conv, model=DEFAULT_CHAT_MODEL, temperature=temp
        )
        if chat_err:
            return gr.update(), conv_state, f"Chat error: {chat_err}", None

        conv.append({"role": "assistant", "content": reply_text})
        history = _to_chat_pairs(conv)

        audio_path, tts_err = groq_tts_synthesize(
            reply_text, voice=voice, response_format=tts_fmt
        )
        if tts_err:
            # Return text-only but still show the chat.
            return history, conv, f"TTS error: {tts_err}", None
        return history, conv, "OK", audio_path

    send_btn.click(
        fn=send_wrapper,
        inputs=[user_input, state, voice_dropdown, tts_format, temperature],
        outputs=[chatbot, state, status, audio_out],
    )

    def clear_all():
        """Reset the chat display, conversation state, status and audio."""
        return [], [], "Cleared.", None

    clear_btn.click(
        fn=clear_all,
        inputs=None,
        outputs=[chatbot, state, status, audio_out],
    )

if __name__ == "__main__":
    demo.launch()