Spaces:

shanusherly
/

audio

Running

App Files Files Community

shanusherly committed 13 days ago

Commit

f0e7f66

verified ·

1 Parent(s): bac6824

Update app.py

Browse files

Files changed (1) hide show

app.py +170 -99

app.py CHANGED Viewed

@@ -1,126 +1,197 @@
 import os
-import gradio as gr
 import requests
 import google.generativeai as genai
-# -----------------------------
-# Load API keys from environment
-# -----------------------------
 GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
 ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
-ELEVENLABS_VOICE_ID = "21m00Tcm4TlvDq8ikWAM"
 if not GEMINI_API_KEY:
-    raise RuntimeError("Missing GEMINI_API_KEY in environment")
-# Configure Gemini
 genai.configure(api_key=GEMINI_API_KEY)
-# -----------------------------
-# Simple prompt + in-memory history
-# -----------------------------
-PROMPT_TEMPLATE = """You are a helpful assistant.
-{chat_history}
-User: {user_message}
-Chatbot:"""
 class SimpleMemory:
     def __init__(self, max_messages=20):
         self.max_messages = max_messages
-        self.history = []
-    def add_user(self, text):
-        self.history.append(f"User: {text}")
-        self._trim()
-    def add_bot(self, text):
-        self.history.append(f"Chatbot: {text}")
-        self._trim()
-    def _trim(self):
         if len(self.history) > self.max_messages:
-            self.history = self.history[-self.max_messages:]
-    def as_text(self):
-        return "\n".join(self.history)
 memory = SimpleMemory(max_messages=20)
-# -----------------------------
-# Gemini wrapper
-# -----------------------------
-gemini_model = genai.GenerativeModel("gemini-2.5-flash")
 def generate_text_with_gemini(user_message):
-    chat_history = memory.as_text()
-    full_prompt = PROMPT_TEMPLATE.format(chat_history=chat_history, user_message=user_message)
-    response = gemini_model.generate_content(full_prompt)
-    text = response.text if hasattr(response, "text") else str(response)
-    return text
-# -----------------------------
-# ElevenLabs audio generation (lazy import)
-# -----------------------------
-def generate_audio_elevenlabs(text):
     try:
-        from elevenlabs.client import ElevenLabs
-        from elevenlabs import save
     except Exception as e:
-        print("ElevenLabs import failed:", e)
-        return ""
     try:
-        client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
-        audio = client.generate(
-            text=text,
-            voice=ELEVENLABS_VOICE_ID,
-            model="eleven_monolingual_v1"
-        )
-        output_path = f"/tmp/audio_{abs(hash(text)) % 100000}.mp3"
-        save(audio, output_path)
-        return output_path
     except Exception as e:
-        print("ElevenLabs generation error:", e)
-        return ""
-# -----------------------------
-# Combined response
-# -----------------------------
-def get_text_and_audio(user_message):
-    text = generate_text_with_gemini(user_message)
-    memory.add_user(user_message)
-    memory.add_bot(text)
-    audio_path = ""
-    if ELEVENLABS_API_KEY:
-        audio_path = generate_audio_elevenlabs(text)
-    return text, audio_path
-# -----------------------------
-# Gradio handler (UI unchanged except theme)
-# -----------------------------
-def chat_bot_response(message, history):
-    text, audio_path = get_text_and_audio(message)
-    # Return text only to keep UI identical; audio is generated in /tmp
-    return text
-# -----------------------------
-# UI (theme parameter removed to avoid Gradio version mismatch)
-# -----------------------------
-demo = gr.ChatInterface(
-    fn=chat_bot_response,
-    title="🤖 Gemini + ElevenLabs Chatbot",
-    description="Chat with Google Gemini AI with voice responses from ElevenLabs",
-    examples=[
-        "How are you doing?",
-        "What are your interests?",
-        "Tell me a short story",
-        "What's the weather like today?",
-        "Explain quantum computing in simple terms"
-    ]
-)
 if __name__ == "__main__":
-    demo.launch(debug=True, share=True)

 import os
+import time
+import json
 import requests
+import gradio as gr
 import google.generativeai as genai
+from google.api_core.exceptions import ResourceExhausted
# -----------------------
# Configuration / secrets
# -----------------------
# API credentials are read from the process environment.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")

# ElevenLabs voice used for speech synthesis; change if you prefer another voice.
ELEVENLABS_VOICE_ID = "21m00Tcm4TlvDq8ikWAM"
# Directory that generated audio files are written into.
AUDIO_TMP_DIR = "/tmp"

# Fail at import time: nothing below can work without a Gemini key.
if not GEMINI_API_KEY:
    raise RuntimeError("Missing GEMINI_API_KEY in environment. Set it in HF Space Secrets.")

# Configure the Gemini SDK once, then build the single shared model instance.
genai.configure(api_key=GEMINI_API_KEY)
gemini_model = genai.GenerativeModel("gemini-2.5-flash")
# -----------------------
# Simple in-memory chat memory
# -----------------------
class SimpleMemory:
    """Bounded, in-process store of the most recent chat messages.

    Messages are kept in ``history`` as ``(role, text)`` tuples, oldest
    first. Once more than ``max_messages`` entries are stored, the oldest
    ones are discarded.
    """

    def __init__(self, max_messages=20):
        """Create an empty memory that retains at most ``max_messages`` entries."""
        self.max_messages = max_messages
        self.history = []  # (role, text) tuples, oldest first

    def add(self, role, text):
        """Append ``(role, text)`` and drop the oldest entries beyond the cap."""
        self.history.append((role, text))
        overflow = len(self.history) - self.max_messages
        if overflow > 0:
            # Trim by counting from the front. ``history[-max_messages:]``
            # would keep the whole list when max_messages is 0, because
            # ``-0 == 0`` turns the slice into ``history[0:]``.
            self.history = self.history[overflow:]

    def as_prompt_text(self):
        """Render the history as newline-joined ``User:`` / ``Chatbot:`` lines.

        Entries whose role is ``"user"`` are labelled ``User:``; every other
        role is labelled ``Chatbot:``.
        """
        return "\n".join(
            f"User: {text}" if role == "user" else f"Chatbot: {text}"
            for role, text in self.history
        )


memory = SimpleMemory(max_messages=20)
# -----------------------
# Gemini text generation (safe)
# -----------------------
PROMPT_TEMPLATE = """You are a helpful assistant.
{chat_history}
User: {user_message}
Chatbot:"""


def generate_text_with_gemini(user_message):
    """Ask Gemini for a reply to ``user_message``, with stored history as context.

    The prompt is ``PROMPT_TEMPLATE`` filled with the text rendering of the
    module-level ``memory`` and the new user message.

    Returns:
        ``(text, None)`` on success, or ``(None, error_message)`` when the
        call fails. Failures are printed and reported, not raised.
    """
    prompt = PROMPT_TEMPLATE.format(
        chat_history=memory.as_prompt_text(),
        user_message=user_message,
    )
    try:
        response = gemini_model.generate_content(prompt)
        if hasattr(response, "text"):
            reply = response.text
        else:
            reply = str(response)
    except ResourceExhausted as exc:
        # Quota exceeded: hand back a friendly, fixed message.
        print("Gemini quota exhausted:", exc)
        return None, "Gemini quota exceeded. Please try again later."
    except Exception as exc:
        print("Gemini error:", exc)
        return None, "Gemini error: " + str(exc)
    return reply, None
# -----------------------
# ElevenLabs HTTP fallback (robust)
# -----------------------
def generate_audio_elevenlabs_http(text):
    """Synthesize ``text`` to an MP3 file through the ElevenLabs REST API.

    Failures are printed and reported through the return value rather than
    raised, so a caller can always fall back to a text-only reply.

    Args:
        text: The text to speak.

    Returns:
        A ``(output_path, error_message)`` tuple. On success ``output_path``
        is the path of the MP3 written under ``AUDIO_TMP_DIR`` and
        ``error_message`` is ``""``. On failure ``output_path`` is ``""`` and
        ``error_message`` describes what went wrong.
    """
    if not ELEVENLABS_API_KEY:
        return "", "ELEVENLABS_API_KEY not configured."
    if not text or not str(text).strip():
        # Nothing to speak: skip the network round trip instead of sending
        # an empty request to the API.
        return "", "No text to synthesize."

    url = f"https://api.elevenlabs.io/v1/text-to-speech/{ELEVENLABS_VOICE_ID}"
    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": ELEVENLABS_API_KEY,
    }
    payload = {
        "text": text,
        "model_id": "eleven_monolingual_v1",
        "voice_settings": {"stability": 0.5, "similarity_boost": 0.5},
    }

    # Broad catch is deliberate here: any transport failure must degrade to
    # "no audio" instead of crashing the chat request.
    try:
        resp = requests.post(url, json=payload, headers=headers, timeout=30)
    except Exception as e:
        err = f"HTTP request to ElevenLabs failed: {e}"
        print(err)
        return "", err

    if resp.status_code != 200:
        # Include the response body (parsed JSON when possible) in the error.
        try:
            body = resp.json()
        except Exception:
            body = resp.text
        err = f"ElevenLabs API error {resp.status_code}: {body}"
        print(err)
        return "", err

    try:
        # File name combines a millisecond timestamp with a short hash of
        # the text so successive replies do not overwrite each other.
        filename = f"audio_{int(time.time()*1000)}_{abs(hash(text))%100000}.mp3"
        path = os.path.join(AUDIO_TMP_DIR, filename)
        with open(path, "wb") as f:
            f.write(resp.content)
    except Exception as e:
        err = f"Failed to save audio file: {e}"
        print(err)
        return "", err
    return path, ""
# -----------------------
# Main combined workflow
# -----------------------
def process_user_message(user_message):
    """Run one chat turn: generate the reply, record it, then try to voice it.

    Returns:
        A ``(chat_history_list, audio_path_or_empty, error_message_or_empty)``
        tuple. ``chat_history_list`` is a copy of the stored ``(role, message)``
        entries for the UI chat component.
    """
    reply, gen_err = generate_text_with_gemini(user_message)

    # The user's message is recorded whether or not generation succeeded.
    memory.add("user", user_message)

    if gen_err:
        # Don't crash: record a friendly apology as the bot turn, no audio.
        memory.add("bot", "Sorry — the assistant is temporarily unavailable: " + gen_err)
        return list(memory.history), "", gen_err

    memory.add("bot", reply)

    # Audio is best-effort; a failure is only logged and reported.
    audio_path, audio_err = generate_audio_elevenlabs_http(reply)
    if audio_err:
        print("Audio generation error:", audio_err)

    return list(memory.history), audio_path or "", audio_err or ""
# -----------------------
# Gradio UI (Blocks)
# -----------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Gemini + ElevenLabs Chatbot (Text + Audio replies)")
    chatbot = gr.Chatbot(elem_id="chatbot")
    with gr.Row():
        txt = gr.Textbox(show_label=False, placeholder="Type your message and press Enter")
        send_btn = gr.Button("Send")
    audio_player = gr.Audio(label="Last reply audio (if available)", visible=False)

    def submit_message(message):
        """Handle a submitted message; return updates for the chat and audio player.

        Returns a 2-tuple matching ``outputs=[chatbot, audio_player]``: the
        chat content as ``(user, bot)`` pairs, and an update that shows the
        audio player with the new file or hides it when no audio exists.
        """
        if not message or not message.strip():
            # Blank submit: leave both components untouched instead of
            # spending a Gemini call and recording an empty turn.
            return gr.update(), gr.update()

        history, audio_path, _audio_err = process_user_message(message)

        # Memory stores alternating user/bot entries; fold them into
        # (user, bot) pairs for display. A bot entry with no preceding
        # user entry is paired with an empty user string.
        pairs = []
        pending_user = None
        for role, msg in history:
            if role == "user":
                pending_user = msg
            else:
                pairs.append((pending_user or "", msg))
                pending_user = None

        if audio_path:
            return pairs, gr.update(value=audio_path, visible=True)
        return pairs, gr.update(value=None, visible=False)

    # The Send button and pressing Enter in the textbox share one handler.
    send_btn.click(fn=submit_message, inputs=[txt], outputs=[chatbot, audio_player])
    txt.submit(fn=submit_message, inputs=[txt], outputs=[chatbot, audio_player])

# Launch
if __name__ == "__main__":
    # Do not enable share=True unless you want a public link
    demo.launch(debug=True)