Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,16 +10,17 @@ from google.api_core.exceptions import ResourceExhausted
|
|
| 10 |
# Config / Secrets
|
| 11 |
# -----------------------
|
| 12 |
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
ELEVENLABS_VOICE_ID = os.environ.get("ELEVENLABS_VOICE_ID", "21m00Tcm4TlvDq8ikWAM")
|
| 16 |
-
HF_API_TOKEN = os.environ.get("HF_API_TOKEN") # Hugging Face fallback token
|
| 17 |
-
HF_TTS_MODEL = os.environ.get("HF_TTS_MODEL", "microsoft/speecht5_tts") # fallback HF model id
|
| 18 |
AUDIO_TMP_DIR = "/tmp"
|
| 19 |
|
| 20 |
if not GEMINI_API_KEY:
|
| 21 |
raise RuntimeError("Missing GEMINI_API_KEY in environment. Add it to HF Space Secrets as GEMINI_API_KEY.")
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
# Configure Gemini
|
| 24 |
genai.configure(api_key=GEMINI_API_KEY)
|
| 25 |
gemini_model = genai.GenerativeModel("gemini-2.5-flash")
|
|
@@ -49,7 +50,7 @@ class SimpleMemory:
|
|
| 49 |
memory = SimpleMemory(max_messages=40)
|
| 50 |
|
| 51 |
# -----------------------
|
| 52 |
-
# Prompt
|
| 53 |
# -----------------------
|
| 54 |
PROMPT_TEMPLATE = """You are a helpful assistant.
|
| 55 |
{chat_history}
|
|
@@ -107,89 +108,12 @@ def generate_text_with_gemini(user_message):
|
|
| 107 |
return None, f"Gemini error: {repr(efinal)}"
|
| 108 |
|
| 109 |
# -----------------------
|
| 110 |
-
#
|
| 111 |
-
# Returns (path, error)
|
| 112 |
-
# -----------------------
|
| 113 |
-
def generate_audio_elevenlabs_http(text):
|
| 114 |
-
if not ELEVENLABS_API_KEY:
|
| 115 |
-
return "", "ELEVENLABS_API_KEY not configured."
|
| 116 |
-
|
| 117 |
-
candidates = []
|
| 118 |
-
if ELEVENLABS_MODEL_ID:
|
| 119 |
-
candidates.append(ELEVENLABS_MODEL_ID)
|
| 120 |
-
candidates += [
|
| 121 |
-
"eleven_multilingual_v2",
|
| 122 |
-
"eleven_creative_v1",
|
| 123 |
-
"eleven_standard_v1",
|
| 124 |
-
# legacy (likely deprecated) left last
|
| 125 |
-
"eleven_monolingual_v1",
|
| 126 |
-
"eleven_multilingual_v1",
|
| 127 |
-
]
|
| 128 |
-
|
| 129 |
-
url_template = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
|
| 130 |
-
last_err = None
|
| 131 |
-
|
| 132 |
-
for model_id in [m for m in candidates if m]:
|
| 133 |
-
url = url_template.format(voice_id=ELEVENLABS_VOICE_ID)
|
| 134 |
-
headers = {
|
| 135 |
-
"Accept": "audio/mpeg",
|
| 136 |
-
"Content-Type": "application/json",
|
| 137 |
-
"xi-api-key": ELEVENLABS_API_KEY
|
| 138 |
-
}
|
| 139 |
-
payload = {
|
| 140 |
-
"text": text,
|
| 141 |
-
"model_id": model_id,
|
| 142 |
-
"voice_settings": {"stability": 0.5, "similarity_boost": 0.5}
|
| 143 |
-
}
|
| 144 |
-
|
| 145 |
-
try:
|
| 146 |
-
resp = requests.post(url, json=payload, headers=headers, timeout=30)
|
| 147 |
-
except Exception as e:
|
| 148 |
-
last_err = f"ElevenLabs HTTP request failed for model {model_id}: {e}"
|
| 149 |
-
print(last_err)
|
| 150 |
-
continue
|
| 151 |
-
|
| 152 |
-
if resp.status_code == 200:
|
| 153 |
-
try:
|
| 154 |
-
filename = f"audio_{int(time.time()*1000)}_{abs(hash(text))%100000}.mp3"
|
| 155 |
-
path = os.path.join(AUDIO_TMP_DIR, filename)
|
| 156 |
-
with open(path, "wb") as f:
|
| 157 |
-
f.write(resp.content)
|
| 158 |
-
print(f"ElevenLabs: audio saved to {path} using model {model_id}")
|
| 159 |
-
return path, ""
|
| 160 |
-
except Exception as e:
|
| 161 |
-
last_err = f"Failed to save ElevenLabs audio for {model_id}: {e}"
|
| 162 |
-
print(last_err)
|
| 163 |
-
continue
|
| 164 |
-
else:
|
| 165 |
-
try:
|
| 166 |
-
body = resp.json()
|
| 167 |
-
except Exception:
|
| 168 |
-
body = resp.text
|
| 169 |
-
last_err = f"ElevenLabs API error {resp.status_code} (model={model_id}): {body}"
|
| 170 |
-
print(last_err)
|
| 171 |
-
# If the API indicates deprecated free tier, stop trying deprecated models
|
| 172 |
-
try:
|
| 173 |
-
detail = body.get("detail") if isinstance(body, dict) else None
|
| 174 |
-
if detail and isinstance(detail, dict):
|
| 175 |
-
status = detail.get("status", "")
|
| 176 |
-
if "model_deprecated_free_tier" in str(status) or "detected_unusual_activity" in str(status):
|
| 177 |
-
# break early in many cases
|
| 178 |
-
break
|
| 179 |
-
except Exception:
|
| 180 |
-
pass
|
| 181 |
-
continue
|
| 182 |
-
|
| 183 |
-
return "", last_err or "Unknown ElevenLabs error"
|
| 184 |
-
|
| 185 |
-
# -----------------------
|
| 186 |
-
# Hugging Face Inference API TTS fallback
|
| 187 |
-
# Requires HF_API_TOKEN in Secrets
|
| 188 |
# Returns (path, error)
|
| 189 |
# -----------------------
|
| 190 |
def generate_audio_hf_inference(text):
|
| 191 |
if not HF_API_TOKEN:
|
| 192 |
-
return "", "HF_API_TOKEN not configured for TTS."
|
| 193 |
|
| 194 |
hf_url = f"https://api-inference.huggingface.co/models/{HF_TTS_MODEL}"
|
| 195 |
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
|
|
@@ -219,22 +143,6 @@ def generate_audio_hf_inference(text):
|
|
| 219 |
body = resp.text
|
| 220 |
return "", f"HuggingFace TTS error {resp.status_code}: {body}"
|
| 221 |
|
| 222 |
-
# -----------------------
|
| 223 |
-
# Combined audio generator: ElevenLabs -> HuggingFace fallback
|
| 224 |
-
# -----------------------
|
| 225 |
-
def generate_audio_with_fallback(text):
|
| 226 |
-
# Try ElevenLabs first
|
| 227 |
-
if ELEVENLABS_API_KEY:
|
| 228 |
-
path, err = generate_audio_elevenlabs_http(text)
|
| 229 |
-
if path:
|
| 230 |
-
return path, ""
|
| 231 |
-
print("ElevenLabs failed, will try HuggingFace fallback. reason:", err)
|
| 232 |
-
# Try HF fallback
|
| 233 |
-
path, err = generate_audio_hf_inference(text)
|
| 234 |
-
if path:
|
| 235 |
-
return path, ""
|
| 236 |
-
return "", err or "All TTS providers failed."
|
| 237 |
-
|
| 238 |
# -----------------------
|
| 239 |
# Convert memory -> messages list for Gradio
|
| 240 |
# -----------------------
|
|
@@ -250,7 +158,7 @@ def convert_memory_to_messages(history):
|
|
| 250 |
# Returns (messages_list, audio_path, error)
|
| 251 |
# -----------------------
|
| 252 |
def process_user_message(user_message):
|
| 253 |
-
# 1) generate text
|
| 254 |
text, gen_err = generate_text_with_gemini(user_message)
|
| 255 |
if gen_err:
|
| 256 |
memory.add("user", user_message)
|
|
@@ -262,10 +170,10 @@ def process_user_message(user_message):
|
|
| 262 |
memory.add("user", user_message)
|
| 263 |
memory.add("bot", text)
|
| 264 |
|
| 265 |
-
# 3) generate audio
|
| 266 |
-
audio_path, audio_err = generate_audio_with_fallback(text)
|
| 267 |
if audio_err:
|
| 268 |
-
print("Audio generation error (non-fatal):", audio_err)
|
| 269 |
|
| 270 |
return convert_memory_to_messages(memory.history), audio_path or "", audio_err or ""
|
| 271 |
|
|
@@ -273,7 +181,7 @@ def process_user_message(user_message):
|
|
| 273 |
# Gradio UI (Blocks) with debug UI
|
| 274 |
# -----------------------
|
| 275 |
with gr.Blocks() as demo:
|
| 276 |
-
gr.Markdown("## 🤖 Gemini + TTS Chatbot")
|
| 277 |
chatbot = gr.Chatbot()
|
| 278 |
with gr.Row():
|
| 279 |
txt = gr.Textbox(show_label=False, placeholder="Type your message and press Enter")
|
|
@@ -284,7 +192,6 @@ with gr.Blocks() as demo:
|
|
| 284 |
def submit_message(message):
|
| 285 |
messages, audio_path, err = process_user_message(message)
|
| 286 |
if audio_path:
|
| 287 |
-
# success: show audio and show path in debug box
|
| 288 |
debug_msg = f"Audio saved: {audio_path}"
|
| 289 |
return messages, gr.update(value=audio_path, visible=True), gr.update(value=debug_msg, visible=True)
|
| 290 |
elif err:
|
|
|
|
| 10 |
# Config / Secrets
|
| 11 |
# -----------------------
|
| 12 |
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
|
| 13 |
+
HF_API_TOKEN = os.environ.get("HF_API_TOKEN") # required for TTS
|
| 14 |
+
HF_TTS_MODEL = os.environ.get("HF_TTS_MODEL", "microsoft/speecht5_tts") # default fallback HF model
|
|
|
|
|
|
|
|
|
|
| 15 |
AUDIO_TMP_DIR = "/tmp"
|
| 16 |
|
| 17 |
if not GEMINI_API_KEY:
|
| 18 |
raise RuntimeError("Missing GEMINI_API_KEY in environment. Add it to HF Space Secrets as GEMINI_API_KEY.")
|
| 19 |
|
| 20 |
+
if not HF_API_TOKEN:
|
| 21 |
+
# we'll still run text-only, but audio will fail until HF_API_TOKEN is set
|
| 22 |
+
print("Warning: HF_API_TOKEN not set. Audio will be unavailable until set in Space Secrets.")
|
| 23 |
+
|
| 24 |
# Configure Gemini
|
| 25 |
genai.configure(api_key=GEMINI_API_KEY)
|
| 26 |
gemini_model = genai.GenerativeModel("gemini-2.5-flash")
|
|
|
|
| 50 |
memory = SimpleMemory(max_messages=40)
|
| 51 |
|
| 52 |
# -----------------------
|
| 53 |
+
# Prompt template
|
| 54 |
# -----------------------
|
| 55 |
PROMPT_TEMPLATE = """You are a helpful assistant.
|
| 56 |
{chat_history}
|
|
|
|
| 108 |
return None, f"Gemini error: {repr(efinal)}"
|
| 109 |
|
| 110 |
# -----------------------
|
| 111 |
+
# Hugging Face Inference API TTS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
# Returns (path, error)
|
| 113 |
# -----------------------
|
| 114 |
def generate_audio_hf_inference(text):
|
| 115 |
if not HF_API_TOKEN:
|
| 116 |
+
return "", "HF_API_TOKEN not configured for TTS."
|
| 117 |
|
| 118 |
hf_url = f"https://api-inference.huggingface.co/models/{HF_TTS_MODEL}"
|
| 119 |
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
|
|
|
|
| 143 |
body = resp.text
|
| 144 |
return "", f"HuggingFace TTS error {resp.status_code}: {body}"
|
| 145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
# -----------------------
|
| 147 |
# Convert memory -> messages list for Gradio
|
| 148 |
# -----------------------
|
|
|
|
| 158 |
# Returns (messages_list, audio_path, error)
|
| 159 |
# -----------------------
|
| 160 |
def process_user_message(user_message):
|
| 161 |
+
# 1) generate text
|
| 162 |
text, gen_err = generate_text_with_gemini(user_message)
|
| 163 |
if gen_err:
|
| 164 |
memory.add("user", user_message)
|
|
|
|
| 170 |
memory.add("user", user_message)
|
| 171 |
memory.add("bot", text)
|
| 172 |
|
| 173 |
+
# 3) generate audio via Hugging Face
|
| 174 |
+
audio_path, audio_err = generate_audio_hf_inference(text)
|
| 175 |
if audio_err:
|
| 176 |
+
print("Audio generation error (HF):", audio_err)
|
| 177 |
|
| 178 |
return convert_memory_to_messages(memory.history), audio_path or "", audio_err or ""
|
| 179 |
|
|
|
|
| 181 |
# Gradio UI (Blocks) with debug UI
|
| 182 |
# -----------------------
|
| 183 |
with gr.Blocks() as demo:
|
| 184 |
+
gr.Markdown("## 🤖 Gemini + Hugging Face TTS Chatbot\n\nAudio generated using Hugging Face Inference API.")
|
| 185 |
chatbot = gr.Chatbot()
|
| 186 |
with gr.Row():
|
| 187 |
txt = gr.Textbox(show_label=False, placeholder="Type your message and press Enter")
|
|
|
|
| 192 |
def submit_message(message):
|
| 193 |
messages, audio_path, err = process_user_message(message)
|
| 194 |
if audio_path:
|
|
|
|
| 195 |
debug_msg = f"Audio saved: {audio_path}"
|
| 196 |
return messages, gr.update(value=audio_path, visible=True), gr.update(value=debug_msg, visible=True)
|
| 197 |
elif err:
|