shanusherly committed on
Commit
3e5f5e4
·
verified ·
1 Parent(s): 9d3af1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -107
app.py CHANGED
@@ -10,16 +10,17 @@ from google.api_core.exceptions import ResourceExhausted
10
  # Config / Secrets
11
  # -----------------------
12
  GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
13
- ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
14
- ELEVENLABS_MODEL_ID = os.environ.get("ELEVENLABS_MODEL_ID", None) # optional override
15
- ELEVENLABS_VOICE_ID = os.environ.get("ELEVENLABS_VOICE_ID", "21m00Tcm4TlvDq8ikWAM")
16
- HF_API_TOKEN = os.environ.get("HF_API_TOKEN") # Hugging Face fallback token
17
- HF_TTS_MODEL = os.environ.get("HF_TTS_MODEL", "microsoft/speecht5_tts") # fallback HF model id
18
  AUDIO_TMP_DIR = "/tmp"
19
 
20
  if not GEMINI_API_KEY:
21
  raise RuntimeError("Missing GEMINI_API_KEY in environment. Add it to HF Space Secrets as GEMINI_API_KEY.")
22
 
 
 
 
 
23
  # Configure Gemini
24
  genai.configure(api_key=GEMINI_API_KEY)
25
  gemini_model = genai.GenerativeModel("gemini-2.5-flash")
@@ -49,7 +50,7 @@ class SimpleMemory:
49
  memory = SimpleMemory(max_messages=40)
50
 
51
  # -----------------------
52
- # Prompt
53
  # -----------------------
54
  PROMPT_TEMPLATE = """You are a helpful assistant.
55
  {chat_history}
@@ -107,89 +108,12 @@ def generate_text_with_gemini(user_message):
107
  return None, f"Gemini error: {repr(efinal)}"
108
 
109
  # -----------------------
110
- # ElevenLabs HTTP TTS (tries a list of models)
111
- # Returns (path, error)
112
- # -----------------------
113
- def generate_audio_elevenlabs_http(text):
114
- if not ELEVENLABS_API_KEY:
115
- return "", "ELEVENLABS_API_KEY not configured."
116
-
117
- candidates = []
118
- if ELEVENLABS_MODEL_ID:
119
- candidates.append(ELEVENLABS_MODEL_ID)
120
- candidates += [
121
- "eleven_multilingual_v2",
122
- "eleven_creative_v1",
123
- "eleven_standard_v1",
124
- # legacy (likely deprecated) left last
125
- "eleven_monolingual_v1",
126
- "eleven_multilingual_v1",
127
- ]
128
-
129
- url_template = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
130
- last_err = None
131
-
132
- for model_id in [m for m in candidates if m]:
133
- url = url_template.format(voice_id=ELEVENLABS_VOICE_ID)
134
- headers = {
135
- "Accept": "audio/mpeg",
136
- "Content-Type": "application/json",
137
- "xi-api-key": ELEVENLABS_API_KEY
138
- }
139
- payload = {
140
- "text": text,
141
- "model_id": model_id,
142
- "voice_settings": {"stability": 0.5, "similarity_boost": 0.5}
143
- }
144
-
145
- try:
146
- resp = requests.post(url, json=payload, headers=headers, timeout=30)
147
- except Exception as e:
148
- last_err = f"ElevenLabs HTTP request failed for model {model_id}: {e}"
149
- print(last_err)
150
- continue
151
-
152
- if resp.status_code == 200:
153
- try:
154
- filename = f"audio_{int(time.time()*1000)}_{abs(hash(text))%100000}.mp3"
155
- path = os.path.join(AUDIO_TMP_DIR, filename)
156
- with open(path, "wb") as f:
157
- f.write(resp.content)
158
- print(f"ElevenLabs: audio saved to {path} using model {model_id}")
159
- return path, ""
160
- except Exception as e:
161
- last_err = f"Failed to save ElevenLabs audio for {model_id}: {e}"
162
- print(last_err)
163
- continue
164
- else:
165
- try:
166
- body = resp.json()
167
- except Exception:
168
- body = resp.text
169
- last_err = f"ElevenLabs API error {resp.status_code} (model={model_id}): {body}"
170
- print(last_err)
171
- # If the API indicates deprecated free tier, stop trying deprecated models
172
- try:
173
- detail = body.get("detail") if isinstance(body, dict) else None
174
- if detail and isinstance(detail, dict):
175
- status = detail.get("status", "")
176
- if "model_deprecated_free_tier" in str(status) or "detected_unusual_activity" in str(status):
177
- # break early in many cases
178
- break
179
- except Exception:
180
- pass
181
- continue
182
-
183
- return "", last_err or "Unknown ElevenLabs error"
184
-
185
- # -----------------------
186
- # Hugging Face Inference API TTS fallback
187
- # Requires HF_API_TOKEN in Secrets
188
  # Returns (path, error)
189
  # -----------------------
190
  def generate_audio_hf_inference(text):
191
  if not HF_API_TOKEN:
192
- return "", "HF_API_TOKEN not configured for fallback TTS."
193
 
194
  hf_url = f"https://api-inference.huggingface.co/models/{HF_TTS_MODEL}"
195
  headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
@@ -219,22 +143,6 @@ def generate_audio_hf_inference(text):
219
  body = resp.text
220
  return "", f"HuggingFace TTS error {resp.status_code}: {body}"
221
 
222
- # -----------------------
223
- # Combined audio generator: ElevenLabs -> HuggingFace fallback
224
- # -----------------------
225
- def generate_audio_with_fallback(text):
226
- # Try ElevenLabs first
227
- if ELEVENLABS_API_KEY:
228
- path, err = generate_audio_elevenlabs_http(text)
229
- if path:
230
- return path, ""
231
- print("ElevenLabs failed, will try HuggingFace fallback. reason:", err)
232
- # Try HF fallback
233
- path, err = generate_audio_hf_inference(text)
234
- if path:
235
- return path, ""
236
- return "", err or "All TTS providers failed."
237
-
238
  # -----------------------
239
  # Convert memory -> messages list for Gradio
240
  # -----------------------
@@ -250,7 +158,7 @@ def convert_memory_to_messages(history):
250
  # Returns (messages_list, audio_path, error)
251
  # -----------------------
252
  def process_user_message(user_message):
253
- # 1) generate text (robust)
254
  text, gen_err = generate_text_with_gemini(user_message)
255
  if gen_err:
256
  memory.add("user", user_message)
@@ -262,10 +170,10 @@ def process_user_message(user_message):
262
  memory.add("user", user_message)
263
  memory.add("bot", text)
264
 
265
- # 3) generate audio with fallback
266
- audio_path, audio_err = generate_audio_with_fallback(text)
267
  if audio_err:
268
- print("Audio generation error (all fallbacks):", audio_err)
269
 
270
  return convert_memory_to_messages(memory.history), audio_path or "", audio_err or ""
271
 
@@ -273,7 +181,7 @@ def process_user_message(user_message):
273
  # Gradio UI (Blocks) with debug UI
274
  # -----------------------
275
  with gr.Blocks() as demo:
276
- gr.Markdown("## 🤖 Gemini + TTS Chatbot (ElevenLabs → HuggingFace fallback)\n\nAudio shown if produced.")
277
  chatbot = gr.Chatbot()
278
  with gr.Row():
279
  txt = gr.Textbox(show_label=False, placeholder="Type your message and press Enter")
@@ -284,7 +192,6 @@ with gr.Blocks() as demo:
284
  def submit_message(message):
285
  messages, audio_path, err = process_user_message(message)
286
  if audio_path:
287
- # success: show audio and show path in debug box
288
  debug_msg = f"Audio saved: {audio_path}"
289
  return messages, gr.update(value=audio_path, visible=True), gr.update(value=debug_msg, visible=True)
290
  elif err:
 
10
  # Config / Secrets
11
  # -----------------------
12
  GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
13
+ HF_API_TOKEN = os.environ.get("HF_API_TOKEN") # required for TTS
14
+ HF_TTS_MODEL = os.environ.get("HF_TTS_MODEL", "microsoft/speecht5_tts") # default fallback HF model
 
 
 
15
  AUDIO_TMP_DIR = "/tmp"
16
 
17
  if not GEMINI_API_KEY:
18
  raise RuntimeError("Missing GEMINI_API_KEY in environment. Add it to HF Space Secrets as GEMINI_API_KEY.")
19
 
20
+ if not HF_API_TOKEN:
21
+ # we'll still run text-only, but audio will fail until HF_API_TOKEN is set
22
+ print("Warning: HF_API_TOKEN not set. Audio will be unavailable until set in Space Secrets.")
23
+
24
  # Configure Gemini
25
  genai.configure(api_key=GEMINI_API_KEY)
26
  gemini_model = genai.GenerativeModel("gemini-2.5-flash")
 
50
  memory = SimpleMemory(max_messages=40)
51
 
52
  # -----------------------
53
+ # Prompt template
54
  # -----------------------
55
  PROMPT_TEMPLATE = """You are a helpful assistant.
56
  {chat_history}
 
108
  return None, f"Gemini error: {repr(efinal)}"
109
 
110
  # -----------------------
111
+ # Hugging Face Inference API TTS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  # Returns (path, error)
113
  # -----------------------
114
  def generate_audio_hf_inference(text):
115
  if not HF_API_TOKEN:
116
+ return "", "HF_API_TOKEN not configured for TTS."
117
 
118
  hf_url = f"https://api-inference.huggingface.co/models/{HF_TTS_MODEL}"
119
  headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
 
143
  body = resp.text
144
  return "", f"HuggingFace TTS error {resp.status_code}: {body}"
145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  # -----------------------
147
  # Convert memory -> messages list for Gradio
148
  # -----------------------
 
158
  # Returns (messages_list, audio_path, error)
159
  # -----------------------
160
  def process_user_message(user_message):
161
+ # 1) generate text
162
  text, gen_err = generate_text_with_gemini(user_message)
163
  if gen_err:
164
  memory.add("user", user_message)
 
170
  memory.add("user", user_message)
171
  memory.add("bot", text)
172
 
173
+ # 3) generate audio via Hugging Face
174
+ audio_path, audio_err = generate_audio_hf_inference(text)
175
  if audio_err:
176
+ print("Audio generation error (HF):", audio_err)
177
 
178
  return convert_memory_to_messages(memory.history), audio_path or "", audio_err or ""
179
 
 
181
  # Gradio UI (Blocks) with debug UI
182
  # -----------------------
183
  with gr.Blocks() as demo:
184
+ gr.Markdown("## 🤖 Gemini + Hugging Face TTS Chatbot\n\nAudio generated using Hugging Face Inference API.")
185
  chatbot = gr.Chatbot()
186
  with gr.Row():
187
  txt = gr.Textbox(show_label=False, placeholder="Type your message and press Enter")
 
192
  def submit_message(message):
193
  messages, audio_path, err = process_user_message(message)
194
  if audio_path:
 
195
  debug_msg = f"Audio saved: {audio_path}"
196
  return messages, gr.update(value=audio_path, visible=True), gr.update(value=debug_msg, visible=True)
197
  elif err: