rairo committed on
Commit
6ef20f2
·
verified ·
1 Parent(s): cdff30c

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +58 -1
main.py CHANGED
@@ -71,7 +71,7 @@ except Exception as e:
71
  # --- Model Constants (as per Streamlit app) ---
72
  CATEGORY_MODEL = "gemini-2.0-flash-exp"
73
  GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"
74
- TTS_MODEL = "gemini-2.5-flash-preview-tts"
75
 
76
 
77
  # -----------------------------------------------------------------------------
@@ -126,6 +126,9 @@ def _convert_pcm_to_wav(pcm_data, sample_rate=24000, channels=1, sample_width=2)
126
  audio_buffer.seek(0)
127
  return audio_buffer.getvalue()
128
 
 
 
 
129
  def generate_tts_audio_and_upload(text_to_speak, uid, project_id, step_num):
130
  """Generates audio using the exact method from the Streamlit app and uploads it."""
131
  try:
@@ -156,6 +159,60 @@ def generate_tts_audio_and_upload(text_to_speak, uid, project_id, step_num):
156
  except Exception as e:
157
  print(f"Error during TTS generation for step {step_num}: {e}")
158
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
  def send_text_request(model_name, prompt, image):
161
  """Helper to send requests that expect only a text response."""
 
71
  # --- Model Constants (as per Streamlit app) ---
72
  CATEGORY_MODEL = "gemini-2.0-flash-exp"
73
  GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"
74
+ #TTS_MODEL = "gemini-2.5-flash-preview-tts"
75
 
76
 
77
  # -----------------------------------------------------------------------------
 
126
  audio_buffer.seek(0)
127
  return audio_buffer.getvalue()
128
 
129
+
130
+ # Gemini TTS implementation: state-of-the-art quality, but slow.
131
+ '''
132
  def generate_tts_audio_and_upload(text_to_speak, uid, project_id, step_num):
133
  """Generates audio using the exact method from the Streamlit app and uploads it."""
134
  try:
 
159
  except Exception as e:
160
  print(f"Error during TTS generation for step {step_num}: {e}")
161
  return None
162
+ '''
163
+
164
+ # Deepgram TTS implementation: faster and more efficient.
165
def generate_tts_audio_and_upload(text_to_speak, uid, project_id, step_num):
    """Generate narration audio with the Deepgram TTS API and upload it.

    Drop-in replacement for the previous Google GenAI TTS function: the
    signature is unchanged and every failure is reported by printing a
    message and returning ``None`` rather than raising.

    Args:
        text_to_speak: Text to synthesize. It is sent UTF-8 encoded as the
            raw request body (not wrapped in JSON).
        uid: User identifier; becomes part of the storage path.
        project_id: Project identifier; becomes part of the storage path.
        step_num: Step number; used in the storage file name and in log
            messages.

    Returns:
        The value returned by ``upload_to_storage`` for the uploaded audio
        (assigned to ``narration_url``; presumably a URL -- confirm against
        that helper), or ``None`` when the API key is missing, the text is
        empty, the HTTP request fails or times out, or any other error
        occurs.
    """
    # (connect, read) timeout in seconds. Without a timeout, requests waits
    # forever on a stalled connection and the caller would hang with it.
    request_timeout = (5, 60)

    try:
        # --- Step 1: Get the Deepgram API key from environment variables ---
        api_key = os.environ.get("DEEPGRAM_API_KEY")
        if not api_key:
            print("FATAL: DEEPGRAM_API_KEY environment variable not set.")
            return None

        # Nothing to synthesize: skip the network round trip entirely.
        if not text_to_speak or not text_to_speak.strip():
            print(f"Skipping TTS generation for step {step_num}: no text to speak.")
            return None

        # --- Step 2: Define the API endpoint and headers ---
        # The model 'aura-2-draco-en' is specified as a query parameter.
        deepgram_url = "https://api.deepgram.com/v1/speak?model=aura-2-draco-en"
        headers = {
            "Authorization": f"Token {api_key}",
            # The body is raw text, so it is declared as plain text.
            "Content-Type": "text/plain",
        }

        # --- Step 3: Make the API call to Deepgram ---
        response = requests.post(
            deepgram_url,
            headers=headers,
            data=text_to_speak.encode("utf-8"),
            timeout=request_timeout,
        )

        # Raise an exception for bad status codes (4xx or 5xx).
        response.raise_for_status()

        # The raw audio bytes are the response body.
        audio_data = response.content

        # --- Step 4: Upload the received audio to Firebase Storage ---
        # The audio is stored as MP3 (MIME type 'audio/mpeg').
        audio_path = f"users/{uid}/projects/{project_id}/narrations/step_{step_num}.mp3"
        narration_url = upload_to_storage(audio_data, audio_path, 'audio/mpeg')

        return narration_url

    except requests.exceptions.RequestException as e:
        # Covers connection errors, timeouts and HTTP error statuses.
        print(f"Error during Deepgram API call for step {step_num}: {e}")
        # Log the response body if available for more detailed error info.
        if e.response is not None:
            print(f"Deepgram Error Response: {e.response.text}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred during TTS generation for step {step_num}: {e}")
        return None
215
+
216
 
217
  def send_text_request(model_name, prompt, image):
218
  """Helper to send requests that expect only a text response."""