Spaces:

rairo
/

NeoFix-API

Running

App Files Files Community

rairo committed on Jun 15, 2025

Commit

6ef20f2

verified ·

1 Parent(s): cdff30c

Update main.py

Browse files

Files changed (1) hide show

main.py +58 -1

main.py CHANGED Viewed

@@ -71,7 +71,7 @@ except Exception as e:
 # --- Model Constants (as per Streamlit app) ---
 CATEGORY_MODEL = "gemini-2.0-flash-exp"
 GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"
-TTS_MODEL = "gemini-2.5-flash-preview-tts"
 # -----------------------------------------------------------------------------
@@ -126,6 +126,9 @@ def _convert_pcm_to_wav(pcm_data, sample_rate=24000, channels=1, sample_width=2)
     audio_buffer.seek(0)
     return audio_buffer.getvalue()
 def generate_tts_audio_and_upload(text_to_speak, uid, project_id, step_num):
     """Generates audio using the exact method from the Streamlit app and uploads it."""
     try:
@@ -156,6 +159,60 @@ def generate_tts_audio_and_upload(text_to_speak, uid, project_id, step_num):
     except Exception as e:
         print(f"Error during TTS generation for step {step_num}: {e}")
         return None
 def send_text_request(model_name, prompt, image):
     """Helper to send requests that expect only a text response."""

 # --- Model Constants (as per Streamlit app) ---
 CATEGORY_MODEL = "gemini-2.0-flash-exp"
 GENERATION_MODEL = "gemini-2.0-flash-exp-image-generation"
+#TTS_MODEL = "gemini-2.5-flash-preview-tts"
 # -----------------------------------------------------------------------------
     audio_buffer.seek(0)
     return audio_buffer.getvalue()
+# Gemini TTS implementation: state-of-the-art quality, but slow (disabled below).
+'''
 def generate_tts_audio_and_upload(text_to_speak, uid, project_id, step_num):
     """Generates audio using the exact method from the Streamlit app and uploads it."""
     try:
     except Exception as e:
         print(f"Error during TTS generation for step {step_num}: {e}")
         return None
+'''
+# Deepgram TTS implementation: faster and more efficient.
+def generate_tts_audio_and_upload(text_to_speak, uid, project_id, step_num):
+    """
+    Generates audio using the Deepgram TTS API and uploads it to Firebase Storage.
+    This is a drop-in replacement for the previous Google GenAI TTS function.
+    """
+    try:
+        # --- Step 1: Get the Deepgram API Key from environment variables ---
+        api_key = os.environ.get("DEEPGRAM_API_KEY")
+        if not api_key:
+            print("FATAL: DEEPGRAM_API_KEY environment variable not set.")
+            return None
+        # --- Step 2: Define the API endpoint and headers ---
+        # The model 'aura-2-draco-en' is specified as a query parameter in the URL.
+        DEEPGRAM_URL = "https://api.deepgram.com/v1/speak?model=aura-2-draco-en"
+        headers = {
+            "Authorization": f"Token {api_key}",
+            "Content-Type": "text/plain"  # As per Deepgram's requirement for this type of request
+        }
+        # --- Step 3: Make the API call to Deepgram ---
+        # Deepgram expects the raw text as the request body, not in a JSON object.
+        # We send the text directly in the 'data' parameter.
+        response = requests.post(DEEPGRAM_URL, headers=headers, data=text_to_speak.encode('utf-8'))
+        # Raise an exception for bad status codes (4xx or 5xx)
+        response.raise_for_status()
+        # The raw audio data is in the response content
+        audio_data = response.content
+        # --- Step 4: Upload the received audio to Firebase Storage ---
+        # The output format from this Deepgram model is MP3.
+        audio_path = f"users/{uid}/projects/{project_id}/narrations/step_{step_num}.mp3"
+        # The MIME type for MP3 is 'audio/mpeg'.
+        narration_url = upload_to_storage(audio_data, audio_path, 'audio/mpeg')
+        return narration_url
+    except requests.exceptions.RequestException as e:
+        print(f"Error during Deepgram API call for step {step_num}: {e}")
+        # Log the response body if available for more detailed error info
+        if e.response is not None:
+            print(f"Deepgram Error Response: {e.response.text}")
+        return None
+    except Exception as e:
+        print(f"An unexpected error occurred during TTS generation for step {step_num}: {e}")
+        return None
 def send_text_request(model_name, prompt, image):
     """Helper to send requests that expect only a text response."""