Spaces:

rairo
/

tutor-api

Sleeping

App Files Community

rairo committed on Mar 30, 2025

Commit

e786f22

verified ·

1 Parent(s): 7fb877b

Update main.py

Browse files

Files changed (1) hide show

main.py +45 -39

main.py CHANGED Viewed

@@ -363,36 +363,6 @@ def generate_quiz_with_gemini(notes_content, difficulty, num_questions=5):
         raise RuntimeError(f"AI failed to generate quiz: {e}")
-# === ElevenLabs TTS Helper ===
-def generate_tts_audio(text_to_speak, voice_id="Rachel"): # Example voice, choose one available
-    """Generates TTS audio using ElevenLabs and returns audio bytes."""
-    if not elevenlabs_client:
-        raise ConnectionError("ElevenLabs client not initialized.")
-    try:
-        # Stream the audio generation
-        audio_stream = elevenlabs_client.generate(
-            text=text_to_speak,
-            voice=voice_id, # You can customize this
-            model="eleven_multilingual_v2", # Or another suitable model
-            stream=True
-        )
-        # Collect audio bytes from the stream
-        audio_bytes = b""
-        for chunk in audio_stream:
-            audio_bytes += chunk
-        if not audio_bytes:
-             raise ValueError("ElevenLabs generated empty audio.")
-        return audio_bytes
-    except Exception as e:
-        logging.error(f"ElevenLabs TTS generation failed: {e}")
-        raise RuntimeError(f"Failed to generate audio: {e}")
 # === Authentication Endpoints ===
@@ -914,12 +884,41 @@ except ImportError:
              raise ImportError("pydub/ffmpeg not installed or accessible")
 @app.route('/api/tutor/notes/<uuid:notes_id>/speak', methods=['POST'])
 def speak_notes(notes_id):
     """
-    Generate TTS audio for notes using ElevenLabs (non-streaming),
     combine chunks using pydub, and store the final MP3 in Supabase Storage.
     Updates the note record with the audio URL and deducts credits.
     """
     if not PYDUB_AVAILABLE:
          logging.error("Audio processing library (pydub/ffmpeg) check failed.")
@@ -985,8 +984,16 @@ def speak_notes(notes_id):
         if not notes_content or not notes_content.strip():
             logging.warning(f"Note {notes_id} content is empty.")
             return jsonify({'error': 'Notes content is empty'}), 400
-        # 4. Generate TTS Audio with chunking (Non-Streaming) and combine with pydub
         # ElevenLabs v2 non-streaming limit is often around 2500 chars, but check docs.
         CHUNK_SIZE = 2500
         text_chunks = [notes_content[i:i+CHUNK_SIZE] for i in range(0, len(notes_content), CHUNK_SIZE)]
@@ -997,16 +1004,15 @@ def speak_notes(notes_id):
         for i, chunk in enumerate(text_chunks):
             try:
                 logging.debug(f"Generating audio for chunk {i+1}/{len(text_chunks)}...")
-                # Use stream=False (default) for non-streaming generation
-                chunk_audio_bytes = elevenlabs_client.generate(
-                    text=chunk.strip(), # Ensure no leading/trailing whitespace in chunk
-                    voice="Rachel",       # Or your desired voice ID
-                    model="eleven_multilingual_v2",
-                    stream=False
                 )
                 if not chunk_audio_bytes:
-                     logging.warning(f"ElevenLabs returned empty audio for chunk {i+1} of note {notes_id}")
                      continue # Skip this chunk, maybe log or handle differently if needed
                 # Load chunk audio bytes into pydub AudioSegment using BytesIO

         raise RuntimeError(f"AI failed to generate quiz: {e}")
 # === Authentication Endpoints ===
              raise ImportError("pydub/ffmpeg not installed or accessible")
+def generate_tts_audio(text_to_speak, voice_id="Rachel"):
+    """Generates TTS audio using ElevenLabs and returns audio bytes."""
+    if not elevenlabs_client:
+        raise ConnectionError("ElevenLabs client not initialized.")
+    try:
+        # Stream the audio generation
+        audio_stream = elevenlabs_client.generate(
+            text=text_to_speak,
+            voice=voice_id, # You can customize this
+            model="eleven_multilingual_v2", # Or another suitable model
+            stream=True
+        )
+        # Collect audio bytes from the stream
+        audio_bytes = b""
+        for chunk in audio_stream:
+            audio_bytes += chunk
+        if not audio_bytes:
+             raise ValueError("ElevenLabs generated empty audio.")
+        return audio_bytes
+    except Exception as e:
+        logging.error(f"ElevenLabs TTS generation failed: {e}")
+        raise RuntimeError(f"Failed to generate audio: {e}")
 @app.route('/api/tutor/notes/<uuid:notes_id>/speak', methods=['POST'])
 def speak_notes(notes_id):
     """
+    Generate TTS audio for notes using ElevenLabs,
     combine chunks using pydub, and store the final MP3 in Supabase Storage.
     Updates the note record with the audio URL and deducts credits.
+    Rejects requests for content over 10,000 characters.
     """
     if not PYDUB_AVAILABLE:
          logging.error("Audio processing library (pydub/ffmpeg) check failed.")
         if not notes_content or not notes_content.strip():
             logging.warning(f"Note {notes_id} content is empty.")
             return jsonify({'error': 'Notes content is empty'}), 400
+        # Check for character limit (10,000 characters)
+        if len(notes_content) > 10000:
+            logging.warning(f"Note {notes_id} content exceeds 10,000 character limit ({len(notes_content)} chars).")
+            return jsonify({
+                'error': 'Content exceeds maximum length',
+                'message': f'Note content is {len(notes_content)} characters. Maximum allowed is 10,000 characters.'
+            }), 413
+        # 4. Generate TTS Audio with chunking (still need chunking for long texts)
         # ElevenLabs v2 non-streaming limit is often around 2500 chars, but check docs.
         CHUNK_SIZE = 2500
         text_chunks = [notes_content[i:i+CHUNK_SIZE] for i in range(0, len(notes_content), CHUNK_SIZE)]
         for i, chunk in enumerate(text_chunks):
             try:
                 logging.debug(f"Generating audio for chunk {i+1}/{len(text_chunks)}...")
+                # Use the new generate_tts_audio function
+                chunk_audio_bytes = generate_tts_audio(
+                    text_to_speak=chunk.strip(), # Ensure no leading/trailing whitespace in chunk
+                    voice_id="Rachel"            # Or your desired voice ID
                 )
                 if not chunk_audio_bytes:
+                     logging.warning(f"TTS generation returned empty audio for chunk {i+1} of note {notes_id}")
                      continue # Skip this chunk, maybe log or handle differently if needed
                 # Load chunk audio bytes into pydub AudioSegment using BytesIO