Update main.py
Browse files
main.py
CHANGED
|
@@ -363,36 +363,6 @@ def generate_quiz_with_gemini(notes_content, difficulty, num_questions=5):
|
|
| 363 |
raise RuntimeError(f"AI failed to generate quiz: {e}")
|
| 364 |
|
| 365 |
|
| 366 |
-
# === ElevenLabs TTS Helper ===
|
| 367 |
-
|
| 368 |
-
def generate_tts_audio(text_to_speak, voice_id="Rachel"):  # Example voice, choose one available
    """Generate TTS audio using ElevenLabs and return the audio bytes.

    Args:
        text_to_speak: Text to synthesize.
        voice_id: Value forwarded as ``voice`` to
            ``elevenlabs_client.generate``. Defaults to ``"Rachel"``.

    Returns:
        bytes: All chunks yielded by the audio stream, concatenated.

    Raises:
        ConnectionError: If the module-level ``elevenlabs_client`` is falsy.
        RuntimeError: If generation fails for any reason, including an
            empty result. The underlying exception is chained as
            ``__cause__``.
    """
    if not elevenlabs_client:
        raise ConnectionError("ElevenLabs client not initialized.")
    try:
        # Stream the audio generation
        audio_stream = elevenlabs_client.generate(
            text=text_to_speak,
            voice=voice_id,  # You can customize this
            model="eleven_multilingual_v2",  # Or another suitable model
            stream=True,
        )

        # Join the streamed chunks once; repeated ``+=`` on bytes copies the
        # growing buffer on every iteration.
        audio_bytes = b"".join(audio_stream)

        if not audio_bytes:
            raise ValueError("ElevenLabs generated empty audio.")

        return audio_bytes

    except Exception as e:
        logging.error(f"ElevenLabs TTS generation failed: {e}")
        # Chain the cause so the original traceback is kept for debugging.
        raise RuntimeError(f"Failed to generate audio: {e}") from e
|
| 394 |
-
|
| 395 |
-
|
| 396 |
# === Authentication Endpoints ===
|
| 397 |
|
| 398 |
|
|
@@ -914,12 +884,41 @@ except ImportError:
|
|
| 914 |
raise ImportError("pydub/ffmpeg not installed or accessible")
|
| 915 |
|
| 916 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 917 |
@app.route('/api/tutor/notes/<uuid:notes_id>/speak', methods=['POST'])
|
| 918 |
def speak_notes(notes_id):
|
| 919 |
"""
|
| 920 |
-
Generate TTS audio for notes using ElevenLabs
|
| 921 |
combine chunks using pydub, and store the final MP3 in Supabase Storage.
|
| 922 |
Updates the note record with the audio URL and deducts credits.
|
|
|
|
| 923 |
"""
|
| 924 |
if not PYDUB_AVAILABLE:
|
| 925 |
logging.error("Audio processing library (pydub/ffmpeg) check failed.")
|
|
@@ -985,8 +984,16 @@ def speak_notes(notes_id):
|
|
| 985 |
if not notes_content or not notes_content.strip():
|
| 986 |
logging.warning(f"Note {notes_id} content is empty.")
|
| 987 |
return jsonify({'error': 'Notes content is empty'}), 400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 988 |
|
| 989 |
-
# 4. Generate TTS Audio with chunking (
|
| 990 |
# ElevenLabs v2 non-streaming limit is often around 2500 chars, but check docs.
|
| 991 |
CHUNK_SIZE = 2500
|
| 992 |
text_chunks = [notes_content[i:i+CHUNK_SIZE] for i in range(0, len(notes_content), CHUNK_SIZE)]
|
|
@@ -997,16 +1004,15 @@ def speak_notes(notes_id):
|
|
| 997 |
for i, chunk in enumerate(text_chunks):
|
| 998 |
try:
|
| 999 |
logging.debug(f"Generating audio for chunk {i+1}/{len(text_chunks)}...")
|
| 1000 |
-
|
| 1001 |
-
|
| 1002 |
-
|
| 1003 |
-
|
| 1004 |
-
|
| 1005 |
-
stream=False
|
| 1006 |
)
|
| 1007 |
|
| 1008 |
if not chunk_audio_bytes:
|
| 1009 |
-
logging.warning(f"
|
| 1010 |
continue # Skip this chunk, maybe log or handle differently if needed
|
| 1011 |
|
| 1012 |
# Load chunk audio bytes into pydub AudioSegment using BytesIO
|
|
|
|
| 363 |
raise RuntimeError(f"AI failed to generate quiz: {e}")
|
| 364 |
|
| 365 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 366 |
# === Authentication Endpoints ===
|
| 367 |
|
| 368 |
|
|
|
|
| 884 |
raise ImportError("pydub/ffmpeg not installed or accessible")
|
| 885 |
|
| 886 |
|
| 887 |
+
def generate_tts_audio(text_to_speak, voice_id="Rachel"):
    """Generate TTS audio using ElevenLabs and return the audio bytes.

    Args:
        text_to_speak: Text to synthesize.
        voice_id: Value forwarded as ``voice`` to
            ``elevenlabs_client.generate``. Defaults to ``"Rachel"``.

    Returns:
        bytes: All chunks yielded by the audio stream, concatenated.

    Raises:
        ConnectionError: If the module-level ``elevenlabs_client`` is falsy.
        RuntimeError: If generation fails for any reason, including an
            empty result. The underlying exception is chained as
            ``__cause__``.
    """
    if not elevenlabs_client:
        raise ConnectionError("ElevenLabs client not initialized.")
    try:
        # Stream the audio generation
        audio_stream = elevenlabs_client.generate(
            text=text_to_speak,
            voice=voice_id,  # You can customize this
            model="eleven_multilingual_v2",  # Or another suitable model
            stream=True,
        )

        # Join the streamed chunks once; repeated ``+=`` on bytes copies the
        # growing buffer on every iteration.
        audio_bytes = b"".join(audio_stream)

        if not audio_bytes:
            raise ValueError("ElevenLabs generated empty audio.")

        return audio_bytes

    except Exception as e:
        logging.error(f"ElevenLabs TTS generation failed: {e}")
        # Chain the cause so the original traceback is kept for debugging.
        raise RuntimeError(f"Failed to generate audio: {e}") from e
|
| 913 |
+
|
| 914 |
+
|
| 915 |
@app.route('/api/tutor/notes/<uuid:notes_id>/speak', methods=['POST'])
|
| 916 |
def speak_notes(notes_id):
|
| 917 |
"""
|
| 918 |
+
Generate TTS audio for notes using ElevenLabs,
|
| 919 |
combine chunks using pydub, and store the final MP3 in Supabase Storage.
|
| 920 |
Updates the note record with the audio URL and deducts credits.
|
| 921 |
+
Rejects requests for content over 10,000 characters.
|
| 922 |
"""
|
| 923 |
if not PYDUB_AVAILABLE:
|
| 924 |
logging.error("Audio processing library (pydub/ffmpeg) check failed.")
|
|
|
|
| 984 |
if not notes_content or not notes_content.strip():
|
| 985 |
logging.warning(f"Note {notes_id} content is empty.")
|
| 986 |
return jsonify({'error': 'Notes content is empty'}), 400
|
| 987 |
+
|
| 988 |
+
# Check for character limit (10,000 characters)
|
| 989 |
+
if len(notes_content) > 10000:
|
| 990 |
+
logging.warning(f"Note {notes_id} content exceeds 10,000 character limit ({len(notes_content)} chars).")
|
| 991 |
+
return jsonify({
|
| 992 |
+
'error': 'Content exceeds maximum length',
|
| 993 |
+
'message': f'Note content is {len(notes_content)} characters. Maximum allowed is 10,000 characters.'
|
| 994 |
+
}), 413
|
| 995 |
|
| 996 |
+
# 4. Generate TTS Audio with chunking (still need chunking for long texts)
|
| 997 |
# ElevenLabs v2 non-streaming limit is often around 2500 chars, but check docs.
|
| 998 |
CHUNK_SIZE = 2500
|
| 999 |
text_chunks = [notes_content[i:i+CHUNK_SIZE] for i in range(0, len(notes_content), CHUNK_SIZE)]
|
|
|
|
| 1004 |
for i, chunk in enumerate(text_chunks):
|
| 1005 |
try:
|
| 1006 |
logging.debug(f"Generating audio for chunk {i+1}/{len(text_chunks)}...")
|
| 1007 |
+
|
| 1008 |
+
# Use the new generate_tts_audio function
|
| 1009 |
+
chunk_audio_bytes = generate_tts_audio(
|
| 1010 |
+
text_to_speak=chunk.strip(), # Ensure no leading/trailing whitespace in chunk
|
| 1011 |
+
voice_id="Rachel" # Or your desired voice ID
|
|
|
|
| 1012 |
)
|
| 1013 |
|
| 1014 |
if not chunk_audio_bytes:
|
| 1015 |
+
logging.warning(f"TTS generation returned empty audio for chunk {i+1} of note {notes_id}")
|
| 1016 |
continue # Skip this chunk, maybe log or handle differently if needed
|
| 1017 |
|
| 1018 |
# Load chunk audio bytes into pydub AudioSegment using BytesIO
|