rairo committed on
Commit
e786f22
·
verified ·
1 Parent(s): 7fb877b

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +45 -39
main.py CHANGED
@@ -363,36 +363,6 @@ def generate_quiz_with_gemini(notes_content, difficulty, num_questions=5):
363
  raise RuntimeError(f"AI failed to generate quiz: {e}")
364
 
365
 
366
- # === ElevenLabs TTS Helper ===
367
-
368
- def generate_tts_audio(text_to_speak, voice_id="Rachel"): # Example voice, choose one available
369
- """Generates TTS audio using ElevenLabs and returns audio bytes."""
370
- if not elevenlabs_client:
371
- raise ConnectionError("ElevenLabs client not initialized.")
372
- try:
373
- # Stream the audio generation
374
- audio_stream = elevenlabs_client.generate(
375
- text=text_to_speak,
376
- voice=voice_id, # You can customize this
377
- model="eleven_multilingual_v2", # Or another suitable model
378
- stream=True
379
- )
380
-
381
- # Collect audio bytes from the stream
382
- audio_bytes = b""
383
- for chunk in audio_stream:
384
- audio_bytes += chunk
385
-
386
- if not audio_bytes:
387
- raise ValueError("ElevenLabs generated empty audio.")
388
-
389
- return audio_bytes
390
-
391
- except Exception as e:
392
- logging.error(f"ElevenLabs TTS generation failed: {e}")
393
- raise RuntimeError(f"Failed to generate audio: {e}")
394
-
395
-
396
  # === Authentication Endpoints ===
397
 
398
 
@@ -914,12 +884,41 @@ except ImportError:
914
  raise ImportError("pydub/ffmpeg not installed or accessible")
915
 
916
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
917
  @app.route('/api/tutor/notes/<uuid:notes_id>/speak', methods=['POST'])
918
  def speak_notes(notes_id):
919
  """
920
- Generate TTS audio for notes using ElevenLabs (non-streaming),
921
  combine chunks using pydub, and store the final MP3 in Supabase Storage.
922
  Updates the note record with the audio URL and deducts credits.
 
923
  """
924
  if not PYDUB_AVAILABLE:
925
  logging.error("Audio processing library (pydub/ffmpeg) check failed.")
@@ -985,8 +984,16 @@ def speak_notes(notes_id):
985
  if not notes_content or not notes_content.strip():
986
  logging.warning(f"Note {notes_id} content is empty.")
987
  return jsonify({'error': 'Notes content is empty'}), 400
 
 
 
 
 
 
 
 
988
 
989
- # 4. Generate TTS Audio with chunking (Non-Streaming) and combine with pydub
990
  # ElevenLabs v2 non-streaming limit is often around 2500 chars, but check docs.
991
  CHUNK_SIZE = 2500
992
  text_chunks = [notes_content[i:i+CHUNK_SIZE] for i in range(0, len(notes_content), CHUNK_SIZE)]
@@ -997,16 +1004,15 @@ def speak_notes(notes_id):
997
  for i, chunk in enumerate(text_chunks):
998
  try:
999
  logging.debug(f"Generating audio for chunk {i+1}/{len(text_chunks)}...")
1000
- # Use stream=False (default) for non-streaming generation
1001
- chunk_audio_bytes = elevenlabs_client.generate(
1002
- text=chunk.strip(), # Ensure no leading/trailing whitespace in chunk
1003
- voice="Rachel", # Or your desired voice ID
1004
- model="eleven_multilingual_v2",
1005
- stream=False
1006
  )
1007
 
1008
  if not chunk_audio_bytes:
1009
- logging.warning(f"ElevenLabs returned empty audio for chunk {i+1} of note {notes_id}")
1010
  continue # Skip this chunk, maybe log or handle differently if needed
1011
 
1012
  # Load chunk audio bytes into pydub AudioSegment using BytesIO
 
363
  raise RuntimeError(f"AI failed to generate quiz: {e}")
364
 
365
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
  # === Authentication Endpoints ===
367
 
368
 
 
884
  raise ImportError("pydub/ffmpeg not installed or accessible")
885
 
886
 
887
def generate_tts_audio(text_to_speak: str, voice_id: str = "Rachel") -> bytes:
    """Generate TTS audio with ElevenLabs and return the complete audio bytes.

    The audio is requested in streaming mode and the streamed chunks are
    concatenated into a single bytes object before returning.

    Args:
        text_to_speak: Text to convert to speech.
        voice_id: Voice passed to the ElevenLabs client (defaults to "Rachel").

    Returns:
        The full generated audio as one bytes object.

    Raises:
        ConnectionError: If the module-level ElevenLabs client is not set.
        RuntimeError: If generation fails for any reason, including an empty
            result; the underlying exception is attached as ``__cause__``.
    """
    if not elevenlabs_client:
        raise ConnectionError("ElevenLabs client not initialized.")
    try:
        # Request the audio as a stream of chunks.
        audio_stream = elevenlabs_client.generate(
            text=text_to_speak,
            voice=voice_id,
            model="eleven_multilingual_v2",
            stream=True,
        )

        # Join once instead of repeated `+=`, which re-copies the growing
        # buffer on every chunk. Assumes the stream yields bytes chunks.
        audio_bytes = b"".join(audio_stream)

        if not audio_bytes:
            # Raised inside the try on purpose: callers see a RuntimeError
            # for empty audio, same as for any other generation failure.
            raise ValueError("ElevenLabs generated empty audio.")

        return audio_bytes

    except Exception as e:
        logging.error(f"ElevenLabs TTS generation failed: {e}")
        # Chain the original exception so the root cause stays in tracebacks.
        raise RuntimeError(f"Failed to generate audio: {e}") from e
913
+
914
+
915
  @app.route('/api/tutor/notes/<uuid:notes_id>/speak', methods=['POST'])
916
  def speak_notes(notes_id):
917
  """
918
+ Generate TTS audio for notes using ElevenLabs,
919
  combine chunks using pydub, and store the final MP3 in Supabase Storage.
920
  Updates the note record with the audio URL and deducts credits.
921
+ Rejects requests for content over 10,000 characters.
922
  """
923
  if not PYDUB_AVAILABLE:
924
  logging.error("Audio processing library (pydub/ffmpeg) check failed.")
 
984
  if not notes_content or not notes_content.strip():
985
  logging.warning(f"Note {notes_id} content is empty.")
986
  return jsonify({'error': 'Notes content is empty'}), 400
987
+
988
+ # Check for character limit (10,000 characters)
989
+ if len(notes_content) > 10000:
990
+ logging.warning(f"Note {notes_id} content exceeds 10,000 character limit ({len(notes_content)} chars).")
991
+ return jsonify({
992
+ 'error': 'Content exceeds maximum length',
993
+ 'message': f'Note content is {len(notes_content)} characters. Maximum allowed is 10,000 characters.'
994
+ }), 413
995
 
996
+ # 4. Generate TTS Audio with chunking (still need chunking for long texts)
997
  # ElevenLabs v2 non-streaming limit is often around 2500 chars, but check docs.
998
  CHUNK_SIZE = 2500
999
  text_chunks = [notes_content[i:i+CHUNK_SIZE] for i in range(0, len(notes_content), CHUNK_SIZE)]
 
1004
  for i, chunk in enumerate(text_chunks):
1005
  try:
1006
  logging.debug(f"Generating audio for chunk {i+1}/{len(text_chunks)}...")
1007
+
1008
+ # Use the new generate_tts_audio function
1009
+ chunk_audio_bytes = generate_tts_audio(
1010
+ text_to_speak=chunk.strip(), # Ensure no leading/trailing whitespace in chunk
1011
+ voice_id="Rachel" # Or your desired voice ID
 
1012
  )
1013
 
1014
  if not chunk_audio_bytes:
1015
+ logging.warning(f"TTS generation returned empty audio for chunk {i+1} of note {notes_id}")
1016
  continue # Skip this chunk, maybe log or handle differently if needed
1017
 
1018
  # Load chunk audio bytes into pydub AudioSegment using BytesIO