Spaces:

Rajhuggingface4253
/

neu

Paused

App Files Files Community

Rajhuggingface4253 committed on Oct 18, 2025

Commit

1b19075

verified ·

1 Parent(s): b01e4fa

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -18

app.py CHANGED Viewed

@@ -376,41 +376,51 @@ async def stream_text_to_speech_cloning(
     output_format: str = Form("mp3", pattern="^(wav|mp3|flac)$"),
     reference_audio: UploadFile = File(...)):
     """
-    Sentence-by-Sentence Streaming Endpoint (Kokoro Feature adaptation).
-    Includes FFmpeg conversion for uploaded audio format compatibility.
     """
     if not hasattr(app.state, 'tts_wrapper'):
         raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
     # 1. Asynchronously save reference audio (non-blocking)
     temp_ref_path = await save_upload_file_async(reference_audio)
-    converted_wav_path = None # NEW: Initialize for cleanup
     try:
-        # 2. **NEW STEP**: Convert the uploaded file (WebM, etc.) to a 24kHz WAV file using FFmpeg
         converted_wav_path = await run_blocking_task_async(
             convert_to_wav_blocking,
             temp_ref_path
         )
-        # 3. Define the generator function, which will run in the thread pool implicitly
-        def stream_generator():
             try:
-                # The entire streaming process runs blocking inside the thread pool
                 for chunk_bytes in app.state.tts_wrapper.stream_speech_blocking(
                     text,
-                    converted_wav_path, # IMPORTANT: Pass the CONVERTED WAV path
                     speed,
                     output_format
                 ):
                     yield chunk_bytes
             except Exception as e:
                 logger.error(f"Streaming generator error: {e}")
-                # Note: Cleanup for converted_wav_path is handled in the main finally block below.
-        # The StreamingResponse is returned immediately to start the stream
         return StreamingResponse(
-            stream_generator(),
             media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}",
             headers={
                 "Content-Disposition": "attachment; filename=tts_live_stream.mp3",
@@ -418,18 +428,20 @@ async def stream_text_to_speech_cloning(
                 "Cache-Control": "no-cache"
             }
         )
     except Exception as e:
         logger.error(f"Streaming setup error: {e}")
-        # Reraise HTTPExceptions that may have come from the conversion step
-        if isinstance(e, HTTPException):
-             raise
-        raise HTTPException(status_code=500, detail=f"Streaming synthesis failed: {e}")
-    finally:
-        # 4. Clean up BOTH the original file AND the converted WAV file
         if os.path.exists(temp_ref_path):
             os.unlink(temp_ref_path)
         if converted_wav_path and os.path.exists(converted_wav_path):
             os.unlink(converted_wav_path)
 @app.get("/audio/{filename}")
 async def get_audio(filename: str):

     output_format: str = Form("mp3", pattern="^(wav|mp3|flac)$"),
     reference_audio: UploadFile = File(...)):
     """
+    Sentence-by-Sentence Streaming Endpoint.
+    Fixes race condition by moving cleanup into the streaming generator.
     """
     if not hasattr(app.state, 'tts_wrapper'):
         raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
     # 1. Asynchronously save reference audio (non-blocking)
     temp_ref_path = await save_upload_file_async(reference_audio)
+    converted_wav_path = None # Initialize for cleanup
     try:
+        # 2. Convert the uploaded file (WebM, etc.) to a 24kHz WAV file
         converted_wav_path = await run_blocking_task_async(
             convert_to_wav_blocking,
             temp_ref_path
         )
+        # 2.5. CLEANUP ORIGINAL FILE IMMEDIATELY: It is no longer needed after conversion
+        if os.path.exists(temp_ref_path):
+            os.unlink(temp_ref_path)
+        # 3. Define the generator function, which will run in the thread pool
+        def stream_generator(path_to_delete: str):
             try:
+                # This logic uses the path_to_delete parameter, which is guaranteed to exist
                 for chunk_bytes in app.state.tts_wrapper.stream_speech_blocking(
                     text,
+                    path_to_delete, # Pass the CONVERTED WAV path
                     speed,
                     output_format
                 ):
                     yield chunk_bytes
             except Exception as e:
+                # Log the error and raise it to stop the stream
                 logger.error(f"Streaming generator error: {e}")
+                raise # Re-raise to ensure the stream terminates
+            finally:
+                # 4. **CRUCIAL FIX:** Clean up the converted file ONLY AFTER GENERATION IS DONE
+                if os.path.exists(path_to_delete):
+                    os.unlink(path_to_delete)
+                    logger.info(f"Cleaned up converted file: {path_to_delete}")
+        # Return StreamingResponse, passing the path to the generator
         return StreamingResponse(
+            stream_generator(converted_wav_path),
             media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}",
             headers={
                 "Content-Disposition": "attachment; filename=tts_live_stream.mp3",
                 "Cache-Control": "no-cache"
             }
         )
     except Exception as e:
         logger.error(f"Streaming setup error: {e}")
+        # Clean up files only if the setup failed *before* starting the generator
         if os.path.exists(temp_ref_path):
             os.unlink(temp_ref_path)
         if converted_wav_path and os.path.exists(converted_wav_path):
             os.unlink(converted_wav_path)
+        # Reraise HTTPExceptions that may have come from the conversion step
+        if isinstance(e, HTTPException):
+             raise
+        raise HTTPException(status_code=500, detail=f"Streaming synthesis failed: {e}")
+    # Note: The outer 'finally' block is now removed as its logic is handled in 2.5 and 4.
 @app.get("/audio/{filename}")
 async def get_audio(filename: str):