Spaces:
Sleeping
Sleeping
Peter Michael Gits Claude commited on
Commit ·
cf3d441
1
Parent(s): 353335b
debug: Add comprehensive logging to verify STT service audio reception
Browse files- Added debug logging to gradio_transcribe_wrapper with file details
- Enhanced HTTP API endpoint with request/response logging
- Added startup debug messages for service configuration
- Debug logs will show if audio files are actually reaching the service
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
- __pycache__/app.cpython-313.pyc +0 -0
- app.py +52 -9
__pycache__/app.cpython-313.pyc
CHANGED
|
Binary files a/__pycache__/app.cpython-313.pyc and b/__pycache__/app.cpython-313.pyc differ
|
|
|
app.py
CHANGED
|
@@ -41,7 +41,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
| 41 |
@spaces.GPU(duration=30)
|
| 42 |
def transcribe_audio_zerogpu(
|
| 43 |
audio_path: str,
|
| 44 |
-
language: str = "
|
| 45 |
model_size_param: str = "base"
|
| 46 |
) -> tuple[str, str, Dict[str, Any]]:
|
| 47 |
"""Transcribe audio file using Whisper with ZeroGPU"""
|
|
@@ -133,23 +133,44 @@ def get_service_info():
|
|
| 133 |
Connect your WebRTC client to: `wss://your-space.hf.space/ws/stt`
|
| 134 |
"""
|
| 135 |
|
| 136 |
-
def gradio_transcribe_wrapper(audio_file, language="
|
| 137 |
"""Gradio wrapper for transcription function"""
|
| 138 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
if audio_file is None:
|
|
|
|
| 140 |
return "❌ No audio file provided", "{}", "Please upload an audio file"
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
# Use the ZeroGPU transcription function
|
|
|
|
| 143 |
transcription, status, timing = transcribe_audio_zerogpu(
|
| 144 |
audio_file, language, model_size_param
|
| 145 |
)
|
| 146 |
|
|
|
|
|
|
|
|
|
|
| 147 |
if status == "success":
|
| 148 |
return f"✅ {transcription}", json.dumps(timing, indent=2), f"Status: {status}"
|
| 149 |
else:
|
| 150 |
return f"❌ Transcription failed", json.dumps(timing, indent=2), f"Status: {status}"
|
| 151 |
|
| 152 |
except Exception as e:
|
|
|
|
|
|
|
| 153 |
return f"❌ Error: {str(e)}", "{}", "Error occurred during transcription"
|
| 154 |
|
| 155 |
# Create Gradio interface with transcription functionality
|
|
@@ -176,9 +197,9 @@ with gr.Blocks(title="π€ STT WebSocket Service v1.0.0") as demo:
|
|
| 176 |
sources=["upload", "microphone"]
|
| 177 |
)
|
| 178 |
language_input = gr.Dropdown(
|
| 179 |
-
choices=["
|
| 180 |
-
value="
|
| 181 |
-
label="Language (
|
| 182 |
)
|
| 183 |
model_input = gr.Dropdown(
|
| 184 |
choices=["tiny", "base", "small", "medium", "large-v2"],
|
|
@@ -289,7 +310,7 @@ async def process_audio_message(client_id: str, message: Dict[str, Any]):
|
|
| 289 |
# Transcribe audio using global ZeroGPU function
|
| 290 |
transcription, status, timing = transcribe_audio_zerogpu(
|
| 291 |
temp_path,
|
| 292 |
-
message.get("language", "
|
| 293 |
message.get("model_size", model_size)
|
| 294 |
)
|
| 295 |
|
|
@@ -330,7 +351,11 @@ async def process_audio_message(client_id: str, message: Dict[str, Any]):
|
|
| 330 |
# For HuggingFace Spaces - we need to launch the Gradio demo
|
| 331 |
# and add WebSocket routes to its internal FastAPI app
|
| 332 |
if __name__ == "__main__":
|
| 333 |
-
logger.info(f"🎤 Starting {__service__} v{__version__} with Gradio+WebSocket integration")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
|
| 335 |
# Create FastAPI app for WebSocket endpoints
|
| 336 |
fastapi_app = FastAPI(title="STT WebSocket API")
|
|
@@ -415,23 +440,39 @@ if __name__ == "__main__":
|
|
| 415 |
@fastapi_app.post("/api/transcribe")
|
| 416 |
async def http_transcribe_endpoint(
|
| 417 |
file: UploadFile = File(...),
|
| 418 |
-
language: str = Form("
|
| 419 |
model_size_param: str = Form("base")
|
| 420 |
):
|
| 421 |
"""HTTP transcription endpoint for Streamlit WebRTC integration"""
|
| 422 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 423 |
# Save uploaded file
|
| 424 |
with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as tmp_file:
|
| 425 |
content = await file.read()
|
| 426 |
tmp_file.write(content)
|
| 427 |
temp_path = tmp_file.name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 428 |
|
| 429 |
try:
|
| 430 |
# Transcribe using ZeroGPU function
|
|
|
|
| 431 |
transcription, status, timing = transcribe_audio_zerogpu(
|
| 432 |
temp_path, language, model_size_param
|
| 433 |
)
|
| 434 |
|
|
|
|
|
|
|
|
|
|
| 435 |
if status == "success":
|
| 436 |
return {
|
| 437 |
"status": "success",
|
|
@@ -451,9 +492,11 @@ if __name__ == "__main__":
|
|
| 451 |
# Clean up
|
| 452 |
if os.path.exists(temp_path):
|
| 453 |
os.unlink(temp_path)
|
|
|
|
| 454 |
|
| 455 |
except Exception as e:
|
| 456 |
-
|
|
|
|
| 457 |
return {
|
| 458 |
"status": "error",
|
| 459 |
"message": f"HTTP transcription failed: {str(e)}",
|
|
|
|
| 41 |
@spaces.GPU(duration=30)
|
| 42 |
def transcribe_audio_zerogpu(
|
| 43 |
audio_path: str,
|
| 44 |
+
language: str = "en",
|
| 45 |
model_size_param: str = "base"
|
| 46 |
) -> tuple[str, str, Dict[str, Any]]:
|
| 47 |
"""Transcribe audio file using Whisper with ZeroGPU"""
|
|
|
|
| 133 |
Connect your WebRTC client to: `wss://your-space.hf.space/ws/stt`
|
| 134 |
"""
|
| 135 |
|
| 136 |
+
def gradio_transcribe_wrapper(audio_file, language="en", model_size_param="base"):
|
| 137 |
"""Gradio wrapper for transcription function"""
|
| 138 |
try:
|
| 139 |
+
# DEBUG: Log all incoming requests
|
| 140 |
+
logger.info(f"🎤 DEBUG: Gradio transcription request received")
|
| 141 |
+
logger.info(f"🎤 DEBUG: Audio file: {audio_file}")
|
| 142 |
+
logger.info(f"🎤 DEBUG: Language: {language}")
|
| 143 |
+
logger.info(f"🎤 DEBUG: Model size: {model_size_param}")
|
| 144 |
+
|
| 145 |
if audio_file is None:
|
| 146 |
+
logger.warning("🎤 DEBUG: No audio file provided to Gradio wrapper")
|
| 147 |
return "❌ No audio file provided", "{}", "Please upload an audio file"
|
| 148 |
|
| 149 |
+
# DEBUG: Check file details
|
| 150 |
+
if isinstance(audio_file, str) and os.path.exists(audio_file):
|
| 151 |
+
file_size = os.path.getsize(audio_file)
|
| 152 |
+
logger.info(f"🎤 DEBUG: Audio file size: {file_size} bytes")
|
| 153 |
+
logger.info(f"🎤 DEBUG: Audio file path: {audio_file}")
|
| 154 |
+
else:
|
| 155 |
+
logger.warning(f"🎤 DEBUG: Invalid audio file: {type(audio_file)}")
|
| 156 |
+
|
| 157 |
# Use the ZeroGPU transcription function
|
| 158 |
+
logger.info(f"🎤 DEBUG: Calling transcribe_audio_zerogpu...")
|
| 159 |
transcription, status, timing = transcribe_audio_zerogpu(
|
| 160 |
audio_file, language, model_size_param
|
| 161 |
)
|
| 162 |
|
| 163 |
+
logger.info(f"🎤 DEBUG: Transcription result: '{transcription[:100]}...'")
|
| 164 |
+
logger.info(f"🎤 DEBUG: Status: {status}")
|
| 165 |
+
|
| 166 |
if status == "success":
|
| 167 |
return f"✅ {transcription}", json.dumps(timing, indent=2), f"Status: {status}"
|
| 168 |
else:
|
| 169 |
return f"❌ Transcription failed", json.dumps(timing, indent=2), f"Status: {status}"
|
| 170 |
|
| 171 |
except Exception as e:
|
| 172 |
+
error_msg = f"Error in gradio_transcribe_wrapper: {str(e)}"
|
| 173 |
+
logger.error(f"🎤 DEBUG: {error_msg}")
|
| 174 |
return f"❌ Error: {str(e)}", "{}", "Error occurred during transcription"
|
| 175 |
|
| 176 |
# Create Gradio interface with transcription functionality
|
|
|
|
| 197 |
sources=["upload", "microphone"]
|
| 198 |
)
|
| 199 |
language_input = gr.Dropdown(
|
| 200 |
+
choices=["en", "auto", "es", "fr", "de", "it", "pt", "ru", "ja", "ko", "zh"],
|
| 201 |
+
value="en",
|
| 202 |
+
label="Language (English by default)"
|
| 203 |
)
|
| 204 |
model_input = gr.Dropdown(
|
| 205 |
choices=["tiny", "base", "small", "medium", "large-v2"],
|
|
|
|
| 310 |
# Transcribe audio using global ZeroGPU function
|
| 311 |
transcription, status, timing = transcribe_audio_zerogpu(
|
| 312 |
temp_path,
|
| 313 |
+
message.get("language", "en"),
|
| 314 |
message.get("model_size", model_size)
|
| 315 |
)
|
| 316 |
|
|
|
|
| 351 |
# For HuggingFace Spaces - we need to launch the Gradio demo
|
| 352 |
# and add WebSocket routes to its internal FastAPI app
|
| 353 |
if __name__ == "__main__":
|
| 354 |
+
logger.info(f"🎤 DEBUG: Starting {__service__} v{__version__} with Gradio+WebSocket integration")
|
| 355 |
+
logger.info(f"🎤 DEBUG: Device: {device}")
|
| 356 |
+
logger.info(f"🎤 DEBUG: Model size: {model_size}")
|
| 357 |
+
logger.info(f"🎤 DEBUG: Default language: English (en)")
|
| 358 |
+
logger.info(f"🎤 DEBUG: Service ready for connections")
|
| 359 |
|
| 360 |
# Create FastAPI app for WebSocket endpoints
|
| 361 |
fastapi_app = FastAPI(title="STT WebSocket API")
|
|
|
|
| 440 |
@fastapi_app.post("/api/transcribe")
|
| 441 |
async def http_transcribe_endpoint(
|
| 442 |
file: UploadFile = File(...),
|
| 443 |
+
language: str = Form("en"),
|
| 444 |
model_size_param: str = Form("base")
|
| 445 |
):
|
| 446 |
"""HTTP transcription endpoint for Streamlit WebRTC integration"""
|
| 447 |
try:
|
| 448 |
+
# DEBUG: Log incoming HTTP request
|
| 449 |
+
logger.info(f"🌐 DEBUG: HTTP transcribe request received")
|
| 450 |
+
logger.info(f"🌐 DEBUG: File name: {file.filename}")
|
| 451 |
+
logger.info(f"🌐 DEBUG: Content type: {file.content_type}")
|
| 452 |
+
logger.info(f"🌐 DEBUG: Language: {language}")
|
| 453 |
+
logger.info(f"🌐 DEBUG: Model size: {model_size_param}")
|
| 454 |
+
|
| 455 |
# Save uploaded file
|
| 456 |
with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as tmp_file:
|
| 457 |
content = await file.read()
|
| 458 |
tmp_file.write(content)
|
| 459 |
temp_path = tmp_file.name
|
| 460 |
+
|
| 461 |
+
# DEBUG: Log file details
|
| 462 |
+
file_size = len(content)
|
| 463 |
+
logger.info(f"🌐 DEBUG: Uploaded file size: {file_size} bytes")
|
| 464 |
+
logger.info(f"🌐 DEBUG: Temp file path: {temp_path}")
|
| 465 |
|
| 466 |
try:
|
| 467 |
# Transcribe using ZeroGPU function
|
| 468 |
+
logger.info(f"🌐 DEBUG: Starting HTTP transcription...")
|
| 469 |
transcription, status, timing = transcribe_audio_zerogpu(
|
| 470 |
temp_path, language, model_size_param
|
| 471 |
)
|
| 472 |
|
| 473 |
+
logger.info(f"🌐 DEBUG: HTTP transcription result: '{transcription[:100] if transcription else 'None'}...'")
|
| 474 |
+
logger.info(f"🌐 DEBUG: HTTP status: {status}")
|
| 475 |
+
|
| 476 |
if status == "success":
|
| 477 |
return {
|
| 478 |
"status": "success",
|
|
|
|
| 492 |
# Clean up
|
| 493 |
if os.path.exists(temp_path):
|
| 494 |
os.unlink(temp_path)
|
| 495 |
+
logger.info(f"🌐 DEBUG: Cleaned up temp file: {temp_path}")
|
| 496 |
|
| 497 |
except Exception as e:
|
| 498 |
+
error_msg = f"HTTP transcription error: {e}"
|
| 499 |
+
logger.error(f"🌐 DEBUG: {error_msg}")
|
| 500 |
return {
|
| 501 |
"status": "error",
|
| 502 |
"message": f"HTTP transcription failed: {str(e)}",
|