Peter Michael Gits Claude committed on
Commit
cf3d441
·
1 Parent(s): 353335b

debug: Add comprehensive logging to verify STT service audio reception

Browse files

- Added debug logging to gradio_transcribe_wrapper with file details
- Enhanced HTTP API endpoint with request/response logging
- Added startup debug messages for service configuration
- Debug logs will show if audio files are actually reaching the service

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (2) hide show
  1. __pycache__/app.cpython-313.pyc +0 -0
  2. app.py +52 -9
__pycache__/app.cpython-313.pyc CHANGED
Binary files a/__pycache__/app.cpython-313.pyc and b/__pycache__/app.cpython-313.pyc differ
 
app.py CHANGED
@@ -41,7 +41,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
41
  @spaces.GPU(duration=30)
42
  def transcribe_audio_zerogpu(
43
  audio_path: str,
44
- language: str = "auto",
45
  model_size_param: str = "base"
46
  ) -> tuple[str, str, Dict[str, Any]]:
47
  """Transcribe audio file using Whisper with ZeroGPU"""
@@ -133,23 +133,44 @@ def get_service_info():
133
  Connect your WebRTC client to: `wss://your-space.hf.space/ws/stt`
134
  """
135
 
136
- def gradio_transcribe_wrapper(audio_file, language="auto", model_size_param="base"):
137
  """Gradio wrapper for transcription function"""
138
  try:
 
 
 
 
 
 
139
  if audio_file is None:
 
140
  return "❌ No audio file provided", "{}", "Please upload an audio file"
141
 
 
 
 
 
 
 
 
 
142
  # Use the ZeroGPU transcription function
 
143
  transcription, status, timing = transcribe_audio_zerogpu(
144
  audio_file, language, model_size_param
145
  )
146
 
 
 
 
147
  if status == "success":
148
  return f"βœ… {transcription}", json.dumps(timing, indent=2), f"Status: {status}"
149
  else:
150
  return f"❌ Transcription failed", json.dumps(timing, indent=2), f"Status: {status}"
151
 
152
  except Exception as e:
 
 
153
  return f"❌ Error: {str(e)}", "{}", "Error occurred during transcription"
154
 
155
  # Create Gradio interface with transcription functionality
@@ -176,9 +197,9 @@ with gr.Blocks(title="🎀 STT WebSocket Service v1.0.0") as demo:
176
  sources=["upload", "microphone"]
177
  )
178
  language_input = gr.Dropdown(
179
- choices=["auto", "en", "es", "fr", "de", "it", "pt", "ru", "ja", "ko", "zh"],
180
- value="auto",
181
- label="Language (auto-detect recommended)"
182
  )
183
  model_input = gr.Dropdown(
184
  choices=["tiny", "base", "small", "medium", "large-v2"],
@@ -289,7 +310,7 @@ async def process_audio_message(client_id: str, message: Dict[str, Any]):
289
  # Transcribe audio using global ZeroGPU function
290
  transcription, status, timing = transcribe_audio_zerogpu(
291
  temp_path,
292
- message.get("language", "auto"),
293
  message.get("model_size", model_size)
294
  )
295
 
@@ -330,7 +351,11 @@ async def process_audio_message(client_id: str, message: Dict[str, Any]):
330
  # For HuggingFace Spaces - we need to launch the Gradio demo
331
  # and add WebSocket routes to its internal FastAPI app
332
  if __name__ == "__main__":
333
- logger.info(f"🎀 Starting {__service__} v{__version__} with Gradio+WebSocket integration")
 
 
 
 
334
 
335
  # Create FastAPI app for WebSocket endpoints
336
  fastapi_app = FastAPI(title="STT WebSocket API")
@@ -415,23 +440,39 @@ if __name__ == "__main__":
415
  @fastapi_app.post("/api/transcribe")
416
  async def http_transcribe_endpoint(
417
  file: UploadFile = File(...),
418
- language: str = Form("auto"),
419
  model_size_param: str = Form("base")
420
  ):
421
  """HTTP transcription endpoint for Streamlit WebRTC integration"""
422
  try:
 
 
 
 
 
 
 
423
  # Save uploaded file
424
  with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as tmp_file:
425
  content = await file.read()
426
  tmp_file.write(content)
427
  temp_path = tmp_file.name
 
 
 
 
 
428
 
429
  try:
430
  # Transcribe using ZeroGPU function
 
431
  transcription, status, timing = transcribe_audio_zerogpu(
432
  temp_path, language, model_size_param
433
  )
434
 
 
 
 
435
  if status == "success":
436
  return {
437
  "status": "success",
@@ -451,9 +492,11 @@ if __name__ == "__main__":
451
  # Clean up
452
  if os.path.exists(temp_path):
453
  os.unlink(temp_path)
 
454
 
455
  except Exception as e:
456
- logger.error(f"HTTP transcription error: {e}")
 
457
  return {
458
  "status": "error",
459
  "message": f"HTTP transcription failed: {str(e)}",
 
41
  @spaces.GPU(duration=30)
42
  def transcribe_audio_zerogpu(
43
  audio_path: str,
44
+ language: str = "en",
45
  model_size_param: str = "base"
46
  ) -> tuple[str, str, Dict[str, Any]]:
47
  """Transcribe audio file using Whisper with ZeroGPU"""
 
133
  Connect your WebRTC client to: `wss://your-space.hf.space/ws/stt`
134
  """
135
 
136
+ def gradio_transcribe_wrapper(audio_file, language="en", model_size_param="base"):
137
  """Gradio wrapper for transcription function"""
138
  try:
139
+ # DEBUG: Log all incoming requests
140
+ logger.info(f"🎀 DEBUG: Gradio transcription request received")
141
+ logger.info(f"🎀 DEBUG: Audio file: {audio_file}")
142
+ logger.info(f"🎀 DEBUG: Language: {language}")
143
+ logger.info(f"🎀 DEBUG: Model size: {model_size_param}")
144
+
145
  if audio_file is None:
146
+ logger.warning("🎀 DEBUG: No audio file provided to Gradio wrapper")
147
  return "❌ No audio file provided", "{}", "Please upload an audio file"
148
 
149
+ # DEBUG: Check file details
150
+ if isinstance(audio_file, str) and os.path.exists(audio_file):
151
+ file_size = os.path.getsize(audio_file)
152
+ logger.info(f"🎀 DEBUG: Audio file size: {file_size} bytes")
153
+ logger.info(f"🎀 DEBUG: Audio file path: {audio_file}")
154
+ else:
155
+ logger.warning(f"🎀 DEBUG: Invalid audio file: {type(audio_file)}")
156
+
157
  # Use the ZeroGPU transcription function
158
+ logger.info(f"🎀 DEBUG: Calling transcribe_audio_zerogpu...")
159
  transcription, status, timing = transcribe_audio_zerogpu(
160
  audio_file, language, model_size_param
161
  )
162
 
163
+ logger.info(f"🎀 DEBUG: Transcription result: '{transcription[:100]}...'")
164
+ logger.info(f"🎀 DEBUG: Status: {status}")
165
+
166
  if status == "success":
167
  return f"βœ… {transcription}", json.dumps(timing, indent=2), f"Status: {status}"
168
  else:
169
  return f"❌ Transcription failed", json.dumps(timing, indent=2), f"Status: {status}"
170
 
171
  except Exception as e:
172
+ error_msg = f"Error in gradio_transcribe_wrapper: {str(e)}"
173
+ logger.error(f"🎀 DEBUG: {error_msg}")
174
  return f"❌ Error: {str(e)}", "{}", "Error occurred during transcription"
175
 
176
  # Create Gradio interface with transcription functionality
 
197
  sources=["upload", "microphone"]
198
  )
199
  language_input = gr.Dropdown(
200
+ choices=["en", "auto", "es", "fr", "de", "it", "pt", "ru", "ja", "ko", "zh"],
201
+ value="en",
202
+ label="Language (English by default)"
203
  )
204
  model_input = gr.Dropdown(
205
  choices=["tiny", "base", "small", "medium", "large-v2"],
 
310
  # Transcribe audio using global ZeroGPU function
311
  transcription, status, timing = transcribe_audio_zerogpu(
312
  temp_path,
313
+ message.get("language", "en"),
314
  message.get("model_size", model_size)
315
  )
316
 
 
351
  # For HuggingFace Spaces - we need to launch the Gradio demo
352
  # and add WebSocket routes to its internal FastAPI app
353
  if __name__ == "__main__":
354
+ logger.info(f"🎀 DEBUG: Starting {__service__} v{__version__} with Gradio+WebSocket integration")
355
+ logger.info(f"🎀 DEBUG: Device: {device}")
356
+ logger.info(f"🎀 DEBUG: Model size: {model_size}")
357
+ logger.info(f"🎀 DEBUG: Default language: English (en)")
358
+ logger.info(f"🎀 DEBUG: Service ready for connections")
359
 
360
  # Create FastAPI app for WebSocket endpoints
361
  fastapi_app = FastAPI(title="STT WebSocket API")
 
440
  @fastapi_app.post("/api/transcribe")
441
  async def http_transcribe_endpoint(
442
  file: UploadFile = File(...),
443
+ language: str = Form("en"),
444
  model_size_param: str = Form("base")
445
  ):
446
  """HTTP transcription endpoint for Streamlit WebRTC integration"""
447
  try:
448
+ # DEBUG: Log incoming HTTP request
449
+ logger.info(f"🌐 DEBUG: HTTP transcribe request received")
450
+ logger.info(f"🌐 DEBUG: File name: {file.filename}")
451
+ logger.info(f"🌐 DEBUG: Content type: {file.content_type}")
452
+ logger.info(f"🌐 DEBUG: Language: {language}")
453
+ logger.info(f"🌐 DEBUG: Model size: {model_size_param}")
454
+
455
  # Save uploaded file
456
  with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as tmp_file:
457
  content = await file.read()
458
  tmp_file.write(content)
459
  temp_path = tmp_file.name
460
+
461
+ # DEBUG: Log file details
462
+ file_size = len(content)
463
+ logger.info(f"🌐 DEBUG: Uploaded file size: {file_size} bytes")
464
+ logger.info(f"🌐 DEBUG: Temp file path: {temp_path}")
465
 
466
  try:
467
  # Transcribe using ZeroGPU function
468
+ logger.info(f"🌐 DEBUG: Starting HTTP transcription...")
469
  transcription, status, timing = transcribe_audio_zerogpu(
470
  temp_path, language, model_size_param
471
  )
472
 
473
+ logger.info(f"🌐 DEBUG: HTTP transcription result: '{transcription[:100] if transcription else 'None'}...'")
474
+ logger.info(f"🌐 DEBUG: HTTP status: {status}")
475
+
476
  if status == "success":
477
  return {
478
  "status": "success",
 
492
  # Clean up
493
  if os.path.exists(temp_path):
494
  os.unlink(temp_path)
495
+ logger.info(f"🌐 DEBUG: Cleaned up temp file: {temp_path}")
496
 
497
  except Exception as e:
498
+ error_msg = f"HTTP transcription error: {e}"
499
+ logger.error(f"🌐 DEBUG: {error_msg}")
500
  return {
501
  "status": "error",
502
  "message": f"HTTP transcription failed: {str(e)}",