Spaces:
Sleeping
Sleeping
Peter Michael Gits Claude commited on
Commit ·
cf3d441
1
Parent(s): 353335b
debug: Add comprehensive logging to verify STT service audio reception
Browse files- Added debug logging to gradio_transcribe_wrapper with file details
- Enhanced HTTP API endpoint with request/response logging
- Added startup debug messages for service configuration
- Debug logs will show if audio files are actually reaching the service
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
- __pycache__/app.cpython-313.pyc +0 -0
- app.py +52 -9
__pycache__/app.cpython-313.pyc
CHANGED
|
Binary files a/__pycache__/app.cpython-313.pyc and b/__pycache__/app.cpython-313.pyc differ
|
|
|
app.py
CHANGED
|
@@ -41,7 +41,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
| 41 |
@spaces.GPU(duration=30)
|
| 42 |
def transcribe_audio_zerogpu(
|
| 43 |
audio_path: str,
|
| 44 |
-
language: str = "
|
| 45 |
model_size_param: str = "base"
|
| 46 |
) -> tuple[str, str, Dict[str, Any]]:
|
| 47 |
"""Transcribe audio file using Whisper with ZeroGPU"""
|
|
@@ -133,23 +133,44 @@ def get_service_info():
|
|
| 133 |
Connect your WebRTC client to: `wss://your-space.hf.space/ws/stt`
|
| 134 |
"""
|
| 135 |
|
| 136 |
-
def gradio_transcribe_wrapper(audio_file, language="
|
| 137 |
"""Gradio wrapper for transcription function"""
|
| 138 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
if audio_file is None:
|
|
|
|
| 140 |
return "❌ No audio file provided", "{}", "Please upload an audio file"
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
# Use the ZeroGPU transcription function
|
|
|
|
| 143 |
transcription, status, timing = transcribe_audio_zerogpu(
|
| 144 |
audio_file, language, model_size_param
|
| 145 |
)
|
| 146 |
|
|
|
|
|
|
|
|
|
|
| 147 |
if status == "success":
|
| 148 |
return f"✅ {transcription}", json.dumps(timing, indent=2), f"Status: {status}"
|
| 149 |
else:
|
| 150 |
return f"❌ Transcription failed", json.dumps(timing, indent=2), f"Status: {status}"
|
| 151 |
|
| 152 |
except Exception as e:
|
|
|
|
|
|
|
| 153 |
return f"❌ Error: {str(e)}", "{}", "Error occurred during transcription"
|
| 154 |
|
| 155 |
# Create Gradio interface with transcription functionality
|
|
@@ -176,9 +197,9 @@ with gr.Blocks(title="π€ STT WebSocket Service v1.0.0") as demo:
|
|
| 176 |
sources=["upload", "microphone"]
|
| 177 |
)
|
| 178 |
language_input = gr.Dropdown(
|
| 179 |
-
choices=["
|
| 180 |
-
value="
|
| 181 |
-
label="Language (
|
| 182 |
)
|
| 183 |
model_input = gr.Dropdown(
|
| 184 |
choices=["tiny", "base", "small", "medium", "large-v2"],
|
|
@@ -289,7 +310,7 @@ async def process_audio_message(client_id: str, message: Dict[str, Any]):
|
|
| 289 |
# Transcribe audio using global ZeroGPU function
|
| 290 |
transcription, status, timing = transcribe_audio_zerogpu(
|
| 291 |
temp_path,
|
| 292 |
-
message.get("language", "
|
| 293 |
message.get("model_size", model_size)
|
| 294 |
)
|
| 295 |
|
|
@@ -330,7 +351,11 @@ async def process_audio_message(client_id: str, message: Dict[str, Any]):
|
|
| 330 |
# For HuggingFace Spaces - we need to launch the Gradio demo
|
| 331 |
# and add WebSocket routes to its internal FastAPI app
|
| 332 |
if __name__ == "__main__":
|
| 333 |
-
logger.info(f"🎤 Starting {__service__} v{__version__} with Gradio+WebSocket integration")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
|
| 335 |
# Create FastAPI app for WebSocket endpoints
|
| 336 |
fastapi_app = FastAPI(title="STT WebSocket API")
|
|
@@ -415,23 +440,39 @@ if __name__ == "__main__":
|
|
| 415 |
@fastapi_app.post("/api/transcribe")
|
| 416 |
async def http_transcribe_endpoint(
|
| 417 |
file: UploadFile = File(...),
|
| 418 |
-
language: str = Form("
|
| 419 |
model_size_param: str = Form("base")
|
| 420 |
):
|
| 421 |
"""HTTP transcription endpoint for Streamlit WebRTC integration"""
|
| 422 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 423 |
# Save uploaded file
|
| 424 |
with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as tmp_file:
|
| 425 |
content = await file.read()
|
| 426 |
tmp_file.write(content)
|
| 427 |
temp_path = tmp_file.name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 428 |
|
| 429 |
try:
|
| 430 |
# Transcribe using ZeroGPU function
|
|
|
|
| 431 |
transcription, status, timing = transcribe_audio_zerogpu(
|
| 432 |
temp_path, language, model_size_param
|
| 433 |
)
|
| 434 |
|
|
|
|
|
|
|
|
|
|
| 435 |
if status == "success":
|
| 436 |
return {
|
| 437 |
"status": "success",
|
|
@@ -451,9 +492,11 @@ if __name__ == "__main__":
|
|
| 451 |
# Clean up
|
| 452 |
if os.path.exists(temp_path):
|
| 453 |
os.unlink(temp_path)
|
|
|
|
| 454 |
|
| 455 |
except Exception as e:
|
| 456 |
-
|
|
|
|
| 457 |
return {
|
| 458 |
"status": "error",
|
| 459 |
"message": f"HTTP transcription failed: {str(e)}",
|
|
|
|
| 41 |
@spaces.GPU(duration=30)
|
| 42 |
def transcribe_audio_zerogpu(
|
| 43 |
audio_path: str,
|
| 44 |
+
language: str = "en",
|
| 45 |
model_size_param: str = "base"
|
| 46 |
) -> tuple[str, str, Dict[str, Any]]:
|
| 47 |
"""Transcribe audio file using Whisper with ZeroGPU"""
|
|
|
|
| 133 |
Connect your WebRTC client to: `wss://your-space.hf.space/ws/stt`
|
| 134 |
"""
|
| 135 |
|
| 136 |
+
def gradio_transcribe_wrapper(audio_file, language="en", model_size_param="base"):
|
| 137 |
"""Gradio wrapper for transcription function"""
|
| 138 |
try:
|
| 139 |
+
# DEBUG: Log all incoming requests
|
| 140 |
+
logger.info(f"🎤 DEBUG: Gradio transcription request received")
|
| 141 |
+
logger.info(f"🎤 DEBUG: Audio file: {audio_file}")
|
| 142 |
+
logger.info(f"🎤 DEBUG: Language: {language}")
|
| 143 |
+
logger.info(f"🎤 DEBUG: Model size: {model_size_param}")
|
| 144 |
+
|
| 145 |
if audio_file is None:
|
| 146 |
+
logger.warning("🎤 DEBUG: No audio file provided to Gradio wrapper")
|
| 147 |
return "❌ No audio file provided", "{}", "Please upload an audio file"
|
| 148 |
|
| 149 |
+
# DEBUG: Check file details
|
| 150 |
+
if isinstance(audio_file, str) and os.path.exists(audio_file):
|
| 151 |
+
file_size = os.path.getsize(audio_file)
|
| 152 |
+
logger.info(f"🎤 DEBUG: Audio file size: {file_size} bytes")
|
| 153 |
+
logger.info(f"🎤 DEBUG: Audio file path: {audio_file}")
|
| 154 |
+
else:
|
| 155 |
+
logger.warning(f"🎤 DEBUG: Invalid audio file: {type(audio_file)}")
|
| 156 |
+
|
| 157 |
# Use the ZeroGPU transcription function
|
| 158 |
+
logger.info(f"🎤 DEBUG: Calling transcribe_audio_zerogpu...")
|
| 159 |
transcription, status, timing = transcribe_audio_zerogpu(
|
| 160 |
audio_file, language, model_size_param
|
| 161 |
)
|
| 162 |
|
| 163 |
+
logger.info(f"🎤 DEBUG: Transcription result: '{transcription[:100]}...'")
|
| 164 |
+
logger.info(f"🎤 DEBUG: Status: {status}")
|
| 165 |
+
|
| 166 |
if status == "success":
|
| 167 |
return f"✅ {transcription}", json.dumps(timing, indent=2), f"Status: {status}"
|
| 168 |
else:
|
| 169 |
return f"❌ Transcription failed", json.dumps(timing, indent=2), f"Status: {status}"
|
| 170 |
|
| 171 |
except Exception as e:
|
| 172 |
+
error_msg = f"Error in gradio_transcribe_wrapper: {str(e)}"
|
| 173 |
+
logger.error(f"🎤 DEBUG: {error_msg}")
|
| 174 |
return f"❌ Error: {str(e)}", "{}", "Error occurred during transcription"
|
| 175 |
|
| 176 |
# Create Gradio interface with transcription functionality
|
|
|
|
| 197 |
sources=["upload", "microphone"]
|
| 198 |
)
|
| 199 |
language_input = gr.Dropdown(
|
| 200 |
+
choices=["en", "auto", "es", "fr", "de", "it", "pt", "ru", "ja", "ko", "zh"],
|
| 201 |
+
value="en",
|
| 202 |
+
label="Language (English by default)"
|
| 203 |
)
|
| 204 |
model_input = gr.Dropdown(
|
| 205 |
choices=["tiny", "base", "small", "medium", "large-v2"],
|
|
|
|
| 310 |
# Transcribe audio using global ZeroGPU function
|
| 311 |
transcription, status, timing = transcribe_audio_zerogpu(
|
| 312 |
temp_path,
|
| 313 |
+
message.get("language", "en"),
|
| 314 |
message.get("model_size", model_size)
|
| 315 |
)
|
| 316 |
|
|
|
|
| 351 |
# For HuggingFace Spaces - we need to launch the Gradio demo
|
| 352 |
# and add WebSocket routes to its internal FastAPI app
|
| 353 |
if __name__ == "__main__":
|
| 354 |
+
logger.info(f"🎤 DEBUG: Starting {__service__} v{__version__} with Gradio+WebSocket integration")
|
| 355 |
+
logger.info(f"🎤 DEBUG: Device: {device}")
|
| 356 |
+
logger.info(f"🎤 DEBUG: Model size: {model_size}")
|
| 357 |
+
logger.info(f"🎤 DEBUG: Default language: English (en)")
|
| 358 |
+
logger.info(f"🎤 DEBUG: Service ready for connections")
|
| 359 |
|
| 360 |
# Create FastAPI app for WebSocket endpoints
|
| 361 |
fastapi_app = FastAPI(title="STT WebSocket API")
|
|
|
|
| 440 |
@fastapi_app.post("/api/transcribe")
|
| 441 |
async def http_transcribe_endpoint(
|
| 442 |
file: UploadFile = File(...),
|
| 443 |
+
language: str = Form("en"),
|
| 444 |
model_size_param: str = Form("base")
|
| 445 |
):
|
| 446 |
"""HTTP transcription endpoint for Streamlit WebRTC integration"""
|
| 447 |
try:
|
| 448 |
+
# DEBUG: Log incoming HTTP request
|
| 449 |
+
logger.info(f"🌐 DEBUG: HTTP transcribe request received")
|
| 450 |
+
logger.info(f"🌐 DEBUG: File name: {file.filename}")
|
| 451 |
+
logger.info(f"🌐 DEBUG: Content type: {file.content_type}")
|
| 452 |
+
logger.info(f"🌐 DEBUG: Language: {language}")
|
| 453 |
+
logger.info(f"🌐 DEBUG: Model size: {model_size_param}")
|
| 454 |
+
|
| 455 |
# Save uploaded file
|
| 456 |
with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as tmp_file:
|
| 457 |
content = await file.read()
|
| 458 |
tmp_file.write(content)
|
| 459 |
temp_path = tmp_file.name
|
| 460 |
+
|
| 461 |
+
# DEBUG: Log file details
|
| 462 |
+
file_size = len(content)
|
| 463 |
+
logger.info(f"🌐 DEBUG: Uploaded file size: {file_size} bytes")
|
| 464 |
+
logger.info(f"🌐 DEBUG: Temp file path: {temp_path}")
|
| 465 |
|
| 466 |
try:
|
| 467 |
# Transcribe using ZeroGPU function
|
| 468 |
+
logger.info(f"🌐 DEBUG: Starting HTTP transcription...")
|
| 469 |
transcription, status, timing = transcribe_audio_zerogpu(
|
| 470 |
temp_path, language, model_size_param
|
| 471 |
)
|
| 472 |
|
| 473 |
+
logger.info(f"🌐 DEBUG: HTTP transcription result: '{transcription[:100] if transcription else 'None'}...'")
|
| 474 |
+
logger.info(f"🌐 DEBUG: HTTP status: {status}")
|
| 475 |
+
|
| 476 |
if status == "success":
|
| 477 |
return {
|
| 478 |
"status": "success",
|
|
|
|
| 492 |
# Clean up
|
| 493 |
if os.path.exists(temp_path):
|
| 494 |
os.unlink(temp_path)
|
| 495 |
+
logger.info(f"🌐 DEBUG: Cleaned up temp file: {temp_path}")
|
| 496 |
|
| 497 |
except Exception as e:
|
| 498 |
+
error_msg = f"HTTP transcription error: {e}"
|
| 499 |
+
logger.error(f"🌐 DEBUG: {error_msg}")
|
| 500 |
return {
|
| 501 |
"status": "error",
|
| 502 |
"message": f"HTTP transcription failed: {str(e)}",
|