Spaces:

pgits
/

stt-gpu-service-python-v4

Runtime error

Peter Michael Gits Claude committed on Sep 4, 2025

Commit

b8737d8

1 Parent(s): 1a1d398

Add comprehensive LMGen debugging and introspection

v1.4.9 - Deep debug LMGen step() returning None:
- Added LMGen type and methods introspection
- Test step() with and without streaming context
- Log LMGen internal state and attributes
- Try different approaches to find working API pattern
- Should reveal why step() returns None and what the correct usage is

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show

app.py +30 -29

app.py CHANGED Viewed

@@ -21,7 +21,7 @@ from fastapi.responses import JSONResponse, HTMLResponse
 import uvicorn
 # Version tracking
-VERSION = "1.4.8"
 COMMIT_SHA = "TBD"
 # Configure logging
@@ -176,35 +176,36 @@ def transcribe_audio_moshi(audio_data: np.ndarray, sample_rate: int = 24000) ->
                 try:
                     # Use the actual language model for generation
                     if lm_gen and lm_gen != "mock":
-                        # Use streaming LMGen step method for text generation
-                        with lm_gen.streaming(1):
-                            text_tokens = []
-                            for i in range(audio_tokens.shape[-1]):
-                                # Extract single timestep tokens
-                                code_step = audio_tokens[:, :, i:i+1]  # [B, 8, 1]
-                                logger.info(f"🔍 Step {i}: code_step shape: {code_step.shape}")
-                                # Generate tokens using step method
-                                tokens_out = lm_gen.step(code_step)  # [B, 1 + 8, 1]
-                                logger.info(f"🔍 Step {i}: tokens_out type: {type(tokens_out)}, value: {tokens_out}")
-                                if tokens_out is not None:
-                                    logger.info(f"🔍 Step {i}: tokens_out shape: {tokens_out.shape}")
-                                    # Extract text token (index 1)
-                                    text_token = tokens_out[:, 1:2, :]  # [B, 1, 1]
-                                    text_tokens.append(text_token)
-                                    logger.info(f"✅ Step {i}: Added text token shape: {text_token.shape}")
-                                else:
-                                    logger.error(f"❌ Step {i}: lm_gen.step() returned None!")
-                                    break
-                            # Concatenate all text tokens
-                            if text_tokens:
-                                all_text_tokens = torch.cat(text_tokens, dim=-1)
-                                text_output = f"Moshiko CPU transcription: Generated {all_text_tokens.shape} text tokens"
-                                logger.info(f"✅ Generated transcription: {text_output}")
                             else:
-                                text_output = "Moshiko: No text tokens generated"
                     else:
                         text_output = "Moshiko fallback: LM generator not available"
                         logger.warning("⚠️ LM generator not available, using fallback")

 import uvicorn
 # Version tracking
+VERSION = "1.4.9"
 COMMIT_SHA = "TBD"
 # Configure logging
                 try:
                     # Use the actual language model for generation
                     if lm_gen and lm_gen != "mock":
+                        logger.info(f"🔧 LMGen type: {type(lm_gen)}")
+                        logger.info(f"🔧 LMGen methods: {[m for m in dir(lm_gen) if not m.startswith('_')]}")
+                        # Try simpler approach - maybe streaming context is the issue
+                        try:
+                            # First try without streaming context
+                            logger.info("🧪 Trying step() without streaming context...")
+                            code_step = audio_tokens[:, :, 0:1]  # Just first timestep [B, 8, 1]
+                            tokens_out = lm_gen.step(code_step)
+                            logger.info(f"🔍 Direct step result: {type(tokens_out)}, value: {tokens_out}")
+                            if tokens_out is None:
+                                # Try with streaming context
+                                logger.info("🧪 Trying with streaming context...")
+                                with lm_gen.streaming(1):
+                                    tokens_out = lm_gen.step(code_step)
+                                    logger.info(f"🔍 Streaming step result: {type(tokens_out)}, value: {tokens_out}")
+                            if tokens_out is None:
+                                # Maybe we need to call a different method or check state
+                                logger.error("🚨 Both approaches returned None - checking LMGen state")
+                                logger.info(f"🔧 LMGen attributes: {vars(lm_gen) if hasattr(lm_gen, '__dict__') else 'No __dict__'}")
+                                text_output = "Moshiko: LMGen step() returns None - API issue"
                             else:
+                                logger.info(f"✅ Got tokens! Shape: {tokens_out.shape if hasattr(tokens_out, 'shape') else 'No shape'}")
+                                text_output = f"Moshiko CPU: Successfully generated tokens with shape {tokens_out.shape if hasattr(tokens_out, 'shape') else 'unknown'}"
+                        except Exception as step_error:
+                            logger.error(f"🚨 LMGen step error: {step_error}")
+                            text_output = f"Moshiko: LMGen step error: {str(step_error)}"
                     else:
                         text_output = "Moshiko fallback: LM generator not available"
                         logger.warning("⚠️ LM generator not available, using fallback")