mazesmazes committed on
Commit
740914e
·
verified ·
1 Parent(s): cfaab62

Upload asr_pipeline.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. asr_pipeline.py +0 -9
asr_pipeline.py CHANGED
@@ -521,19 +521,12 @@ class ASRPipeline(transformers.AutomaticSpeechRecognitionPipeline):
521
  Returns:
522
  Dict with 'text' key containing transcription
523
  """
524
- # DEBUG: Track which code path we're using
525
- import sys
526
- print(f"[DEBUG postprocess] type(model_outputs)={type(model_outputs).__name__}", file=sys.stderr)
527
-
528
  # Handle list of outputs (from chunking)
529
  if isinstance(model_outputs, list):
530
- print(f"[DEBUG postprocess] list len={len(model_outputs)}", file=sys.stderr)
531
  model_outputs = model_outputs[0] if model_outputs else {}
532
 
533
  tokens = model_outputs.get("tokens")
534
- print(f"[DEBUG postprocess] tokens is None: {tokens is None}", file=sys.stderr)
535
  if tokens is None:
536
- print("[DEBUG postprocess] FALLING BACK TO SUPER", file=sys.stderr)
537
  return super().postprocess(model_outputs, **kwargs)
538
 
539
  if torch.is_tensor(tokens):
@@ -544,10 +537,8 @@ class ASRPipeline(transformers.AutomaticSpeechRecognitionPipeline):
544
  text = self.tokenizer.decode(tokens, skip_special_tokens=True).strip()
545
  # Strip <think>...</think> tags (Qwen3 doesn't respect /no_think prompt)
546
  text = re.sub(r"<think>.*?</think>\s*", "", text, flags=re.DOTALL).strip()
547
- print(f"[DEBUG postprocess] BEFORE truncation: {len(text.split())} words", file=sys.stderr)
548
  # Post-process prediction
549
  text = self._post_process_prediction(text)
550
- print(f"[DEBUG postprocess] AFTER truncation: {len(text.split())} words", file=sys.stderr)
551
  return {"text": text}
552
 
553
  # Known hallucination patterns that should be deleted entirely
 
521
  Returns:
522
  Dict with 'text' key containing transcription
523
  """
 
 
 
 
524
  # Handle list of outputs (from chunking)
525
  if isinstance(model_outputs, list):
 
526
  model_outputs = model_outputs[0] if model_outputs else {}
527
 
528
  tokens = model_outputs.get("tokens")
 
529
  if tokens is None:
 
530
  return super().postprocess(model_outputs, **kwargs)
531
 
532
  if torch.is_tensor(tokens):
 
537
  text = self.tokenizer.decode(tokens, skip_special_tokens=True).strip()
538
  # Strip <think>...</think> tags (Qwen3 doesn't respect /no_think prompt)
539
  text = re.sub(r"<think>.*?</think>\s*", "", text, flags=re.DOTALL).strip()
 
540
  # Post-process prediction
541
  text = self._post_process_prediction(text)
 
542
  return {"text": text}
543
 
544
  # Known hallucination patterns that should be deleted entirely