Ranam Hamoud committed on
Commit
8b3fa78
·
1 Parent(s): 521317f

Fix Whisper KV cache KeyError by disabling fp16 and adding fallback

Browse files
Files changed (1) hide show
  1. speech_recognizer.py +21 -7
speech_recognizer.py CHANGED
@@ -27,13 +27,27 @@ class SpeechRecognizer:
27
  task: str = "transcribe"
28
  ) -> Dict[str, any]:
29
  # Transcribe with Whisper (with word-level timestamps for better pause detection)
30
- result = self.model.transcribe(
31
- audio_path,
32
- language=language,
33
- task=task,
34
- verbose=False,
35
- word_timestamps=True
36
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  transcription = result['text'].strip()
39
  detected_language = result.get('language', 'unknown')
 
27
  task: str = "transcribe"
28
  ) -> Dict[str, any]:
29
  # Transcribe with Whisper (with word-level timestamps for better pause detection)
30
+ # Using fp16=False to avoid KV cache issues in production environments
31
+ try:
32
+ result = self.model.transcribe(
33
+ audio_path,
34
+ language=language,
35
+ task=task,
36
+ verbose=False,
37
+ word_timestamps=True,
38
+ fp16=False # Disable fp16 to avoid KV cache KeyError
39
+ )
40
+ except (KeyError, RuntimeError) as e:
41
+ # Fallback: transcribe without word timestamps if KV cache fails
42
+ print(f"Warning: Word-level timestamps failed ({e}), retrying without them...")
43
+ result = self.model.transcribe(
44
+ audio_path,
45
+ language=language,
46
+ task=task,
47
+ verbose=False,
48
+ word_timestamps=False,
49
+ fp16=False
50
+ )
51
 
52
  transcription = result['text'].strip()
53
  detected_language = result.get('language', 'unknown')