SoSolaris
/

stt

Safetensors

whisper

Model card Files Files and versions

xet

Community

SoSolaris committed on Aug 20, 2025

Commit

bb23501

verified ·

1 Parent(s): 1a35a23

Update handler.py

Browse files

Files changed (1) hide show

handler.py +10 -3

handler.py CHANGED Viewed

@@ -40,7 +40,7 @@ class EndpointHandler:
                     except Exception as e2:
                         print(f"Compilation failed: {e2}")
-            # compute decoder_input_ids for french
             forced_ids = self.processor.get_decoder_prompt_ids(language="french", task="transcribe")
             self.french_decoder_input_ids = torch.tensor(
                 [[tok_id for _, tok_id in forced_ids]],
@@ -57,6 +57,7 @@ class EndpointHandler:
             inputs = data.get("inputs", "")
             parameters = data.get("parameters", {})
             if isinstance(inputs, str):
                 try:
                     audio_bytes = base64.b64decode(inputs)
@@ -70,34 +71,40 @@ class EndpointHandler:
             if len(audio_bytes) > 25 * 1024 * 1024:
                 return {"error": "File too large (max 25MB)"}
             audio_array, _ = librosa.load(
                 io.BytesIO(audio_bytes),
                 sr=16000,
                 mono=True,
                 duration=30
             )
             if len(audio_array) == 0:
                 return {"error": "Invalid or empty audio file"}
             model_inputs = self.processor(
                 audio_array,
                 sampling_rate=16000,
                 return_tensors="pt"
             )
             model_inputs = {
                 k: v.to(self.model.device).half() if v.dtype == torch.float32 else v.to(self.model.device)
                 for k, v in model_inputs.items()
             }
             max_length = parameters.get("max_length", 256)
             num_beams = parameters.get("num_beams", 6)
             temperature = parameters.get("temperature", 0.0)
             with torch.no_grad(), torch.inference_mode(), torch.autocast(device_type="cuda", dtype=torch.float16):
                 predicted_ids = self.model.generate(
                     **model_inputs,
-                    decoder_input_ids=self.french_decoder_input_ids,  # ✅ seul forçage
                     max_length=max_length,
                     num_beams=num_beams,
                     temperature=temperature,

                     except Exception as e2:
                         print(f"Compilation failed: {e2}")
+            # precompute decoder_input_ids for French transcription
             forced_ids = self.processor.get_decoder_prompt_ids(language="french", task="transcribe")
             self.french_decoder_input_ids = torch.tensor(
                 [[tok_id for _, tok_id in forced_ids]],
             inputs = data.get("inputs", "")
             parameters = data.get("parameters", {})
+            # decode audio
             if isinstance(inputs, str):
                 try:
                     audio_bytes = base64.b64decode(inputs)
             if len(audio_bytes) > 25 * 1024 * 1024:
                 return {"error": "File too large (max 25MB)"}
+            # load audio
             audio_array, _ = librosa.load(
                 io.BytesIO(audio_bytes),
                 sr=16000,
                 mono=True,
                 duration=30
             )
             if len(audio_array) == 0:
                 return {"error": "Invalid or empty audio file"}
+            # processor injecte forced_decoder_ids -> on les enlève
             model_inputs = self.processor(
                 audio_array,
                 sampling_rate=16000,
                 return_tensors="pt"
             )
+            if "forced_decoder_ids" in model_inputs:
+                del model_inputs["forced_decoder_ids"]
             model_inputs = {
                 k: v.to(self.model.device).half() if v.dtype == torch.float32 else v.to(self.model.device)
                 for k, v in model_inputs.items()
             }
+            # params
             max_length = parameters.get("max_length", 256)
             num_beams = parameters.get("num_beams", 6)
             temperature = parameters.get("temperature", 0.0)
+            # generate
             with torch.no_grad(), torch.inference_mode(), torch.autocast(device_type="cuda", dtype=torch.float16):
                 predicted_ids = self.model.generate(
                     **model_inputs,
+                    decoder_input_ids=self.french_decoder_input_ids,  # ✅ seul forçage langue
                     max_length=max_length,
                     num_beams=num_beams,
                     temperature=temperature,