SoSolaris
/

stt

Safetensors

whisper

Model card Files Files and versions

xet

Community

SoSolaris committed on Aug 20, 2025

Commit

afe3147

verified ·

1 Parent(s): f7c80df

Update handler.py

Browse files

Files changed (1) hide show

handler.py +8 -9

handler.py CHANGED Viewed

@@ -40,9 +40,12 @@ class EndpointHandler:
                     except Exception as e2:
                         print(f"Compilation failed: {e2}")
-            # forced_decoder_ids pour français (comme fastapi)
-            self.french_decoder_ids = self.processor.get_decoder_prompt_ids(
-                language="french", task="transcribe"
             )
             print("Model loaded and optimized successfully!")
@@ -55,7 +58,6 @@ class EndpointHandler:
             inputs = data.get("inputs", "")
             parameters = data.get("parameters", {})
-            # decode audio (base64 string or bytes)
             if isinstance(inputs, str):
                 try:
                     audio_bytes = base64.b64decode(inputs)
@@ -66,11 +68,9 @@ class EndpointHandler:
             else:
                 return {"error": "Invalid input format. Expected base64 string or bytes"}
-            # check size
             if len(audio_bytes) > 25 * 1024 * 1024:
                 return {"error": "File too large (max 25MB)"}
-            # load audio
             audio_array, _ = librosa.load(
                 io.BytesIO(audio_bytes),
                 sr=16000,
@@ -91,15 +91,15 @@ class EndpointHandler:
                 for k, v in model_inputs.items()
             }
-            # params
             max_length = parameters.get("max_length", 256)
             num_beams = parameters.get("num_beams", 6)
             temperature = parameters.get("temperature", 0.0)
-            # generate
             with torch.no_grad(), torch.inference_mode(), torch.autocast(device_type="cuda", dtype=torch.float16):
                 predicted_ids = self.model.generate(
                     **model_inputs,
                     max_length=max_length,
                     num_beams=num_beams,
                     temperature=temperature,
@@ -110,7 +110,6 @@ class EndpointHandler:
                     length_penalty=1.0,
                     use_cache=True,
                     pad_token_id=self.processor.tokenizer.eos_token_id,
-                    forced_decoder_ids=self.french_decoder_ids,  # ✅ identique à fastapi
                     suppress_tokens=[],
                     begin_suppress_tokens=[]
                 )

                     except Exception as e2:
                         print(f"Compilation failed: {e2}")
+            # compute decoder_input_ids for french
+            forced_ids = self.processor.get_decoder_prompt_ids(language="french", task="transcribe")
+            # convert to tensor [ [id1,id2,...] ]
+            self.french_decoder_input_ids = torch.tensor(
+                [[tok_id for _, tok_id in forced_ids]],
+                device="cuda" if torch.cuda.is_available() else "cpu"
             )
             print("Model loaded and optimized successfully!")
             inputs = data.get("inputs", "")
             parameters = data.get("parameters", {})
             if isinstance(inputs, str):
                 try:
                     audio_bytes = base64.b64decode(inputs)
             else:
                 return {"error": "Invalid input format. Expected base64 string or bytes"}
             if len(audio_bytes) > 25 * 1024 * 1024:
                 return {"error": "File too large (max 25MB)"}
             audio_array, _ = librosa.load(
                 io.BytesIO(audio_bytes),
                 sr=16000,
                 for k, v in model_inputs.items()
             }
             max_length = parameters.get("max_length", 256)
             num_beams = parameters.get("num_beams", 6)
             temperature = parameters.get("temperature", 0.0)
             with torch.no_grad(), torch.inference_mode(), torch.autocast(device_type="cuda", dtype=torch.float16):
                 predicted_ids = self.model.generate(
                     **model_inputs,
+                    decoder_input_ids=self.french_decoder_input_ids,  # ✅ remplace forced_decoder_ids
+                    forced_decoder_ids=None,                          # ✅ évite le conflit
                     max_length=max_length,
                     num_beams=num_beams,
                     temperature=temperature,
                     length_penalty=1.0,
                     use_cache=True,
                     pad_token_id=self.processor.tokenizer.eos_token_id,
                     suppress_tokens=[],
                     begin_suppress_tokens=[]
                 )