histlearn committed on
Commit
2dd975b
·
verified ·
1 Parent(s): a2ad1d2

fix: cast emb ao dtype da head (corrige F.linear em CPU sem autocast)

Browse files
Files changed (1) hide show
  1. inference.py +3 -1
inference.py CHANGED
@@ -181,7 +181,9 @@ def predict_batch(
181
  out = encoder(**toks)
182
  emb = last_token_pool(out.last_hidden_state, toks["attention_mask"])
183
  emb = F.normalize(emb, p=2, dim=1)
184
- logits = head(emb).squeeze(-1)
 
 
185
  p = torch.sigmoid(logits).float().cpu().numpy()
186
  preds.append(p)
187
 
 
181
  out = encoder(**toks)
182
  emb = last_token_pool(out.last_hidden_state, toks["attention_mask"])
183
  emb = F.normalize(emb, p=2, dim=1)
184
+ # Em CPU sem autocast, o encoder sai em fp16 e a head permanece em fp32 →
185
+ # F.linear recusa. Igualar ao dtype da head resolve (inofensivo em GPU).
186
+ logits = head(emb.to(head.weight.dtype)).squeeze(-1)
187
  p = torch.sigmoid(logits).float().cpu().numpy()
188
  preds.append(p)
189