Spaces:
Running
Running
ming
committed on
Commit
·
cfe8d29
1
Parent(s):
12a2e7c
fix: Move inputs to model device in _single_chunk_summarize to fix CPU/GPU device mismatch
Browse files
app/services/hf_streaming_summarizer.py
CHANGED
|
@@ -619,6 +619,13 @@ class HFStreamingSummarizer:
 
 inputs = _to_singleton_batch(inputs)
 
+# Move inputs to model device (required when model is on CUDA)
+model_device = next(self.model.parameters()).device
+inputs = {
+    k: v.to(model_device) if isinstance(v, torch.Tensor) else v
+    for k, v in inputs.items()
+}
+
 # Validate pad/eos ids
 pad_id = self.tokenizer.pad_token_id
 eos_id = self.tokenizer.eos_token_id