ming committed on
Commit
cfe8d29
·
1 Parent(s): 12a2e7c

fix: Move inputs to model device in _single_chunk_summarize to fix CPU/GPU device mismatch

Browse files
app/services/hf_streaming_summarizer.py CHANGED
@@ -619,6 +619,13 @@ class HFStreamingSummarizer:
619
 
620
  inputs = _to_singleton_batch(inputs)
621
 
 
 
 
 
 
 
 
622
  # Validate pad/eos ids
623
  pad_id = self.tokenizer.pad_token_id
624
  eos_id = self.tokenizer.eos_token_id
 
619
 
620
  inputs = _to_singleton_batch(inputs)
621
 
622
+ # Move inputs to model device (required when model is on CUDA)
623
+ model_device = next(self.model.parameters()).device
624
+ inputs = {
625
+ k: v.to(model_device) if isinstance(v, torch.Tensor) else v
626
+ for k, v in inputs.items()
627
+ }
628
+
629
  # Validate pad/eos ids
630
  pad_id = self.tokenizer.pad_token_id
631
  eos_id = self.tokenizer.eos_token_id