wealthcoders
/

deepseek-OCR

Model card Files Files and versions

wealthcoders committed on Nov 28, 2025

Commit

7b796cd

·

verified ·

1 Parent(s): 72c9ee9

Update handler.py

Files changed (1) hide show

handler.py +2 -8

handler.py CHANGED Viewed

@@ -21,18 +21,12 @@ class EndpointHandler:
         self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
         print(f"Using device: {self.device}")
-        # Load model WITHOUT flash attention
         model_kwargs = {
             'trust_remote_code': True,
         }
-        # Use appropriate dtype based on GPU capability
-        if self.device == 'cuda':
-            # T4 and L4 work better with float16
-            model_kwargs['torch_dtype'] = torch.float16
-        else:
-            model_kwargs['torch_dtype'] = torch.float32
         # Explicitly disable flash attention
         model_kwargs['_attn_implementation'] = 'eager'

         self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
         print(f"Using device: {self.device}")
+       # Load model in float32 to avoid dtype conflicts
         model_kwargs = {
             'trust_remote_code': True,
+            'torch_dtype': torch.float32  # Use float32 instead of float16
         }
         # Explicitly disable flash attention
         model_kwargs['_attn_implementation'] = 'eager'