wealthcoders
/

deepseek-OCR

Model card Files Files and versions

wealthcoders committed on Nov 28, 2025

Commit

82c7d5c

·

verified ·

1 Parent(s): 59c9346

Update handler.py

Files changed (1) hide show

handler.py +11 -8

handler.py CHANGED Viewed

@@ -14,24 +14,27 @@ class EndpointHandler:
         self.tokenizer = AutoTokenizer.from_pretrained(
             model_path,
             trust_remote_code=True,
-            local_files_only=bool(model_dir)  # Only use local files if model_dir is provided
         )
         # Check if CUDA is available
         self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
-        # Load model with appropriate settings
         model_kwargs = {
             'trust_remote_code': True,
-            'torch_dtype': torch.bfloat16 if self.device == 'cuda' else torch.float32
         }
-        # Add flash attention if available and on CUDA
         if self.device == 'cuda':
-            try:
-                model_kwargs['_attn_implementation'] = 'flash_attention_2'
-            except:
-                pass  # Fall back to default if flash attention not available
         self.model = AutoModel.from_pretrained(model_path, **model_kwargs)
         self.model = self.model.eval()

         self.tokenizer = AutoTokenizer.from_pretrained(
             model_path,
             trust_remote_code=True,
+            local_files_only=bool(model_dir)
         )
         # Check if CUDA is available
         self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        print(f"Using device: {self.device}")
+        # Load model WITHOUT flash attention
         model_kwargs = {
             'trust_remote_code': True,
         }
+        # Use appropriate dtype based on GPU capability
         if self.device == 'cuda':
+            # T4 and L4 work better with float16
+            model_kwargs['torch_dtype'] = torch.float16
+        else:
+            model_kwargs['torch_dtype'] = torch.float32
+        # Explicitly disable flash attention
+        model_kwargs['_attn_implementation'] = 'eager'
         self.model = AutoModel.from_pretrained(model_path, **model_kwargs)
         self.model = self.model.eval()