prithivMLmods committed on
Commit e620b93 · verified · 1 Parent(s): c2b1d2c

Update app.py

Files changed (1)
  1. app.py +25 -48
app.py CHANGED
@@ -1,58 +1,34 @@
 import gradio as gr
 import torch
 from transformers import AutoModel, AutoTokenizer
-from huggingface_hub import snapshot_download
 import spaces
 import os
 import tempfile
 from PIL import Image, ImageDraw
 import re
 
-# --- 1. Download Model to a Local Cache, Modify, and Load ---
-
-print("Downloading and setting up model from Hugging Face Hub...")
-
-# Define a cache path for the model
-CACHE_PATH = "./model_cache"
-if not os.path.exists(CACHE_PATH):
-    os.makedirs(CACHE_PATH)
-
-# Download the model repository to the local directory
-model_path_local = snapshot_download(
-    repo_id='strangervisionhf/deepseek-ocr-latest-transformers',
-    local_dir=os.path.join(CACHE_PATH, 'deepseek.ocr'),
-    max_workers=8,  # Adjusted for typical connection speeds
-    local_dir_use_symlinks=False
-)
-print(f"✅ Model downloaded to: {model_path_local}")
-
-# --- Remove the specified file after downloading ---
-file_to_remove = os.path.join(model_path_local, "modeling_deepseekv2.py")
-if os.path.exists(file_to_remove):
-    try:
-        os.remove(file_to_remove)
-        print(f"✅ Successfully removed file: {file_to_remove}")
-    except OSError as e:
-        print(f"❌ Error removing file {file_to_remove}: {e}")
-else:
-    print(f"⚠️ File not found, could not remove: {file_to_remove}")
-
-
-# --- Load tokenizer and model from the local path ---
-print("Loading model and tokenizer from local cache...")
-MODEL_PATH = model_path_local
-tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
-
-# Load the model with automatic device mapping and bfloat16 for efficiency
+# --- 1. Load Model and Tokenizer directly to the correct device ---
+print("Determining device...")
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"✅ Using device: {device}")
+
+print("Loading model and tokenizer...")
+model_name = "deepseek-ai/DeepSeek-OCR"
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
+# Load the model directly to the specified device and set to evaluation mode
 model = AutoModel.from_pretrained(
-    MODEL_PATH,
-    attn_implementation="flash_attention_2",
-    torch_dtype=torch.bfloat16,
-    device_map="auto",  # Automatically maps model to available GPU(s)/CPU
-    trust_remote_code=True
-).eval()
+    model_name,
+    _attn_implementation="flash_attention_2",
+    trust_remote_code=True,
+    use_safetensors=True,
+).to(device).eval()  # Move to device and set to eval mode
+
+# Also apply the desired dtype if using a GPU
+if device.type == 'cuda':
+    model = model.to(torch.bfloat16)
 
-print("✅ Model loaded successfully with automatic device mapping.")
+print("✅ Model loaded successfully to device and in eval mode.")
 
 
 # --- Helper function to find pre-generated result images ---
@@ -66,17 +42,17 @@ def find_result_image(path):
         print(f"Error opening result image {filename}: {e}")
     return None
 
-# --- 2. Main Processing Function (No changes needed here) ---
+# --- 2. Main Processing Function (Simplified) ---
 @spaces.GPU
 def process_ocr_task(image, model_size, task_type, ref_text):
     """
-    Processes an image with DeepSeek-OCR. Model is already loaded on the correct device.
+    Processes an image with DeepSeek-OCR. The model is already on the correct device.
     """
     if image is None:
         return "Please upload an image first.", None
 
-    # No need to move the model; device_map="auto" handled it at load time.
-    print("✅ Model is already on the designated device(s).")
+    # No need to move the model to GPU here; it's already done at startup.
+    print("✅ Model is already on the designated device.")
 
     with tempfile.TemporaryDirectory() as output_path:
         # Build the prompt
@@ -107,6 +83,7 @@ def process_ocr_task(image, model_size, task_type, ref_text):
         config = size_configs.get(model_size, size_configs["Gundam (Recommended)"])
 
         print(f"🏃 Running inference with prompt: {prompt}")
+        # Use the globally defined 'model' which is already on the GPU
        text_result = model.infer(
             tokenizer,
             prompt=prompt,
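
Two notes on the new code, with sketches that go beyond what the diff itself shows. First, `_attn_implementation="flash_attention_2"` makes the load fail outright on machines without the flash-attn wheel (including CPU-only Spaces). A defensive variant — a sketch, not part of this commit, and it assumes the model's remote code also accepts the standard "eager" implementation — would probe for the package first:

# Sketch (not in this commit): pick the attention backend at runtime.
# Assumption: DeepSeek-OCR's remote code also accepts "eager", the
# baseline attention implementation every transformers model supports.
try:
    import flash_attn  # noqa: F401  # only checking the wheel is installed
    attn_impl = "flash_attention_2"
except ImportError:
    attn_impl = "eager"

model = AutoModel.from_pretrained(
    model_name,
    _attn_implementation=attn_impl,
    trust_remote_code=True,
    use_safetensors=True,
).to(device).eval()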
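
Second, both hunks are cut off right after `prompt=prompt`, so the full shape of the `model.infer(...)` call never appears in the diff. Here is a sketch of how that call typically continues, based on the public DeepSeek-OCR model card rather than on this commit; the remaining keyword names, the `image_path` temp file, and the `config` dict layout (`base_size`, `image_size`, `crop_mode`, with the "Gundam" preset at base_size=1024, image_size=640, crop_mode=True) are all assumptions carried over from that card:

# Sketch (not in this commit): the infer call as documented on the
# DeepSeek-OCR model card. `image_path` is a hypothetical temp file the
# uploaded PIL image was saved to; `config` is assumed to be a dict like
# {"base_size": 1024, "image_size": 640, "crop_mode": True}.
text_result = model.infer(
    tokenizer,
    prompt=prompt,
    image_file=image_path,
    output_path=output_path,        # result images land here for find_result_image()
    base_size=config["base_size"],
    image_size=config["image_size"],
    crop_mode=config["crop_mode"],
    save_results=True,              # write annotated outputs to output_path
)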