DeepSeek-OCR-experimental

Running on Zero

App Files Files Community

prithivMLmods committed on Oct 29

Commit

e4bacdf

verified ·

1 Parent(s): ecdd904

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -25

app.py CHANGED Viewed

@@ -1,34 +1,58 @@
 import gradio as gr
 import torch
 from transformers import AutoModel, AutoTokenizer
 import spaces
 import os
 import tempfile
 from PIL import Image, ImageDraw
 import re
-# --- 1. Load Model and Tokenizer directly to the correct device ---
-print("Determining device...")
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-print(f"✅ Using device: {device}")
-print("Loading model and tokenizer...")
-model_name = "lvyufeng/DeepSeek-OCR-Community-Latest"
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-# Load the model directly to the specified device and set to evaluation mode
 model = AutoModel.from_pretrained(
-    model_name,
-    _attn_implementation="flash_attention_2",
-    trust_remote_code=True,
-    use_safetensors=True,
-).to(device).eval() # Move to device and set to eval mode
-# Also apply the desired dtype if using a GPU
-if device.type == 'cuda':
-    model = model.to(torch.bfloat16)
-print("✅ Model loaded successfully to device and in eval mode.")
 # --- Helper function to find pre-generated result images ---
@@ -42,17 +66,17 @@ def find_result_image(path):
                 print(f"Error opening result image {filename}: {e}")
     return None
-# --- 2. Main Processing Function (Simplified) ---
 @spaces.GPU
 def process_ocr_task(image, model_size, task_type, ref_text):
     """
-    Processes an image with DeepSeek-OCR. The model is already on the correct device.
     """
     if image is None:
         return "Please upload an image first.", None
-    # No need to move the model to GPU here; it's already done at startup.
-    print("✅ Model is already on the designated device.")
     with tempfile.TemporaryDirectory() as output_path:
         # Build the prompt
@@ -83,7 +107,6 @@ def process_ocr_task(image, model_size, task_type, ref_text):
         config = size_configs.get(model_size, size_configs["Gundam (Recommended)"])
         print(f"🏃 Running inference with prompt: {prompt}")
-        # Use the globally defined 'model' which is already on the GPU
         text_result = model.infer(
             tokenizer,
             prompt=prompt,

 import gradio as gr
 import torch
 from transformers import AutoModel, AutoTokenizer
+from huggingface_hub import snapshot_download
 import spaces
 import os
 import tempfile
 from PIL import Image, ImageDraw
 import re
+# --- 1. Download Model to a Local Cache, Modify, and Load ---
+print("Downloading and setting up model from Hugging Face Hub...")
+# Define a cache path for the model
+CACHE_PATH = "./model_cache"
+if not os.path.exists(CACHE_PATH):
+    os.makedirs(CACHE_PATH)
+# Download the model repository to the local directory
+model_path_local = snapshot_download(
+    repo_id='strangervisionhf/deepseek-ocr-latest-transformers',
+    local_dir=os.path.join(CACHE_PATH, 'deepseek.ocr'),
+    max_workers=8, # Adjusted for typical connection speeds
+    local_dir_use_symlinks=False
+)
+print(f"✅ Model downloaded to: {model_path_local}")
+# --- Remove the specified file after downloading ---
+file_to_remove = os.path.join(model_path_local, "modeling_deepseekv2.py")
+if os.path.exists(file_to_remove):
+    try:
+        os.remove(file_to_remove)
+        print(f"✅ Successfully removed file: {file_to_remove}")
+    except OSError as e:
+        print(f"❌ Error removing file {file_to_remove}: {e}")
+else:
+    print(f"⚠️ File not found, could not remove: {file_to_remove}")
+# --- Load tokenizer and model from the local path ---
+print("Loading model and tokenizer from local cache...")
+MODEL_PATH = model_path_local
+tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
+# Load the model with automatic device mapping and bfloat16 for efficiency
 model = AutoModel.from_pretrained(
+    MODEL_PATH,
+    attn_implementation="flash_attention_2",
+    torch_dtype=torch.bfloat16,
+    device_map="auto", # Automatically maps model to available GPU(s)/CPU
+    trust_remote_code=True
+).eval()
+print("✅ Model loaded successfully with automatic device mapping.")
 # --- Helper function to find pre-generated result images ---
                 print(f"Error opening result image {filename}: {e}")
     return None
+# --- 2. Main Processing Function (No changes needed here) ---
 @spaces.GPU
 def process_ocr_task(image, model_size, task_type, ref_text):
     """
+    Processes an image with DeepSeek-OCR. Model is already loaded on the correct device.
     """
     if image is None:
         return "Please upload an image first.", None
+    # No need to move the model; device_map="auto" handled it at load time.
+    print("✅ Model is already on the designated device(s).")
     with tempfile.TemporaryDirectory() as output_path:
         # Build the prompt
         config = size_configs.get(model_size, size_configs["Gundam (Recommended)"])
         print(f"🏃 Running inference with prompt: {prompt}")
         text_result = model.infer(
             tokenizer,
             prompt=prompt,