prithivMLmods committed on
Commit
e4bacdf
·
verified ·
1 Parent(s): ecdd904

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -25
app.py CHANGED
@@ -1,34 +1,58 @@
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoModel, AutoTokenizer
 
4
  import spaces
5
  import os
6
  import tempfile
7
  from PIL import Image, ImageDraw
8
  import re
9
 
10
- # --- 1. Load Model and Tokenizer directly to the correct device ---
11
- print("Determining device...")
12
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
13
- print(f"✅ Using device: {device}")
14
-
15
- print("Loading model and tokenizer...")
16
- model_name = "lvyufeng/DeepSeek-OCR-Community-Latest"
17
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
18
-
19
- # Load the model directly to the specified device and set to evaluation mode
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  model = AutoModel.from_pretrained(
21
- model_name,
22
- _attn_implementation="flash_attention_2",
23
- trust_remote_code=True,
24
- use_safetensors=True,
25
- ).to(device).eval() # Move to device and set to eval mode
26
-
27
- # Also apply the desired dtype if using a GPU
28
- if device.type == 'cuda':
29
- model = model.to(torch.bfloat16)
30
 
31
- print("✅ Model loaded successfully to device and in eval mode.")
32
 
33
 
34
  # --- Helper function to find pre-generated result images ---
@@ -42,17 +66,17 @@ def find_result_image(path):
42
  print(f"Error opening result image {filename}: {e}")
43
  return None
44
 
45
- # --- 2. Main Processing Function (Simplified) ---
46
  @spaces.GPU
47
  def process_ocr_task(image, model_size, task_type, ref_text):
48
  """
49
- Processes an image with DeepSeek-OCR. The model is already on the correct device.
50
  """
51
  if image is None:
52
  return "Please upload an image first.", None
53
 
54
- # No need to move the model to GPU here; it's already done at startup.
55
- print("✅ Model is already on the designated device.")
56
 
57
  with tempfile.TemporaryDirectory() as output_path:
58
  # Build the prompt
@@ -83,7 +107,6 @@ def process_ocr_task(image, model_size, task_type, ref_text):
83
  config = size_configs.get(model_size, size_configs["Gundam (Recommended)"])
84
 
85
  print(f"🏃 Running inference with prompt: {prompt}")
86
- # Use the globally defined 'model' which is already on the GPU
87
  text_result = model.infer(
88
  tokenizer,
89
  prompt=prompt,
 
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoModel, AutoTokenizer
4
+ from huggingface_hub import snapshot_download
5
  import spaces
6
  import os
7
  import tempfile
8
  from PIL import Image, ImageDraw
9
  import re
10
 
11
+ # --- 1. Download Model to a Local Cache, Modify, and Load ---
12
+
13
+ print("Downloading and setting up model from Hugging Face Hub...")
14
+
15
+ # Define a cache path for the model
16
+ CACHE_PATH = "./model_cache"
17
+ if not os.path.exists(CACHE_PATH):
18
+ os.makedirs(CACHE_PATH)
19
+
20
+ # Download the model repository to the local directory
21
+ model_path_local = snapshot_download(
22
+ repo_id='strangervisionhf/deepseek-ocr-latest-transformers',
23
+ local_dir=os.path.join(CACHE_PATH, 'deepseek.ocr'),
24
+ max_workers=8, # Adjusted for typical connection speeds
25
+ local_dir_use_symlinks=False
26
+ )
27
+ print(f"✅ Model downloaded to: {model_path_local}")
28
+
29
+ # --- Remove the specified file after downloading ---
30
+ file_to_remove = os.path.join(model_path_local, "modeling_deepseekv2.py")
31
+ if os.path.exists(file_to_remove):
32
+ try:
33
+ os.remove(file_to_remove)
34
+ print(f"✅ Successfully removed file: {file_to_remove}")
35
+ except OSError as e:
36
+ print(f"❌ Error removing file {file_to_remove}: {e}")
37
+ else:
38
+ print(f"⚠️ File not found, could not remove: {file_to_remove}")
39
+
40
+
41
+ # --- Load tokenizer and model from the local path ---
42
+ print("Loading model and tokenizer from local cache...")
43
+ MODEL_PATH = model_path_local
44
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
45
+
46
+ # Load the model with automatic device mapping and bfloat16 for efficiency
47
  model = AutoModel.from_pretrained(
48
+ MODEL_PATH,
49
+ attn_implementation="flash_attention_2",
50
+ torch_dtype=torch.bfloat16,
51
+ device_map="auto", # Automatically maps model to available GPU(s)/CPU
52
+ trust_remote_code=True
53
+ ).eval()
 
 
 
54
 
55
+ print("✅ Model loaded successfully with automatic device mapping.")
56
 
57
 
58
  # --- Helper function to find pre-generated result images ---
 
66
  print(f"Error opening result image {filename}: {e}")
67
  return None
68
 
69
+ # --- 2. Main Processing Function (No changes needed here) ---
70
  @spaces.GPU
71
  def process_ocr_task(image, model_size, task_type, ref_text):
72
  """
73
+ Processes an image with DeepSeek-OCR. Model is already loaded on the correct device.
74
  """
75
  if image is None:
76
  return "Please upload an image first.", None
77
 
78
+ # No need to move the model; device_map="auto" handled it at load time.
79
+ print("✅ Model is already on the designated device(s).")
80
 
81
  with tempfile.TemporaryDirectory() as output_path:
82
  # Build the prompt
 
107
  config = size_configs.get(model_size, size_configs["Gundam (Recommended)"])
108
 
109
  print(f"🏃 Running inference with prompt: {prompt}")
 
110
  text_result = model.infer(
111
  tokenizer,
112
  prompt=prompt,