prithivMLmods committed on
Commit e620b93 · verified · 1 Parent(s): c2b1d2c

Update app.py

Files changed (1)
  1. app.py +25 -48
app.py CHANGED
@@ -1,58 +1,34 @@
 import gradio as gr
 import torch
 from transformers import AutoModel, AutoTokenizer
-from huggingface_hub import snapshot_download
 import spaces
 import os
 import tempfile
 from PIL import Image, ImageDraw
 import re
 
-# --- 1. Download Model to a Local Cache, Modify, and Load ---
-
-print("Downloading and setting up model from Hugging Face Hub...")
-
-# Define a cache path for the model
-CACHE_PATH = "./model_cache"
-if not os.path.exists(CACHE_PATH):
-    os.makedirs(CACHE_PATH)
-
-# Download the model repository to the local directory
-model_path_local = snapshot_download(
-    repo_id='strangervisionhf/deepseek-ocr-latest-transformers',
-    local_dir=os.path.join(CACHE_PATH, 'deepseek.ocr'),
-    max_workers=8,  # Adjusted for typical connection speeds
-    local_dir_use_symlinks=False
-)
-print(f"✅ Model downloaded to: {model_path_local}")
-
-# --- Remove the specified file after downloading ---
-file_to_remove = os.path.join(model_path_local, "modeling_deepseekv2.py")
-if os.path.exists(file_to_remove):
-    try:
-        os.remove(file_to_remove)
-        print(f"✅ Successfully removed file: {file_to_remove}")
-    except OSError as e:
-        print(f"❌ Error removing file {file_to_remove}: {e}")
-else:
-    print(f"⚠️ File not found, could not remove: {file_to_remove}")
-
-
-# --- Load tokenizer and model from the local path ---
-print("Loading model and tokenizer from local cache...")
-MODEL_PATH = model_path_local
-tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
-
-# Load the model with automatic device mapping and bfloat16 for efficiency
+# --- 1. Load Model and Tokenizer directly to the correct device ---
+print("Determining device...")
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"✅ Using device: {device}")
+
+print("Loading model and tokenizer...")
+model_name = "deepseek-ai/DeepSeek-OCR"
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
+# Load the model directly to the specified device and set to evaluation mode
 model = AutoModel.from_pretrained(
-    MODEL_PATH,
-    attn_implementation="flash_attention_2",
-    torch_dtype=torch.bfloat16,
-    device_map="auto",  # Automatically maps model to available GPU(s)/CPU
-    trust_remote_code=True
-).eval()
+    model_name,
+    _attn_implementation="flash_attention_2",
+    trust_remote_code=True,
+    use_safetensors=True,
+).to(device).eval()  # Move to device and set to eval mode
+
+# Also apply the desired dtype if using a GPU
+if device.type == 'cuda':
+    model = model.to(torch.bfloat16)
 
-print("✅ Model loaded successfully with automatic device mapping.")
+print("✅ Model loaded successfully to device and in eval mode.")
 
 
 # --- Helper function to find pre-generated result images ---
@@ -66,17 +42,17 @@ def find_result_image(path):
         print(f"Error opening result image {filename}: {e}")
     return None
 
-# --- 2. Main Processing Function (No changes needed here) ---
+# --- 2. Main Processing Function (Simplified) ---
 @spaces.GPU
 def process_ocr_task(image, model_size, task_type, ref_text):
     """
-    Processes an image with DeepSeek-OCR. Model is already loaded on the correct device.
+    Processes an image with DeepSeek-OCR. The model is already on the correct device.
     """
     if image is None:
         return "Please upload an image first.", None
 
-    # No need to move the model; device_map="auto" handled it at load time.
-    print("✅ Model is already on the designated device(s).")
+    # No need to move the model to GPU here; it's already done at startup.
+    print("✅ Model is already on the designated device.")
 
     with tempfile.TemporaryDirectory() as output_path:
         # Build the prompt
@@ -107,6 +83,7 @@ def process_ocr_task(image, model_size, task_type, ref_text):
         config = size_configs.get(model_size, size_configs["Gundam (Recommended)"])
 
         print(f"🏃 Running inference with prompt: {prompt}")
+        # Use the globally defined 'model' which is already on the GPU
        text_result = model.infer(
             tokenizer,
             prompt=prompt,
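
Two notes on the new code, with sketches that go beyond what the diff itself shows. First, `_attn_implementation="flash_attention_2"` makes the load fail outright on machines without the flash-attn wheel (including CPU-only Spaces). A defensive variant — a sketch, not part of this commit, and it assumes the model's remote code also accepts the standard "eager" implementation — would probe for the package first:

# Sketch (not in this commit): pick the attention backend at runtime.
# Assumption: DeepSeek-OCR's remote code also accepts "eager", the
# baseline attention implementation every transformers model supports.
try:
    import flash_attn  # noqa: F401  # only checking the wheel is installed
    attn_impl = "flash_attention_2"
except ImportError:
    attn_impl = "eager"

model = AutoModel.from_pretrained(
    model_name,
    _attn_implementation=attn_impl,
    trust_remote_code=True,
    use_safetensors=True,
).to(device).eval()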
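
Second, both hunks are cut off right after `prompt=prompt`, so the full shape of the `model.infer(...)` call never appears in the diff. Here is a sketch of how that call typically continues, based on the public DeepSeek-OCR model card rather than on this commit; the remaining keyword names, the `image_path` temp file, and the `config` dict layout (`base_size`, `image_size`, `crop_mode`, with the "Gundam" preset at base_size=1024, image_size=640, crop_mode=True) are all assumptions carried over from that card:

# Sketch (not in this commit): the infer call as documented on the
# DeepSeek-OCR model card. `image_path` is a hypothetical temp file the
# uploaded PIL image was saved to; `config` is assumed to be a dict like
# {"base_size": 1024, "image_size": 640, "crop_mode": True}.
text_result = model.infer(
    tokenizer,
    prompt=prompt,
    image_file=image_path,
    output_path=output_path,        # result images land here for find_result_image()
    base_size=config["base_size"],
    image_size=config["image_size"],
    crop_mode=config["crop_mode"],
    save_results=True,              # write annotated outputs to output_path
)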