prithivMLmods committed on
Commit
f5323b5
·
verified ·
1 Parent(s): 1695fa3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -8
app.py CHANGED
@@ -5,7 +5,7 @@ import spaces
5
  import os
6
  import tempfile
7
  from PIL import Image, ImageDraw
8
- import re # Import thư viện regular expression
9
 
10
  # --- 1. Load Model and Tokenizer (Done only once at startup) ---
11
  print("Loading model and tokenizer...")
@@ -14,10 +14,16 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
14
  # Load the model to CPU first; it will be moved to GPU during processing
15
  model = AutoModel.from_pretrained(
16
  model_name,
17
- #_attn_implementation="flash_attention_2",
18
  trust_remote_code=True,
19
  use_safetensors=True,
20
  )
 
 
 
 
 
 
 
21
  model = model.eval()
22
  print("✅ Model loaded successfully.")
23
 
@@ -92,14 +98,14 @@ def process_ocr_task(image, model_size, task_type, ref_text):
92
 
93
  # --- NEW LOGIC: Always try to find and draw all bounding boxes ---
94
  result_image_pil = None
95
-
96
  # Define the pattern to find all coordinates like [[280, 15, 696, 997]]
97
  pattern = re.compile(r"<\|det\|>\[\[(\d+),\s*(\d+),\s*(\d+),\s*(\d+)\]\]<\|/det\|>")
98
  matches = list(pattern.finditer(text_result)) # Use finditer to get all matches
99
 
100
  if matches:
101
  print(f"✅ Found {len(matches)} bounding box(es). Drawing on the original image.")
102
-
103
  # Create a copy of the original image to draw on
104
  image_with_bboxes = image.copy()
105
  draw = ImageDraw.Draw(image_with_bboxes)
@@ -109,22 +115,22 @@ def process_ocr_task(image, model_size, task_type, ref_text):
109
  # Extract coordinates as integers
110
  coords_norm = [int(c) for c in match.groups()]
111
  x1_norm, y1_norm, x2_norm, y2_norm = coords_norm
112
-
113
  # Scale the normalized coordinates (from 1000x1000 space) to the image's actual size
114
  x1 = int(x1_norm / 1000 * w)
115
  y1 = int(y1_norm / 1000 * h)
116
  x2 = int(x2_norm / 1000 * w)
117
  y2 = int(y2_norm / 1000 * h)
118
-
119
  # Draw the rectangle with a red outline, 3 pixels wide
120
  draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
121
-
122
  result_image_pil = image_with_bboxes
123
  else:
124
  # If no coordinates are found in the text, fall back to finding a pre-generated image
125
  print("⚠️ No bounding box coordinates found in text result. Falling back to search for a result image file.")
126
  result_image_pil = find_result_image(output_path)
127
-
128
  return text_result, result_image_pil
129
 
130
 
 
5
  import os
6
  import tempfile
7
  from PIL import Image, ImageDraw
8
+ import re # Import the regular expression library
9
 
10
  # --- 1. Load Model and Tokenizer (Done only once at startup) ---
11
  print("Loading model and tokenizer...")
 
14
  # Load the model to CPU first; it will be moved to GPU during processing
15
  model = AutoModel.from_pretrained(
16
  model_name,
 
17
  trust_remote_code=True,
18
  use_safetensors=True,
19
  )
20
+
21
+ # ------------------- FIX -------------------
22
+ # The generate function requires use_cache=True to be explicitly set
23
+ # in the model's configuration to avoid an IndexError during inference.
24
+ model.config.use_cache = True
25
+ # ---------------- END FIX ------------------
26
+
27
  model = model.eval()
28
  print("✅ Model loaded successfully.")
29
 
 
98
 
99
  # --- NEW LOGIC: Always try to find and draw all bounding boxes ---
100
  result_image_pil = None
101
+
102
  # Define the pattern to find all coordinates like [[280, 15, 696, 997]]
103
  pattern = re.compile(r"<\|det\|>\[\[(\d+),\s*(\d+),\s*(\d+),\s*(\d+)\]\]<\|/det\|>")
104
  matches = list(pattern.finditer(text_result)) # Use finditer to get all matches
105
 
106
  if matches:
107
  print(f"✅ Found {len(matches)} bounding box(es). Drawing on the original image.")
108
+
109
  # Create a copy of the original image to draw on
110
  image_with_bboxes = image.copy()
111
  draw = ImageDraw.Draw(image_with_bboxes)
 
115
  # Extract coordinates as integers
116
  coords_norm = [int(c) for c in match.groups()]
117
  x1_norm, y1_norm, x2_norm, y2_norm = coords_norm
118
+
119
  # Scale the normalized coordinates (from 1000x1000 space) to the image's actual size
120
  x1 = int(x1_norm / 1000 * w)
121
  y1 = int(y1_norm / 1000 * h)
122
  x2 = int(x2_norm / 1000 * w)
123
  y2 = int(y2_norm / 1000 * h)
124
+
125
  # Draw the rectangle with a red outline, 3 pixels wide
126
  draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
127
+
128
  result_image_pil = image_with_bboxes
129
  else:
130
  # If no coordinates are found in the text, fall back to finding a pre-generated image
131
  print("⚠️ No bounding box coordinates found in text result. Falling back to search for a result image file.")
132
  result_image_pil = find_result_image(output_path)
133
+
134
  return text_result, result_image_pil
135
 
136