Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,7 +5,7 @@ import spaces
|
|
| 5 |
import os
|
| 6 |
import tempfile
|
| 7 |
from PIL import Image, ImageDraw
|
| 8 |
-
import re # Import
|
| 9 |
|
| 10 |
# --- 1. Load Model and Tokenizer (Done only once at startup) ---
|
| 11 |
print("Loading model and tokenizer...")
|
|
@@ -14,10 +14,16 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
|
| 14 |
# Load the model to CPU first; it will be moved to GPU during processing
|
| 15 |
model = AutoModel.from_pretrained(
|
| 16 |
model_name,
|
| 17 |
-
#_attn_implementation="flash_attention_2",
|
| 18 |
trust_remote_code=True,
|
| 19 |
use_safetensors=True,
|
| 20 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
model = model.eval()
|
| 22 |
print("✅ Model loaded successfully.")
|
| 23 |
|
|
@@ -92,14 +98,14 @@ def process_ocr_task(image, model_size, task_type, ref_text):
|
|
| 92 |
|
| 93 |
# --- NEW LOGIC: Always try to find and draw all bounding boxes ---
|
| 94 |
result_image_pil = None
|
| 95 |
-
|
| 96 |
# Define the pattern to find all coordinates wrapped in det tags, like <|det|>[[280, 15, 696, 997]]<|/det|>
|
| 97 |
pattern = re.compile(r"<\|det\|>\[\[(\d+),\s*(\d+),\s*(\d+),\s*(\d+)\]\]<\|/det\|>")
|
| 98 |
matches = list(pattern.finditer(text_result)) # Use finditer to get all matches
|
| 99 |
|
| 100 |
if matches:
|
| 101 |
print(f"✅ Found {len(matches)} bounding box(es). Drawing on the original image.")
|
| 102 |
-
|
| 103 |
# Create a copy of the original image to draw on
|
| 104 |
image_with_bboxes = image.copy()
|
| 105 |
draw = ImageDraw.Draw(image_with_bboxes)
|
|
@@ -109,22 +115,22 @@ def process_ocr_task(image, model_size, task_type, ref_text):
|
|
| 109 |
# Extract coordinates as integers
|
| 110 |
coords_norm = [int(c) for c in match.groups()]
|
| 111 |
x1_norm, y1_norm, x2_norm, y2_norm = coords_norm
|
| 112 |
-
|
| 113 |
# Scale the normalized coordinates (from 1000x1000 space) to the image's actual size
|
| 114 |
x1 = int(x1_norm / 1000 * w)
|
| 115 |
y1 = int(y1_norm / 1000 * h)
|
| 116 |
x2 = int(x2_norm / 1000 * w)
|
| 117 |
y2 = int(y2_norm / 1000 * h)
|
| 118 |
-
|
| 119 |
# Draw the rectangle with a red outline, 3 pixels wide
|
| 120 |
draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
|
| 121 |
-
|
| 122 |
result_image_pil = image_with_bboxes
|
| 123 |
else:
|
| 124 |
# If no coordinates are found in the text, fall back to finding a pre-generated image
|
| 125 |
print("⚠️ No bounding box coordinates found in text result. Falling back to search for a result image file.")
|
| 126 |
result_image_pil = find_result_image(output_path)
|
| 127 |
-
|
| 128 |
return text_result, result_image_pil
|
| 129 |
|
| 130 |
|
|
|
|
| 5 |
import os
|
| 6 |
import tempfile
|
| 7 |
from PIL import Image, ImageDraw
|
| 8 |
+
import re # Import the regular expression library
|
| 9 |
|
| 10 |
# --- 1. Load Model and Tokenizer (Done only once at startup) ---
|
| 11 |
print("Loading model and tokenizer...")
|
|
|
|
| 14 |
# Load the model to CPU first; it will be moved to GPU during processing
|
| 15 |
model = AutoModel.from_pretrained(
|
| 16 |
model_name,
|
|
|
|
| 17 |
trust_remote_code=True,
|
| 18 |
use_safetensors=True,
|
| 19 |
)
|
| 20 |
+
|
| 21 |
+
# ------------------- FIX -------------------
|
| 22 |
+
# The generate function requires use_cache=True to be explicitly set
|
| 23 |
+
# in the model's configuration to avoid an IndexError during inference.
|
| 24 |
+
model.config.use_cache = True
|
| 25 |
+
# ---------------- END FIX ------------------
|
| 26 |
+
|
| 27 |
model = model.eval()
|
| 28 |
print("✅ Model loaded successfully.")
|
| 29 |
|
|
|
|
| 98 |
|
| 99 |
# --- NEW LOGIC: Always try to find and draw all bounding boxes ---
|
| 100 |
result_image_pil = None
|
| 101 |
+
|
| 102 |
# Define the pattern to find all coordinates wrapped in det tags, like <|det|>[[280, 15, 696, 997]]<|/det|>
|
| 103 |
pattern = re.compile(r"<\|det\|>\[\[(\d+),\s*(\d+),\s*(\d+),\s*(\d+)\]\]<\|/det\|>")
|
| 104 |
matches = list(pattern.finditer(text_result)) # Use finditer to get all matches
|
| 105 |
|
| 106 |
if matches:
|
| 107 |
print(f"✅ Found {len(matches)} bounding box(es). Drawing on the original image.")
|
| 108 |
+
|
| 109 |
# Create a copy of the original image to draw on
|
| 110 |
image_with_bboxes = image.copy()
|
| 111 |
draw = ImageDraw.Draw(image_with_bboxes)
|
|
|
|
| 115 |
# Extract coordinates as integers
|
| 116 |
coords_norm = [int(c) for c in match.groups()]
|
| 117 |
x1_norm, y1_norm, x2_norm, y2_norm = coords_norm
|
| 118 |
+
|
| 119 |
# Scale the normalized coordinates (from 1000x1000 space) to the image's actual size
|
| 120 |
x1 = int(x1_norm / 1000 * w)
|
| 121 |
y1 = int(y1_norm / 1000 * h)
|
| 122 |
x2 = int(x2_norm / 1000 * w)
|
| 123 |
y2 = int(y2_norm / 1000 * h)
|
| 124 |
+
|
| 125 |
# Draw the rectangle with a red outline, 3 pixels wide
|
| 126 |
draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
|
| 127 |
+
|
| 128 |
result_image_pil = image_with_bboxes
|
| 129 |
else:
|
| 130 |
# If no coordinates are found in the text, fall back to finding a pre-generated image
|
| 131 |
print("⚠️ No bounding box coordinates found in text result. Falling back to search for a result image file.")
|
| 132 |
result_image_pil = find_result_image(output_path)
|
| 133 |
+
|
| 134 |
return text_result, result_image_pil
|
| 135 |
|
| 136 |
|