Spaces:

prithivMLmods
/

florence2-vision-models

Running on Zero

App Files Files Community

prithivMLmods committed on Oct 29, 2025

Commit

f5323b5

verified ·

1 Parent(s): 1695fa3

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -8

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ import spaces
 import os
 import tempfile
 from PIL import Image, ImageDraw
-import re # Import thư viện regular expression
 # --- 1. Load Model and Tokenizer (Done only once at startup) ---
 print("Loading model and tokenizer...")
@@ -14,10 +14,16 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 # Load the model to CPU first; it will be moved to GPU during processing
 model = AutoModel.from_pretrained(
     model_name,
-    #_attn_implementation="flash_attention_2",
     trust_remote_code=True,
     use_safetensors=True,
 )
 model = model.eval()
 print("✅ Model loaded successfully.")
@@ -92,14 +98,14 @@ def process_ocr_task(image, model_size, task_type, ref_text):
         # --- NEW LOGIC: Always try to find and draw all bounding boxes ---
         result_image_pil = None
         # Define the pattern to find all coordinates like [[280, 15, 696, 997]]
         pattern = re.compile(r"<\|det\|>\[\[(\d+),\s*(\d+),\s*(\d+),\s*(\d+)\]\]<\|/det\|>")
         matches = list(pattern.finditer(text_result)) # Use finditer to get all matches
         if matches:
             print(f"✅ Found {len(matches)} bounding box(es). Drawing on the original image.")
             # Create a copy of the original image to draw on
             image_with_bboxes = image.copy()
             draw = ImageDraw.Draw(image_with_bboxes)
@@ -109,22 +115,22 @@ def process_ocr_task(image, model_size, task_type, ref_text):
                 # Extract coordinates as integers
                 coords_norm = [int(c) for c in match.groups()]
                 x1_norm, y1_norm, x2_norm, y2_norm = coords_norm
                 # Scale the normalized coordinates (from 1000x1000 space) to the image's actual size
                 x1 = int(x1_norm / 1000 * w)
                 y1 = int(y1_norm / 1000 * h)
                 x2 = int(x2_norm / 1000 * w)
                 y2 = int(y2_norm / 1000 * h)
                 # Draw the rectangle with a red outline, 3 pixels wide
                 draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
             result_image_pil = image_with_bboxes
         else:
             # If no coordinates are found in the text, fall back to finding a pre-generated image
             print("⚠️ No bounding box coordinates found in text result. Falling back to search for a result image file.")
             result_image_pil = find_result_image(output_path)
         return text_result, result_image_pil

 import os
 import tempfile
 from PIL import Image, ImageDraw
+import re # Import the regular expression library
 # --- 1. Load Model and Tokenizer (Done only once at startup) ---
 print("Loading model and tokenizer...")
 # Load the model to CPU first; it will be moved to GPU during processing
 model = AutoModel.from_pretrained(
     model_name,
     trust_remote_code=True,
     use_safetensors=True,
 )
+# ------------------- FIX -------------------
+# The generate function requires use_cache=True to be explicitly set
+# in the model's configuration to avoid an IndexError during inference.
+model.config.use_cache = True
+# ---------------- END FIX ------------------
 model = model.eval()
 print("✅ Model loaded successfully.")
         # --- NEW LOGIC: Always try to find and draw all bounding boxes ---
         result_image_pil = None
         # Define the pattern to find all coordinates like [[280, 15, 696, 997]]
         pattern = re.compile(r"<\|det\|>\[\[(\d+),\s*(\d+),\s*(\d+),\s*(\d+)\]\]<\|/det\|>")
         matches = list(pattern.finditer(text_result)) # Use finditer to get all matches
         if matches:
             print(f"✅ Found {len(matches)} bounding box(es). Drawing on the original image.")
             # Create a copy of the original image to draw on
             image_with_bboxes = image.copy()
             draw = ImageDraw.Draw(image_with_bboxes)
                 # Extract coordinates as integers
                 coords_norm = [int(c) for c in match.groups()]
                 x1_norm, y1_norm, x2_norm, y2_norm = coords_norm
                 # Scale the normalized coordinates (from 1000x1000 space) to the image's actual size
                 x1 = int(x1_norm / 1000 * w)
                 y1 = int(y1_norm / 1000 * h)
                 x2 = int(x2_norm / 1000 * w)
                 y2 = int(y2_norm / 1000 * h)
                 # Draw the rectangle with a red outline, 3 pixels wide
                 draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
             result_image_pil = image_with_bboxes
         else:
             # If no coordinates are found in the text, fall back to finding a pre-generated image
             print("⚠️ No bounding box coordinates found in text result. Falling back to search for a result image file.")
             result_image_pil = find_result_image(output_path)
         return text_result, result_image_pil