3morrrrr committed on
Commit
8870779
·
verified ·
1 Parent(s): cfd0c4a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -67
app.py CHANGED
@@ -2,8 +2,6 @@ import gradio as gr
2
  import logging
3
  from roboflow import Roboflow
4
  from PIL import Image, ImageDraw, ImageFont, ImageFilter
5
- import cv2
6
- import numpy as np
7
  import os
8
 
9
  # Configure logging
@@ -22,7 +20,7 @@ PROJECT_NAME = "model_verification_project"
22
  VERSION_NUMBER = 2
23
  FONT_PATH = "./STEVEHANDWRITING-REGULAR.TTF"
24
 
25
- # Function to process image and overlay text with perspective alignment
26
  def process_image(image, text):
27
  try:
28
  # Initialize Roboflow
@@ -39,92 +37,73 @@ def process_image(image, text):
39
  prediction = model.predict(input_image_path, confidence=70, overlap=50).json()
40
  logging.debug(f"Inference result: {prediction}")
41
 
42
- # Convert PIL image to OpenCV format
43
- image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGBA2BGRA)
44
 
 
45
  for obj in prediction['predictions']:
46
- # Extract bounding box coordinates
47
- x_center, y_center = int(obj['x']), int(obj['y'])
48
- width, height = int(obj['width']), int(obj['height'])
49
-
50
- # Define the four corners of the detected paper
51
- corners = [
52
- [x_center - width // 2, y_center - height // 2], # Top-left
53
- [x_center + width // 2, y_center - height // 2], # Top-right
54
- [x_center + width // 2, y_center + height // 2], # Bottom-right
55
- [x_center - width // 2, y_center + height // 2], # Bottom-left
56
- ]
57
-
58
- # Approximate a perspective transformation matrix
59
- dst_corners = [
60
- [0, 0], # Top-left in the new perspective
61
- [width, 0], # Top-right
62
- [width, height], # Bottom-right
63
- [0, height], # Bottom-left
64
- ]
65
- matrix = cv2.getPerspectiveTransform(np.float32(corners), np.float32(dst_corners))
66
-
67
- # Warp the detected region
68
- warped = cv2.warpPerspective(image_cv, matrix, (width, height))
69
-
70
- # Draw text on the warped region
71
- pil_warped = Image.fromarray(cv2.cvtColor(warped, cv2.COLOR_BGRA2RGBA))
72
- draw = ImageDraw.Draw(pil_warped)
73
-
74
- # Calculate dynamic font size and adjust for multi-line text
75
- font_size = max(10, int(height * 0.2)) # Adjust size relative to height
76
  try:
77
- font = ImageFont.truetype(FONT_PATH, size=font_size)
78
- logging.debug(f"Font loaded successfully: {FONT_PATH} with size {font_size}")
79
  except Exception as e:
80
  logging.warning(f"Error loading font. Using default. {e}")
81
  font = ImageFont.load_default()
82
 
83
- # Split text into multiple lines to fit within the bounding box
 
 
 
 
 
 
 
 
84
  words = text.split()
85
  lines = []
86
  current_line = ""
 
87
  for word in words:
88
  test_line = f"{current_line} {word}".strip()
89
- bbox = draw.textbbox((0, 0), test_line, font=font)
90
- if bbox[2] - bbox[0] <= width:
91
  current_line = test_line
92
  else:
93
  lines.append(current_line)
94
  current_line = word
 
95
  if current_line:
96
  lines.append(current_line)
97
 
98
  # Draw each line of text within the bounding box
99
- line_height = draw.textbbox((0, 0), "Ag", font=font)[3] - draw.textbbox((0, 0), "Ag", font=font)[1]
100
- start_y = (height - line_height * len(lines)) // 2
 
 
 
 
101
  for i, line in enumerate(lines):
102
- text_width = draw.textbbox((0, 0), line, font=font)[2] - draw.textbbox((0, 0), line, font=font)[0]
103
- text_x = (width - text_width) // 2
104
- text_y = start_y + i * line_height
105
- draw.text((text_x, text_y), line, fill=(0, 0, 0, 255), font=font)
106
-
107
- # Convert back to OpenCV and reapply the perspective transformation
108
- warped_with_text = cv2.cvtColor(np.array(pil_warped), cv2.COLOR_RGBA2BGRA)
109
- matrix_back = cv2.getPerspectiveTransform(np.float32(dst_corners), np.float32(corners))
110
- logging.debug(f"Inverse Transformation Matrix: {matrix_back}")
111
-
112
- transformed_back = cv2.warpPerspective(
113
- warped_with_text, matrix_back,
114
- (image_cv.shape[1], image_cv.shape[0]),
115
- flags=cv2.WARP_INVERSE_MAP,
116
- )
117
-
118
- # Overlay the transformed text onto the original image
119
- mask = transformed_back[:, :, 3] > 0 # Alpha channel as mask
120
- logging.debug(f"Alpha mask shape: {mask.shape}, Non-zero values: {np.count_nonzero(mask)}")
121
-
122
- image_cv[mask] = transformed_back[mask]
123
-
124
- # Convert back to PIL format and save
125
- output_image = Image.fromarray(cv2.cvtColor(image_cv, cv2.COLOR_BGRA2RGBA))
126
  output_image_path = "/tmp/output_image.png"
127
- output_image.save(output_image_path)
128
  return output_image_path
129
 
130
  except Exception as e:
 
2
  import logging
3
  from roboflow import Roboflow
4
  from PIL import Image, ImageDraw, ImageFont, ImageFilter
 
 
5
  import os
6
 
7
  # Configure logging
 
20
  VERSION_NUMBER = 2
21
  FONT_PATH = "./STEVEHANDWRITING-REGULAR.TTF"
22
 
23
+ # Function to process image and overlay text
24
  def process_image(image, text):
25
  try:
26
  # Initialize Roboflow
 
37
  prediction = model.predict(input_image_path, confidence=70, overlap=50).json()
38
  logging.debug(f"Inference result: {prediction}")
39
 
40
+ # Open the image for processing
41
+ pil_image = image.convert("RGBA")
42
 
43
+ # Iterate over detected objects
44
  for obj in prediction['predictions']:
45
+ x1 = int(obj['x'] - obj['width'] / 2)
46
+ y1 = int(obj['y'] - obj['height'] / 2)
47
+ x2 = int(obj['x'] + obj['width'] / 2)
48
+ y2 = int(obj['y'] + obj['height'] / 2)
49
+
50
+ # Calculate bounding box dimensions
51
+ box_width = x2 - x1
52
+ box_height = y2 - y1
53
+
54
+ # Load font
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  try:
56
+ font = ImageFont.truetype(FONT_PATH, size=20) # Set a base font size
 
57
  except Exception as e:
58
  logging.warning(f"Error loading font. Using default. {e}")
59
  font = ImageFont.load_default()
60
 
61
+ # Ensure pil_image is in RGBA mode
62
+ if pil_image.mode != "RGBA":
63
+ pil_image = pil_image.convert("RGBA")
64
+
65
+ # Create a transparent text layer
66
+ text_layer = Image.new("RGBA", pil_image.size, (255, 255, 255, 0))
67
+ text_draw = ImageDraw.Draw(text_layer)
68
+
69
+ # Split text into lines to fit within the bounding box
70
  words = text.split()
71
  lines = []
72
  current_line = ""
73
+
74
  for word in words:
75
  test_line = f"{current_line} {word}".strip()
76
+ bbox = text_draw.textbbox((0, 0), test_line, font=font)
77
+ if bbox[2] - bbox[0] <= box_width: # Check if text fits
78
  current_line = test_line
79
  else:
80
  lines.append(current_line)
81
  current_line = word
82
+
83
  if current_line:
84
  lines.append(current_line)
85
 
86
  # Draw each line of text within the bounding box
87
+ line_height = font.getsize("Hg")[1]
88
+ total_text_height = len(lines) * line_height
89
+
90
+ if total_text_height > box_height:
91
+ logging.warning("Text exceeds bounding box height and may be clipped.")
92
+
93
  for i, line in enumerate(lines):
94
+ text_x = x1 + (box_width - text_draw.textbbox((0, 0), line, font=font)[2]) // 2
95
+ text_y = y1 + (i * line_height)
96
+ text_draw.text((text_x, text_y), line, fill=(0, 0, 0, 180), font=font)
97
+
98
+ # Apply slight blur to the text layer
99
+ blurred_text_layer = text_layer.filter(ImageFilter.GaussianBlur(radius=1.0))
100
+
101
+ # Composite the blurred text onto the original image
102
+ pil_image = Image.alpha_composite(pil_image, blurred_text_layer)
103
+
104
+ # Save and return output image path
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  output_image_path = "/tmp/output_image.png"
106
+ pil_image.convert("RGB").save(output_image_path)
107
  return output_image_path
108
 
109
  except Exception as e: