# Hugging Face Space: overlay generated handwriting onto detected paper in a photo.
| import os | |
| import cv2 | |
| import time | |
| import shutil | |
| import logging | |
| import tempfile | |
| import numpy as np | |
| from math import atan2, degrees | |
| from PIL import Image, ImageOps, ImageDraw | |
| from roboflow import Roboflow | |
| from gradio_client import Client | |
| import gradio as gr | |
| import requests # <-- for downloading PNG from URL | |
| # ------------------------------------------------------------------------- | |
| # 🧠 Fix for Gradio schema bug ("TypeError: argument of type 'bool' is not iterable") | |
| # ------------------------------------------------------------------------- | |
| import gradio_client.utils as gu | |
| from gradio_client.utils import APIInfoParseError | |
def safe_get_type(schema):
    """Return a readable type name for *schema*, tolerating non-dict nodes.

    Drop-in replacement for gradio_client's internal ``get_type`` so that
    boolean (or otherwise non-dict) schema nodes no longer raise
    ``TypeError: argument of type 'bool' is not iterable``.
    """
    if isinstance(schema, dict):
        if "const" in schema:
            return f"Literal[{schema['const']}]"
        return schema.get("type", "any")
    # Non-dict nodes (e.g. bare booleans) get their Python type name.
    return type(schema).__name__
| gu.get_type = safe_get_type | |
| # ------------------------------------------------------------------------- | |
| # 🩹 PATCH for APIInfoParseError: safe handling of 'anyOf' schemas | |
| # ------------------------------------------------------------------------- | |
| def _safe_json_schema_to_python_type(schema, defs=None): | |
| """Fix gradio_client parsing for anyOf[string, null] schemas.""" | |
| try: | |
| if isinstance(schema, dict) and "anyOf" in schema: | |
| types = [s.get("type") for s in schema["anyOf"] if isinstance(s, dict)] | |
| if set(types) == {"string", "null"}: | |
| return "Optional[str]" | |
| return gu._json_schema_to_python_type_original(schema, defs) | |
| except Exception: | |
| return "UnknownType" | |
# Save the original parser exactly once (idempotent across module reloads),
# then swap in the safe wrapper defined above.
if not hasattr(gu, "_json_schema_to_python_type_original"):
    gu._json_schema_to_python_type_original = gu._json_schema_to_python_type
gu._json_schema_to_python_type = _safe_json_schema_to_python_type
print("✅ Patched gradio_client JSON schema parser safely.")
| # ------------------------------------------------------------------------- | |
| # 🧹 Safely clear Gradio client cache (for all versions) | |
| # ------------------------------------------------------------------------- | |
# Remove any stale on-disk Gradio client cache before creating clients;
# best-effort — failure to delete is logged as a warning and is non-fatal.
try:
    cache_dir = os.path.expanduser("~/.cache/gradio")
    if os.path.exists(cache_dir):
        shutil.rmtree(cache_dir)
        print("🧹 Cleared Gradio client cache manually.")
except Exception as e:
    print(f"⚠️ Warning: Could not clear Gradio cache ({e})")
| # ------------------------------------------------------------------------- | |
| # 🪵 Logging configuration | |
| # ------------------------------------------------------------------------- | |
# Log DEBUG and above both to a persistent file and to the console.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler("debug.log", mode="a", encoding="utf-8"),
        logging.StreamHandler()
    ]
)
| # ------------------------------------------------------------------------- | |
| # 🤖 Roboflow configuration | |
| # ------------------------------------------------------------------------- | |
# Roboflow credentials/config. Prefer the environment variable so the key
# does not have to live in source; the literal below is only a local-run
# fallback. NOTE(review): this key was committed to source — rotate it.
ROBOFLOW_API_KEY = os.environ.get("ROBOFLOW_API_KEY", "u5LX112EBlNmzYoofvPL")
PROJECT_NAME = "model_verification_project"
VERSION_NUMBER = 2
# Re-export so downstream libraries that read the env var see the same key.
os.environ["ROBOFLOW_API_KEY"] = ROBOFLOW_API_KEY
| # ------------------------------------------------------------------------- | |
| # ✍️ Handwriting model (Hugging Face Space) | |
| # ------------------------------------------------------------------------- | |
HANDWRITING_MODEL_ENDPOINT = "3morrrrr/Handwriting_Model_Inf"
# Cached handwriting client (module-level singleton, created lazily).
_handwriting_client = None

def get_handwriting_client(max_retries=5, retry_delay=3):
    """
    Lazily initialize and cache the handwriting Client with retries.

    Avoids crashing the app if the Space is waking up / slow.

    Args:
        max_retries: Number of connection attempts before giving up.
        retry_delay: Seconds to wait between failed attempts.

    Returns:
        The cached gradio_client.Client instance.

    Raises:
        RuntimeError: if max_retries < 1 (no attempt could be made).
        Exception: the last connection error when every attempt fails.
    """
    global _handwriting_client
    if _handwriting_client is not None:
        return _handwriting_client
    last_error = None
    for attempt in range(1, max_retries + 1):
        try:
            logging.info(
                f"Initializing handwriting client "
                f"(attempt {attempt}/{max_retries}) for {HANDWRITING_MODEL_ENDPOINT}"
            )
            _handwriting_client = Client(HANDWRITING_MODEL_ENDPOINT)
            logging.info("Handwriting client initialized successfully.")
            return _handwriting_client
        except Exception as e:
            last_error = e
            logging.warning(
                f"Failed to initialize handwriting client "
                f"(attempt {attempt}/{max_retries}): {e}"
            )
            # Fix: don't sleep after the final attempt — it only delayed
            # the raise below by retry_delay seconds for no benefit.
            if attempt < max_retries:
                time.sleep(retry_delay)
    logging.error("Exceeded max retries while initializing handwriting client.")
    if last_error is None:
        # Fix: with max_retries < 1 the loop never ran and `raise None`
        # would itself be a TypeError; raise something meaningful instead.
        raise RuntimeError("get_handwriting_client called with max_retries < 1")
    raise last_error
| # ------------------------------------------------------------------------- | |
| # ⚙️ General configuration | |
| # ------------------------------------------------------------------------- | |
# Minimum fraction of the usable paper width the handwriting must occupy.
MIN_WIDTH_PERCENTAGE = 0.8
# Default size multiplier for the handwriting overlay; overwritten per
# request by the "Text Size Multiplier" slider in gradio_process().
TEXT_SCALE_FACTOR = 1.2
# When True, every intermediate image is written to DEBUG_DIR.
DEBUG = True
DEBUG_DIR = os.path.join(tempfile.gettempdir(), "debug_images")
os.makedirs(DEBUG_DIR, exist_ok=True)
logging.info(f"Debug images stored in: {DEBUG_DIR}")
logging.info(
    f"Using Roboflow project '{PROJECT_NAME}' (v{VERSION_NUMBER}) "
    f"with API key ending in {ROBOFLOW_API_KEY[-4:]}"
)
logging.info(f"Using handwriting model endpoint: {HANDWRITING_MODEL_ENDPOINT}")
| # ------------------------------------------------------------------------- | |
| # 🧩 Helper functions | |
| # ------------------------------------------------------------------------- | |
def format_text_for_paper(text, paper_width):
    """Greedy word-wrap *text* to fit the detected paper area.

    Args:
        text: Raw text; words are split on single spaces.
        paper_width: Usable paper width in pixels.

    Returns:
        The text with newlines inserted so each line fits a character
        limit estimated from paper_width (clamped to the range 10..60).
    """
    pixels_per_char = 13  # rough average glyph width of the handwriting
    est_chars = max(10, int((paper_width * 0.8) / pixels_per_char))
    char_limit = min(60, est_chars)
    words = text.split(" ")
    lines, line = [], ""
    for word in words:
        # Fix: measure the candidate line correctly. The original always
        # added a separator space even to an empty line, so a first word
        # exactly at the limit produced a spurious empty first line.
        candidate = f"{line} {word}" if line else word
        if len(candidate) <= char_limit:
            line = candidate
        else:
            lines.append(line)
            line = word
    if line:
        lines.append(line)
    return "\n".join(lines)
def save_debug_image(image, filename, text=None):
    """Write *image* to DEBUG_DIR under *filename* and return its path.

    No-op (returns None) when the global DEBUG flag is off. Accepts either
    a numpy array or a PIL image. If *text* is given, a dark banner with
    the text is drawn first — note this draws directly on the PIL image
    object the caller passed in.
    """
    if not DEBUG:
        return
    pil_img = Image.fromarray(image) if isinstance(image, np.ndarray) else image
    if text:
        annotator = ImageDraw.Draw(pil_img)
        annotator.rectangle([(0, 0), (pil_img.width, 60)], fill=(0, 0, 0, 128))
        annotator.text((10, 10), text, fill=(255, 255, 255))
    out_path = os.path.join(DEBUG_DIR, filename)
    pil_img.save(out_path)
    logging.debug(f"Saved debug image: {out_path}")
    return out_path
def ensure_local_png(png_output):
    """
    Resolve the handwriting model's PNG output to a local file path.

    Handles the shapes Gradio / HF Spaces can return:
    - a plain path string → returned as-is if the file exists;
    - a dict (Gradio FileData) → uses its 'path' or 'url' entry;
    - an http(s) URL → downloaded to a temp file.

    Args:
        png_output: str, dict, or None as returned by Client.predict().

    Returns:
        Path to a readable local PNG file.

    Raises:
        ValueError: for None, unexpected types, or missing/invalid paths.
        RuntimeError: when downloading from a URL fails.
    """
    if png_output is None:
        raise ValueError("Handwriting model returned no PNG output (None).")
    png_path = None
    # Case 1: plain string path
    if isinstance(png_output, str):
        png_path = png_output
    # Case 2: dict from Gradio output: {path, url, ...}
    elif isinstance(png_output, dict):
        png_path = png_output.get("path") or png_output.get("url")
    else:
        raise ValueError(f"Unexpected PNG output type: {type(png_output)}")
    if not png_path:
        raise ValueError(f"PNG output from handwriting model is missing a path/url: {png_output}")
    # If already a local file path
    if os.path.exists(png_path):
        return png_path
    # If it's a URL, download it
    if isinstance(png_path, str) and png_path.startswith("http"):
        logging.debug(f"Downloading PNG from URL: {png_path}")
        temp_png = os.path.join(tempfile.gettempdir(), f"handwriting_{int(time.time())}.png")
        try:
            # Fix: iter_content() (not r.raw) lets requests transparently
            # decode Content-Encoding (e.g. gzip); the `with` block closes
            # the connection even on error.
            with requests.get(png_path, stream=True, timeout=30) as r:
                r.raise_for_status()
                with open(temp_png, "wb") as f:
                    for chunk in r.iter_content(chunk_size=65536):
                        f.write(chunk)
            logging.debug(f"Downloaded PNG to {temp_png}")
            return temp_png
        except Exception as e:
            raise RuntimeError(f"Failed to download PNG from URL: {e}") from e
    # Any other weird case
    raise ValueError(f"Invalid PNG path returned: {png_path}")
| # ------------------------------------------------------------------------- | |
| # 🧠 Load Roboflow models | |
| # ------------------------------------------------------------------------- | |
# Instantiate the Roboflow paper-detection model once at import time.
# NOTE(review): process_image() creates its own Roboflow client per call,
# so this module-level `model` appears unused — confirm before removing.
rf = Roboflow(api_key=ROBOFLOW_API_KEY)
project = rf.workspace().project(PROJECT_NAME)
model = project.version(VERSION_NUMBER).model
| # ------------------------------------------------------------------------- | |
| # 📐 Detect paper angle | |
| # ------------------------------------------------------------------------- | |
def detect_paper_angle(image, bounding_box):
    """
    Detect the rotation angle (degrees) of a paper sheet inside bounding_box.

    Two strategies are tried in order:
      1. Adaptive threshold → largest contour → cv2.minAreaRect angle.
      2. Canny edges → probabilistic Hough transform → angle of the
         longest detected line.
    Falls back to 0 degrees when both methods fail.

    Args:
        image: Full scene as a PIL image or an RGB numpy array.
        bounding_box: (x1, y1, x2, y2) pixel coordinates of the paper.

    Returns:
        Angle in degrees (float), or 0 on failure.
    """
    x1, y1, x2, y2 = bounding_box
    # Convert PIL image to numpy array if needed
    if not isinstance(image, np.ndarray):
        image_np = np.array(image)
    else:
        image_np = image
    # Crop the region of interest (ROI)
    roi = image_np[y1:y2, x1:x2]
    if DEBUG:
        debug_roi = Image.fromarray(roi)
        save_debug_image(debug_roi, f"paper_roi_{int(time.time())}.png",
                         text=f"Paper ROI: {roi.shape[1]}x{roi.shape[0]}")
    # Convert ROI to grayscale
    if len(roi.shape) == 3 and roi.shape[2] >= 3:
        gray = cv2.cvtColor(roi, cv2.COLOR_RGB2GRAY)
    else:
        gray = roi
    if DEBUG:
        cv2.imwrite(os.path.join(DEBUG_DIR, f"gray_paper_{int(time.time())}.png"), gray)
    # Method 1: adaptive thresholding
    try:
        binary = cv2.adaptiveThreshold(
            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV, 11, 2
        )
        if DEBUG:
            cv2.imwrite(os.path.join(DEBUG_DIR, f"binary_paper_{int(time.time())}.png"), binary)
        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if contours:
            contours = sorted(contours, key=cv2.contourArea, reverse=True)
            # Ignore specks: a contour must cover at least 5% of the ROI.
            min_area_ratio = 0.05
            roi_area = gray.shape[0] * gray.shape[1]
            valid_contours = [c for c in contours if cv2.contourArea(c) > roi_area * min_area_ratio]
            if valid_contours:
                largest_contour = valid_contours[0]
                if DEBUG:
                    contour_debug = np.zeros_like(binary)
                    cv2.drawContours(contour_debug, [largest_contour], 0, 255, 2)
                    cv2.imwrite(os.path.join(DEBUG_DIR, f"paper_contour_{int(time.time())}.png"), contour_debug)
                rect = cv2.minAreaRect(largest_contour)
                box = cv2.boxPoints(rect)
                # Fix: np.int0 was removed in NumPy 2.0; np.intp is the
                # platform integer type that alias referred to.
                box = box.astype(np.intp)
                if DEBUG:
                    rect_debug = roi.copy() if len(roi.shape) == 3 else cv2.cvtColor(roi, cv2.COLOR_GRAY2RGB)
                    cv2.drawContours(rect_debug, [box], 0, (0, 0, 255), 2)
                    cv2.imwrite(os.path.join(DEBUG_DIR, f"paper_rect_{int(time.time())}.png"), rect_debug)
                center, (width, height), angle = rect
                # minAreaRect's angle is relative to the shorter side;
                # normalize so the angle follows the paper's long edge.
                if width < height:
                    angle += 90
                logging.debug(f"Detected paper angle using adaptive threshold: {angle} degrees")
                return angle
    except Exception as e:
        logging.warning(f"Error in adaptive threshold method: {str(e)}")
    # Method 2: Canny + Hough lines
    try:
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        # Auto-tuned Canny thresholds around the median intensity (±33%).
        median = np.median(blurred)
        lower = int(max(0, (1.0 - 0.33) * median))
        upper = int(min(255, (1.0 + 0.33) * median))
        edges = cv2.Canny(blurred, lower, upper)
        if DEBUG:
            cv2.imwrite(os.path.join(DEBUG_DIR, f"canny_edges_{int(time.time())}.png"), edges)
        kernel = np.ones((3, 3), np.uint8)
        dilated_edges = cv2.dilate(edges, kernel, iterations=1)
        lines = cv2.HoughLinesP(
            dilated_edges, 1, np.pi/180,
            threshold=50,
            minLineLength=max(roi.shape[0], roi.shape[1]) // 10,
            maxLineGap=20
        )
        if lines is not None and len(lines) > 0:
            if DEBUG:
                lines_debug = roi.copy() if len(roi.shape) == 3 else cv2.cvtColor(roi, cv2.COLOR_GRAY2RGB)
                for line in lines:
                    x1_l, y1_l, x2_l, y2_l = line[0]
                    cv2.line(lines_debug, (x1_l, y1_l), (x2_l, y2_l), (0, 255, 255), 2)
                cv2.imwrite(os.path.join(DEBUG_DIR, f"hough_lines_{int(time.time())}.png"), lines_debug)
            # The longest detected line is assumed to follow a paper edge.
            longest_line = max(
                lines,
                key=lambda line: np.linalg.norm(
                    (line[0][2] - line[0][0], line[0][3] - line[0][1])
                )
            )
            x1_l, y1_l, x2_l, y2_l = longest_line[0]
            dx = x2_l - x1_l
            dy = y2_l - y1_l
            angle = degrees(atan2(dy, dx))
            # Fold into [-45, 45] so near-vertical edges map to a small skew.
            if angle > 45:
                angle -= 90
            elif angle < -45:
                angle += 90
            logging.debug(f"Detected paper angle using Hough lines: {angle} degrees")
            return angle
    except Exception as e:
        logging.warning(f"Error in Hough lines method: {str(e)}")
    logging.warning("All paper angle detection methods failed, defaulting to 0 degrees")
    return 0
| # ------------------------------------------------------------------------- | |
| # ✂ Trim whitespace from handwriting image | |
| # ------------------------------------------------------------------------- | |
def extract_text_from_handwriting(image_path):
    """Trim surrounding whitespace from a handwriting PNG.

    Args:
        image_path: Path to the handwriting image produced by the model.

    Returns:
        (path, temp_dir, dimensions): `path` points at the trimmed image
        (or the original on failure / when nothing was detected);
        `temp_dir` is a directory the caller must delete when done (None
        when nothing was created); `dimensions` records original/trimmed/
        text sizes, or carries an 'error' key.
    """
    temp_dir = None
    try:
        temp_dir = tempfile.mkdtemp()
        temp_image_path = os.path.join(temp_dir, "trimmed_handwriting.png")
        debug_image_path = os.path.join(temp_dir, "debug_extraction.png")
        img = Image.open(image_path).convert("RGBA")
        if DEBUG:
            debug_img = img.copy()
            draw = ImageDraw.Draw(debug_img)
            draw.text(
                (10, 10),
                f"Original Handwriting: {img.width}x{img.height}",
                fill=(255, 0, 0, 255)
            )
            debug_img.save(os.path.join(DEBUG_DIR, "original_handwriting.png"))
        original_width, original_height = img.width, img.height
        # Binarize: pixels darker than `thresh` become 255, everything else 0.
        # NOTE(review): the invert+getbbox step below assumes the model's PNG
        # decodes with ink darker than 240 on a light background — confirm
        # against the handwriting Space's actual output format.
        gray_img = img.convert('L')
        thresh = 240
        binary_img = gray_img.point(lambda p: p < thresh and 255)
        bbox = ImageOps.invert(binary_img).getbbox()
        text_dimensions = {}
        text_dimensions['original'] = {'width': original_width, 'height': original_height}
        if bbox:
            padding = 20  # keep a small margin around the detected strokes
            left, upper, right, lower = bbox
            text_width = right - left
            text_height = lower - upper
            text_dimensions['text_only'] = {'width': text_width, 'height': text_height}
            text_dimensions['text_percentage'] = {
                'width': (text_width / original_width) * 100,
                'height': (text_height / original_height) * 100
            }
            bbox = (
                max(0, left - padding),
                max(0, upper - padding),
                min(img.width, right + padding),
                min(img.height, lower + padding)
            )
            trimmed_img = img.crop(bbox)
            trimmed_img.save(temp_image_path)
            trimmed_width, trimmed_height = trimmed_img.width, trimmed_img.height
            text_dimensions['trimmed'] = {'width': trimmed_width, 'height': trimmed_height}
            if DEBUG:
                debug_img = img.copy()
                draw = ImageDraw.Draw(debug_img)
                draw.rectangle(bbox, outline=(255, 0, 0, 255), width=2)
                draw.text(
                    (bbox[0], bbox[1] - 15),
                    (
                        f"Original: {original_width}x{original_height}, "
                        f"Text: {text_width}x{text_height} "
                        f"({text_dimensions['text_percentage']['width']:.1f}%)"
                    ),
                    fill=(255, 0, 0, 255)
                )
                debug_img.save(debug_image_path)
                debug_img.save(os.path.join(DEBUG_DIR, "text_extraction.png"))
            logging.debug(f"Text extraction: {text_dimensions}")
            return temp_image_path, temp_dir, text_dimensions
        else:
            # Fix: this branch used to leak the temp dir — it was created
            # (and populated with a copy) but the caller was handed None
            # and could never clean it up.
            shutil.rmtree(temp_dir, ignore_errors=True)
            text_dimensions['error'] = "No text content detected"
            logging.warning("No text content detected in handwriting image")
            return image_path, None, text_dimensions
    except Exception as e:
        # Fix: also reclaim the temp dir on failure instead of leaking it.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
        logging.error(f"Error extracting text from image: {str(e)}")
        return image_path, None, {'error': str(e)}
| # ------------------------------------------------------------------------- | |
| # 🖼 Main processing function | |
| # ------------------------------------------------------------------------- | |
def process_image(image, text, style, bias, color, stroke_width):
    """Detect paper in *image* and composite generated handwriting onto it.

    Pipeline:
      1. A Roboflow model finds paper regions.
      2. The text is word-wrapped to the first paper's usable width.
      3. The handwriting Space renders the text to a PNG.
      4. The PNG is whitespace-trimmed, then for each detected paper it is
         scaled, centered, rotated to the paper's angle, and composited.

    Args:
        image: PIL image containing one or more sheets of paper.
        text: Text to render as handwriting.
        style, bias, color, stroke_width: Forwarded to the handwriting model.

    Returns:
        {'output_path': str, 'debug_report': dict}, or None when no paper
        is detected.

    Raises:
        Re-raises any pipeline error after removing temp directories.
    """
    temp_dirs = []  # temp dirs handed back by extract_text_from_handwriting
    try:
        timestamp = int(time.time())
        input_debug_path = os.path.join(DEBUG_DIR, f"{timestamp}_input.jpg")
        image.save(input_debug_path)
        # Roboflow detection (a fresh client per request; the module-level
        # `model` is not reused here)
        rf_local = Roboflow(api_key=ROBOFLOW_API_KEY)
        project_local = rf_local.workspace().project(PROJECT_NAME)
        model_local = project_local.version(VERSION_NUMBER).model
        input_image_path = "/tmp/input_image.jpg"
        image.save(input_image_path)
        prediction = model_local.predict(input_image_path, confidence=70, overlap=50).json()
        num_papers = len(prediction['predictions'])
        logging.debug(f"Detected {num_papers} papers")
        if num_papers == 0:
            logging.error("No papers detected in the image")
            return None
        # Format text using first paper width
        if prediction['predictions']:
            obj0 = prediction['predictions'][0]
            paper_width = obj0['width']
            padding_x = int(paper_width * 0.1)
            usable_width = paper_width - 2 * padding_x
            formatted_text = format_text_for_paper(text, usable_width)
            logging.debug(f"Formatted text for paper width {usable_width}px: \n{formatted_text}")
        else:
            formatted_text = text
            logging.debug("No papers detected, using original text")
        # Call handwriting model
        logging.debug(f"Calling handwriting model with formatted text: '{formatted_text}'")
        handwriting_client = get_handwriting_client()
        result = handwriting_client.predict(
            formatted_text,
            style,
            bias,
            color,
            stroke_width,
            api_name="/generate_handwriting_wrapper"
        )
        svg_content, png_output = result
        logging.debug(f"Handwriting model raw PNG output: {png_output}")
        png_path = ensure_local_png(png_output)
        logging.debug(f"Using PNG path: {png_path}")
        # Save original handwriting for reference (best-effort)
        orig_hw_debug_path = os.path.join(DEBUG_DIR, f"{timestamp}_original_handwriting.png")
        try:
            shutil.copy(png_path, orig_hw_debug_path)
            logging.debug(f"Saved original handwriting to {orig_hw_debug_path}")
        except Exception as e:
            logging.error(f"Error saving original handwriting: {str(e)}")
        # Extract text and dimensions
        trimmed_path, temp_dir, text_dimensions = extract_text_from_handwriting(png_path)
        if temp_dir:
            temp_dirs.append(temp_dir)
        logging.debug(f"Handwriting dimensions: {text_dimensions}")
        handwriting_img = Image.open(trimmed_path).convert("RGBA")
        logging.debug(f"Loaded trimmed handwriting image: {handwriting_img.width}x{handwriting_img.height}")
        trimmed_hw_debug_path = os.path.join(DEBUG_DIR, f"{timestamp}_trimmed_handwriting.png")
        handwriting_img.save(trimmed_hw_debug_path)
        pil_image = image.convert("RGBA")
        debug_image = pil_image.copy()
        debug_draw = ImageDraw.Draw(debug_image)
        # Process each detected paper
        for i, obj in enumerate(prediction['predictions']):
            paper_width = obj['width']
            paper_height = obj['height']
            logging.debug(f"Paper {i+1} dimensions: {paper_width}x{paper_height} at position ({obj['x']}, {obj['y']})")
            # A 20% inner margin per side defines the usable text box
            padding_x = int(paper_width * 0.20)
            padding_y = int(paper_height * 0.20)
            box_width = paper_width - 2 * padding_x
            box_height = paper_height - 2 * padding_y
            # Roboflow reports center coordinates; convert to corners
            x1 = int(obj['x'] - paper_width / 2 + padding_x)
            y1 = int(obj['y'] - paper_height / 2 + padding_y)
            x2 = int(obj['x'] + paper_width / 2 - padding_x)
            y2 = int(obj['y'] + paper_height / 2 - padding_y)
            paper_box = [
                (obj['x'] - paper_width/2, obj['y'] - paper_height/2),
                (obj['x'] + paper_width/2, obj['y'] + paper_height/2)
            ]
            debug_draw.rectangle(paper_box, outline=(0, 255, 0, 255), width=3)
            debug_draw.text(
                (paper_box[0][0], paper_box[0][1] - 15),
                f"Paper {i+1}: {paper_width}x{paper_height}",
                fill=(0, 255, 0, 255)
            )
            usable_box = [(x1, y1), (x2, y2)]
            debug_draw.rectangle(usable_box, outline=(255, 255, 0, 255), width=2)
            debug_draw.text(
                (x1, y1 - 15),
                f"Usable: {box_width}x{box_height}",
                fill=(255, 255, 0, 255)
            )
            paper_x1 = int(obj['x'] - paper_width / 2)
            paper_y1 = int(obj['y'] - paper_height / 2)
            paper_x2 = int(obj['x'] + paper_width / 2)
            paper_y2 = int(obj['y'] + paper_height / 2)
            angle = detect_paper_angle(
                np.array(image),
                (paper_x1, paper_y1, paper_x2, paper_y2)
            )
            logging.debug(f"Paper {i+1} angle: {angle} degrees")
            debug_draw.line(
                [
                    (obj['x'], obj['y']),
                    (
                        obj['x'] + 50 * np.cos(np.radians(angle)),
                        obj['y'] + 50 * np.sin(np.radians(angle))
                    )
                ],
                fill=(255, 0, 0, 255),
                width=3
            )
            debug_draw.text(
                (obj['x'] + 60, obj['y']),
                f"Angle: {angle:.1f}°",
                fill=(255, 0, 0, 255)
            )
            # Scale handwriting to the usable box, preserving aspect ratio
            handwriting_aspect = handwriting_img.width / handwriting_img.height
            target_width = box_width
            target_width = min(int(target_width * TEXT_SCALE_FACTOR), box_width * 2)
            target_height = int(target_width / handwriting_aspect)
            if target_height > box_height:
                target_height = box_height
                target_width = int(target_height * handwriting_aspect)
            # Enforce a minimum width so small text doesn't get lost
            min_width = int(box_width * MIN_WIDTH_PERCENTAGE)
            if target_width < min_width:
                target_width = min_width
                target_height = int(target_width / handwriting_aspect)
                if target_height > box_height:
                    target_height = box_height
                    target_width = int(target_height * handwriting_aspect)
            logging.debug(
                f"Paper {i+1} usable area: {box_width}x{box_height}"
            )
            logging.debug(
                "Text resizing: original="
                f"{handwriting_img.width}x{handwriting_img.height}, "
                f"target={target_width}x{target_height} "
                f"(scale factor={TEXT_SCALE_FACTOR})"
            )
            text_center_x = x1 + box_width // 2
            text_center_y = y1 + box_height // 2
            text_box = [
                (text_center_x - target_width // 2, text_center_y - target_height // 2),
                (text_center_x + target_width // 2, text_center_y + target_height // 2)
            ]
            debug_draw.rectangle(text_box, outline=(255, 0, 255, 255), width=2)
            debug_draw.text(
                (text_box[0][0], text_box[0][1] - 15),
                f"Text: {target_width}x{target_height}",
                fill=(255, 0, 255, 255)
            )
            resized_handwriting = handwriting_img.resize(
                (target_width, target_height),
                Image.LANCZOS
            )
            resized_hw_debug_path = os.path.join(
                DEBUG_DIR,
                f"{timestamp}_resized_handwriting_{i+1}.png"
            )
            resized_handwriting.save(resized_hw_debug_path)
            # Paste onto a transparent full-size layer so rotation can pivot
            # around the paper center without clipping the handwriting
            handwriting_layer = Image.new("RGBA", pil_image.size, (0, 0, 0, 0))
            paste_x = x1 + (box_width - target_width) // 2
            paste_y = y1 + (box_height - target_height) // 2
            handwriting_layer.paste(resized_handwriting, (paste_x, paste_y), resized_handwriting)
            debug_paste_box = [
                (paste_x, paste_y),
                (paste_x + target_width, paste_y + target_height)
            ]
            debug_draw.rectangle(debug_paste_box, outline=(0, 0, 255, 255), width=1)
            rotation_debug_path = os.path.join(
                DEBUG_DIR,
                f"{timestamp}_rotation_paper_{i+1}.png"
            )
            rotation_debug = handwriting_layer.copy()
            rotation_debug_draw = ImageDraw.Draw(rotation_debug)
            rotation_debug_draw.line(
                [(obj['x'] - 50, obj['y']), (obj['x'] + 50, obj['y'])],
                fill=(255, 0, 0, 255),
                width=1
            )
            rotation_debug_draw.line(
                [(obj['x'], obj['y'] - 50), (obj['x'], obj['y'] + 50)],
                fill=(255, 0, 0, 255),
                width=1
            )
            rotation_debug_draw.ellipse(
                [(obj['x'] - 5, obj['y'] - 5), (obj['x'] + 5, obj['y'] + 5)],
                fill=(255, 0, 0, 255)
            )
            rotation_debug_draw.text(
                (obj['x'] + 10, obj['y'] + 10),
                (
                    f"Rotation center: ({obj['x']}, {obj['y']})\n"
                    f"Angle: {angle:.1f}°"
                ),
                fill=(255, 0, 0, 255)
            )
            rotation_debug.save(rotation_debug_path)
            # Negative angle: PIL rotates counter-clockwise for positive values
            rotated_layer = handwriting_layer.rotate(
                -angle,
                resample=Image.BICUBIC,
                center=(obj['x'], obj['y'])
            )
            pil_image = Image.alpha_composite(pil_image, rotated_layer)
        debug_path = os.path.join(DEBUG_DIR, f"{timestamp}_debug_overlay.png")
        debug_image.save(debug_path)
        logging.debug(f"Saved debug overlay image to {debug_path}")
        output_path = "/tmp/output_image.png"
        pil_image.convert("RGB").save(output_path)
        final_debug_path = os.path.join(DEBUG_DIR, f"{timestamp}_final_output.png")
        pil_image.save(final_debug_path)
        for dir_path in temp_dirs:
            try:
                shutil.rmtree(dir_path)
            except Exception as e:
                logging.warning(f"Failed to clean up temporary directory {dir_path}: {str(e)}")
        # NOTE(review): detect_paper_angle runs a second time per paper here,
        # purely for the report — consider caching the loop's results.
        debug_report = {
            'timestamp': timestamp,
            'input_image': input_debug_path,
            'original_handwriting': orig_hw_debug_path,
            'trimmed_handwriting': trimmed_hw_debug_path,
            'text_dimensions': text_dimensions,
            'detected_papers': len(prediction['predictions']),
            'paper_dimensions': [
                {
                    'index': i,
                    'width': obj['width'],
                    'height': obj['height'],
                    'position': (obj['x'], obj['y']),
                    'detected_angle': detect_paper_angle(
                        np.array(image),
                        (
                            int(obj['x'] - obj['width'] / 2),
                            int(obj['y'] - obj['height'] / 2),
                            int(obj['x'] + obj['width'] / 2),
                            int(obj['y'] + obj['height'] / 2)
                        )
                    )
                } for i, obj in enumerate(prediction['predictions'])
            ],
            'debug_overlay': debug_path,
            'final_output': final_debug_path
        }
        logging.debug(f"Debug report: {debug_report}")
        return {
            'output_path': output_path,
            'debug_report': debug_report
        }
    except Exception as e:
        # Best-effort temp-dir cleanup before re-raising to the caller.
        for dir_path in temp_dirs:
            try:
                shutil.rmtree(dir_path)
            # Fix: bare `except:` also swallowed SystemExit/KeyboardInterrupt.
            except Exception:
                pass
        logging.error(f"Error: {str(e)}")
        raise
| # ------------------------------------------------------------------------- | |
| # 🎛 Gradio interface wrapper | |
| # ------------------------------------------------------------------------- | |
def gradio_process(image, text, style, bias, color, stroke_width, text_size):
    """Gradio entry point: validate inputs, run the pipeline, and build
    (preview_path, download_path, debug_text) for the three UI outputs."""
    global TEXT_SCALE_FACTOR
    TEXT_SCALE_FACTOR = text_size  # slider value overrides the module default
    if image is None:
        return None, None, "Please upload an image with paper."
    if not text:
        return None, None, "Please enter text to write on the paper."
    try:
        result = process_image(image, text, style, bias, color, stroke_width)
        if result is None:
            return None, None, "No papers detected in the image."
        output_path = result['output_path']
        report = result['debug_report']
        # Assemble the human-readable debug summary line by line.
        msg_lines = [
            "Processing complete!",
            "",
            f"Debug information in: {DEBUG_DIR}",
        ]
        if 'text_dimensions' in report:
            td = report['text_dimensions']
            if 'original' in td:
                msg_lines.append(f"Original handwriting: {td['original']['width']}x{td['original']['height']} px")
            if 'text_only' in td:
                msg_lines.append(f"Text content size: {td['text_only']['width']}x{td['text_only']['height']} px")
            if 'text_percentage' in td:
                msg_lines.append(f"Text uses {td['text_percentage']['width']:.1f}% of image width")
            if 'trimmed' in td:
                msg_lines.append(f"Trimmed size: {td['trimmed']['width']}x{td['trimmed']['height']} px")
        if 'paper_dimensions' in report and len(report['paper_dimensions']) > 0:
            first_paper = report['paper_dimensions'][0]
            msg_lines.append(f"Detected paper: {first_paper['width']}x{first_paper['height']} px")
            msg_lines.append(f"Paper angle: {first_paper['detected_angle']:.1f} degrees")
        msg_lines.append("")
        msg_lines.append(f"Check {DEBUG_DIR} for all debug images.")
        # Same path twice: once for the preview image, once for the download.
        return output_path, output_path, "\n".join(msg_lines)
    except Exception as e:
        logging.exception("Processing error")
        return None, None, f"Error: {str(e)}"
| # ------------------------------------------------------------------------- | |
| # 🚀 Gradio App | |
| # ------------------------------------------------------------------------- | |
# Declarative UI: the seven inputs map positionally onto gradio_process's
# parameters; outputs are (preview image, downloadable file, debug text).
interface = gr.Interface(
    fn=gradio_process,
    inputs=[
        gr.Image(type="pil", label="Upload an Image with Paper"),
        gr.Textbox(label="Enter Text to Write in Handwriting"),
        gr.Slider(minimum=0, maximum=12, step=1, value=9, label="Handwriting Style"),
        gr.Slider(minimum=0.5, maximum=1.0, step=0.05, value=0.75, label="Neatness (Higher = Neater)"),
        gr.ColorPicker(label="Ink Color", value="#000000"),
        gr.Slider(minimum=1, maximum=4, step=0.5, value=2, label="Stroke Width"),
        gr.Slider(minimum=1.0, maximum=3.0, step=0.1, value=1.5, label="Text Size Multiplier")
    ],
    outputs=[
        gr.Image(label="Processed Image Preview"),
        gr.File(label="Download Processed Image"),
        gr.Textbox(label="Debug Info", lines=10)
    ],
    title="Handwritten Text on Paper Detection - Debug Version",
    description=(
        "Upload an image with paper, enter text, and the app will detect the paper "
        "and overlay handwritten text on it. Debug info will show what's happening "
        "behind the scenes."
    )
)

# Launch with a public share link when run directly (Space entry point).
if __name__ == "__main__":
    interface.launch(share=True)