# app.py — revision d9c3dba ("Update app.py"), uploaded by 3morrrrr (verified commit)
import os
import cv2
import time
import shutil
import logging
import tempfile
import numpy as np
from math import atan2, degrees
from PIL import Image, ImageOps, ImageDraw
from roboflow import Roboflow
from gradio_client import Client
import gradio as gr
import requests # <-- for downloading PNG from URL
# -------------------------------------------------------------------------
# 🧠 Fix for Gradio schema bug ("TypeError: argument of type 'bool' is not iterable")
# -------------------------------------------------------------------------
import gradio_client.utils as gu
from gradio_client.utils import APIInfoParseError
def safe_get_type(schema):
    """Resolve a schema's type name without assuming the schema is a dict.

    Non-dict schemas (for example a bare ``True``/``False``) yield the Python
    type name of the value. A dict carrying a ``"const"`` key yields
    ``Literal[<const>]``; any other dict yields its ``"type"`` entry, or
    ``"any"`` when that key is absent.
    """
    if isinstance(schema, dict):
        if "const" in schema:
            return f"Literal[{schema['const']}]"
        return schema.get("type", "any")
    # Anything that is not a mapping is described by its own Python type.
    return type(schema).__name__
# Replace gradio_client's `get_type` with the tolerant version defined above.
gu.get_type = safe_get_type
# -------------------------------------------------------------------------
# 🩹 PATCH for APIInfoParseError: safe handling of 'anyOf' schemas
# -------------------------------------------------------------------------
def _safe_json_schema_to_python_type(schema, defs=None):
"""Fix gradio_client parsing for anyOf[string, null] schemas."""
try:
if isinstance(schema, dict) and "anyOf" in schema:
types = [s.get("type") for s in schema["anyOf"] if isinstance(s, dict)]
if set(types) == {"string", "null"}:
return "Optional[str]"
return gu._json_schema_to_python_type_original(schema, defs)
except Exception:
return "UnknownType"
# Keep a handle on the library's own parser exactly once, so that running this
# module again never ends up wrapping the wrapper, then route all schema
# parsing through the tolerant implementation.
if not hasattr(gu, "_json_schema_to_python_type_original"):
    setattr(gu, "_json_schema_to_python_type_original", gu._json_schema_to_python_type)
gu._json_schema_to_python_type = _safe_json_schema_to_python_type
print("✅ Patched gradio_client JSON schema parser safely.")
# -------------------------------------------------------------------------
# 🧹 Safely clear Gradio client cache (for all versions)
# -------------------------------------------------------------------------
# Best-effort removal of the on-disk Gradio cache at import time; a failure is
# reported on stdout and never aborts start-up.
try:
    cache_dir = os.path.expanduser("~/.cache/gradio")
    if os.path.exists(cache_dir):
        shutil.rmtree(cache_dir)
        print("🧹 Cleared Gradio client cache manually.")
except Exception as cache_error:
    print(f"⚠️ Warning: Could not clear Gradio cache ({cache_error})")
# -------------------------------------------------------------------------
# 🪵 Logging configuration
# -------------------------------------------------------------------------
# Root logger: DEBUG and above goes both to an appended `debug.log` file and to
# the console stream.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.DEBUG,
    handlers=[
        logging.FileHandler("debug.log", mode="a", encoding="utf-8"),
        logging.StreamHandler(),
    ],
)
# -------------------------------------------------------------------------
# 🤖 Roboflow configuration
# -------------------------------------------------------------------------
# NOTE(security): a live API key is committed in this file. Prefer supplying
# ROBOFLOW_API_KEY through the environment (e.g. a Space secret); the literal
# below is kept only as a backward-compatible fallback and should be rotated
# and removed.
ROBOFLOW_API_KEY = os.environ.get("ROBOFLOW_API_KEY") or "u5LX112EBlNmzYoofvPL"
PROJECT_NAME = "model_verification_project"
VERSION_NUMBER = 2
# Export the effective key so code that reads the environment sees the same value.
os.environ["ROBOFLOW_API_KEY"] = ROBOFLOW_API_KEY
# -------------------------------------------------------------------------
# ✍️ Handwriting model (Hugging Face Space)
# -------------------------------------------------------------------------
HANDWRITING_MODEL_ENDPOINT = "3morrrrr/Handwriting_Model_Inf"
# Cached handwriting client
_handwriting_client = None


def get_handwriting_client(max_retries=5, retry_delay=3):
    """
    Lazily initialize and cache the handwriting Client with retries.

    Avoids crashing the app if the Space is waking up / slow.

    Args:
        max_retries: Maximum number of connection attempts.
        retry_delay: Seconds to wait between two attempts.

    Returns:
        The cached ``Client`` for ``HANDWRITING_MODEL_ENDPOINT``.

    Raises:
        RuntimeError: If ``max_retries`` allows no attempt at all.
        Exception: The error from the last failed attempt once every retry
            has been used.
    """
    global _handwriting_client
    if _handwriting_client is not None:
        return _handwriting_client
    last_error = None
    for attempt in range(1, max_retries + 1):
        try:
            logging.info(
                f"Initializing handwriting client "
                f"(attempt {attempt}/{max_retries}) for {HANDWRITING_MODEL_ENDPOINT}"
            )
            _handwriting_client = Client(HANDWRITING_MODEL_ENDPOINT)
            logging.info("Handwriting client initialized successfully.")
            return _handwriting_client
        except Exception as e:
            last_error = e
            logging.warning(
                f"Failed to initialize handwriting client "
                f"(attempt {attempt}/{max_retries}): {e}"
            )
            # Only wait when another attempt will follow; sleeping after the
            # final failure just delays the error.
            if attempt < max_retries:
                time.sleep(retry_delay)
    logging.error("Exceeded max retries while initializing handwriting client.")
    if last_error is None:
        # No attempt was made (max_retries <= 0); `raise None` would be a TypeError.
        raise RuntimeError(
            "Handwriting client was not initialized: max_retries must be at least 1."
        )
    raise last_error
# -------------------------------------------------------------------------
# ⚙️ General configuration
# -------------------------------------------------------------------------
# Lower bound on the rendered text width, as a fraction of a paper's usable box width.
MIN_WIDTH_PERCENTAGE = 0.8
# Multiplier applied to the usable box width when sizing the handwriting.
# NOTE: `gradio_process` overwrites this module-level value on every request.
TEXT_SCALE_FACTOR = 1.2
# When true, intermediate images are written to DEBUG_DIR.
DEBUG = True
DEBUG_DIR = os.path.join(tempfile.gettempdir(), "debug_images")
# Created at import time so later writes can assume the directory exists.
os.makedirs(DEBUG_DIR, exist_ok=True)
logging.info(f"Debug images stored in: {DEBUG_DIR}")
# Only the last four characters of the API key are logged.
logging.info(
    f"Using Roboflow project '{PROJECT_NAME}' (v{VERSION_NUMBER}) "
    f"with API key ending in {ROBOFLOW_API_KEY[-4:]}"
)
logging.info(f"Using handwriting model endpoint: {HANDWRITING_MODEL_ENDPOINT}")
# -------------------------------------------------------------------------
# 🧩 Helper functions
# -------------------------------------------------------------------------
def format_text_for_paper(text, paper_width):
    """Auto-wrap text to fit detected paper area.

    The per-line character budget is estimated from ``paper_width`` (80% of
    the width at 13 px per character), clamped to the range 10..60. Words are
    split on single spaces and greedily packed into lines; a word longer than
    the budget is placed on its own line rather than broken.

    Args:
        text: The text to wrap.
        paper_width: Available width in pixels.

    Returns:
        The wrapped text with lines joined by ``"\\n"``.
    """
    pixels_per_char = 13
    est_chars = max(10, int((paper_width * 0.8) / pixels_per_char))
    char_limit = min(60, est_chars)
    lines, line = [], ""
    for word in text.split(" "):
        # A separator is only counted when the line already holds something;
        # counting it on an empty line rejected a first word that fits exactly.
        candidate = f"{line} {word}" if line else word
        if len(candidate) <= char_limit:
            line = candidate
        else:
            # Never flush an empty line: that produced a spurious blank first
            # line whenever the first word exceeded the budget.
            if line:
                lines.append(line)
            line = word
    if line:
        lines.append(line)
    return "\n".join(lines)
def save_debug_image(image, filename, text=None):
    """Save debug images for visualization.

    Args:
        image: A PIL image or a NumPy array (converted with ``Image.fromarray``).
        filename: File name to create inside ``DEBUG_DIR``.
        text: Optional caption drawn on a black banner at the top of the image.

    Returns:
        The path of the written file, or ``None`` when ``DEBUG`` is off.
    """
    if not DEBUG:
        return None
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    if text:
        # ImageDraw paints in place; annotate a private copy so the caller's
        # image is not altered by a debug-only side effect.
        image = image.copy()
        draw = ImageDraw.Draw(image)
        draw.rectangle([(0, 0), (image.width, 60)], fill=(0, 0, 0, 128))
        draw.text((10, 10), text, fill=(255, 255, 255))
    path = os.path.join(DEBUG_DIR, filename)
    image.save(path)
    logging.debug(f"Saved debug image: {path}")
    return path
def ensure_local_png(png_output):
    """
    Handle Gradio / HF output for the PNG and return a local file path.

    - If it's a path string, return it.
    - If it's a dict, use .path or .url (the first entry that exists locally
      wins; otherwise the first http(s) URL is downloaded).
    - If it's a URL, download it to a uniquely named temp file.

    Raises:
        ValueError: If the output is None, has an unexpected type, carries no
            path/url, or points at something that is neither an existing
            file nor an http(s) URL.
        RuntimeError: If downloading the URL fails.
    """
    if png_output is None:
        raise ValueError("Handwriting model returned no PNG output (None).")
    # Case 1: plain string path
    if isinstance(png_output, str):
        candidates = [png_output]
    # Case 2: dict from Gradio output: {path, url, ...}
    elif isinstance(png_output, dict):
        candidates = [png_output.get("path"), png_output.get("url")]
    else:
        raise ValueError(f"Unexpected PNG output type: {type(png_output)}")
    candidates = [candidate for candidate in candidates if candidate]
    if not candidates:
        raise ValueError(f"PNG output from handwriting model is missing a path/url: {png_output}")
    # If already a local file path
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    # If it's a URL, download it
    for candidate in candidates:
        if not (isinstance(candidate, str) and candidate.startswith("http")):
            continue
        logging.debug(f"Downloading PNG from URL: {candidate}")
        # mkstemp gives a unique name, so two downloads within the same second
        # can no longer overwrite each other.
        fd, temp_png = tempfile.mkstemp(prefix="handwriting_", suffix=".png")
        try:
            # The context manager releases the streamed connection; iter_content
            # (unlike r.raw) applies any transfer/content decoding.
            with os.fdopen(fd, "wb") as f, requests.get(candidate, stream=True, timeout=30) as r:
                r.raise_for_status()
                for chunk in r.iter_content(chunk_size=65536):
                    f.write(chunk)
        except Exception as e:
            # Do not leave a partial file behind.
            try:
                os.remove(temp_png)
            except OSError:
                pass
            raise RuntimeError(f"Failed to download PNG from URL: {e}") from e
        logging.debug(f"Downloaded PNG to {temp_png}")
        return temp_png
    # Any other weird case
    raise ValueError(f"Invalid PNG path returned: {candidates[0]}")
# -------------------------------------------------------------------------
# 🧠 Load Roboflow models
# -------------------------------------------------------------------------
# Module-level Roboflow handles, built once at import time.
# NOTE(review): `process_image` constructs its own client/project/model on every
# call, and nothing else visible in this file reads `rf`, `project` or `model`
# — TODO confirm whether these globals are still needed.
rf = Roboflow(api_key=ROBOFLOW_API_KEY)
project = rf.workspace().project(PROJECT_NAME)
model = project.version(VERSION_NUMBER).model
# -------------------------------------------------------------------------
# 📐 Detect paper angle
# -------------------------------------------------------------------------
def detect_paper_angle(image, bounding_box):
    """
    Detect the angle of a paper document within the given bounding box.

    Two strategies are tried in order:

    1. Adaptive threshold -> largest external contour covering more than 5% of
       the ROI -> angle of its minimum-area rectangle (+90 when the rectangle
       is taller than wide).
    2. Canny edges -> probabilistic Hough lines -> angle of the longest
       segment, shifted by 90 degrees when it falls outside [-45, 45].

    Args:
        image: PIL image or NumPy array (colour input is treated as RGB).
        bounding_box: ``(x1, y1, x2, y2)`` pixel coordinates of the paper.

    Returns:
        The detected angle in degrees, or ``0`` when the box does not overlap
        the image or both strategies fail.
    """
    x1, y1, x2, y2 = bounding_box
    # Convert PIL image to numpy array if needed
    image_np = image if isinstance(image, np.ndarray) else np.array(image)

    # Clamp the box to the image: a negative start would be read by NumPy as
    # "count from the end" and silently produce a wrong or empty crop.
    img_h, img_w = image_np.shape[:2]
    x1, x2 = max(0, int(x1)), min(img_w, int(x2))
    y1, y2 = max(0, int(y1)), min(img_h, int(y2))
    if x2 <= x1 or y2 <= y1:
        logging.warning("Paper bounding box does not overlap the image, defaulting to 0 degrees")
        return 0

    # Crop the region of interest (ROI)
    roi = image_np[y1:y2, x1:x2]
    if DEBUG:
        debug_roi = Image.fromarray(roi)
        save_debug_image(debug_roi, f"paper_roi_{int(time.time())}.png",
                         text=f"Paper ROI: {roi.shape[1]}x{roi.shape[0]}")
    # Convert ROI to grayscale
    if len(roi.shape) == 3 and roi.shape[2] >= 3:
        gray = cv2.cvtColor(roi, cv2.COLOR_RGB2GRAY)
    else:
        gray = roi
    if DEBUG:
        cv2.imwrite(os.path.join(DEBUG_DIR, f"gray_paper_{int(time.time())}.png"), gray)

    # Method 1: adaptive thresholding
    try:
        binary = cv2.adaptiveThreshold(
            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV, 11, 2
        )
        if DEBUG:
            cv2.imwrite(os.path.join(DEBUG_DIR, f"binary_paper_{int(time.time())}.png"), binary)
        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if contours:
            contours = sorted(contours, key=cv2.contourArea, reverse=True)
            min_area_ratio = 0.05
            roi_area = gray.shape[0] * gray.shape[1]
            valid_contours = [c for c in contours if cv2.contourArea(c) > roi_area * min_area_ratio]
            if valid_contours:
                largest_contour = valid_contours[0]
                if DEBUG:
                    contour_debug = np.zeros_like(binary)
                    cv2.drawContours(contour_debug, [largest_contour], 0, 255, 2)
                    cv2.imwrite(os.path.join(DEBUG_DIR, f"paper_contour_{int(time.time())}.png"), contour_debug)
                rect = cv2.minAreaRect(largest_contour)
                # np.int0 was removed in NumPy 2.0; np.intp is the same type
                # under its surviving name.
                box = cv2.boxPoints(rect).astype(np.intp)
                if DEBUG:
                    rect_debug = roi.copy() if len(roi.shape) == 3 else cv2.cvtColor(roi, cv2.COLOR_GRAY2RGB)
                    cv2.drawContours(rect_debug, [box], 0, (0, 0, 255), 2)
                    cv2.imwrite(os.path.join(DEBUG_DIR, f"paper_rect_{int(time.time())}.png"), rect_debug)
                center, (width, height), angle = rect
                if width < height:
                    angle += 90
                logging.debug(f"Detected paper angle using adaptive threshold: {angle} degrees")
                return angle
    except Exception as e:
        logging.warning(f"Error in adaptive threshold method: {str(e)}")

    # Method 2: Canny + Hough lines
    try:
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        # Canny thresholds are placed 33% below and above the median intensity.
        median = np.median(blurred)
        lower = int(max(0, (1.0 - 0.33) * median))
        upper = int(min(255, (1.0 + 0.33) * median))
        edges = cv2.Canny(blurred, lower, upper)
        if DEBUG:
            cv2.imwrite(os.path.join(DEBUG_DIR, f"canny_edges_{int(time.time())}.png"), edges)
        kernel = np.ones((3, 3), np.uint8)
        dilated_edges = cv2.dilate(edges, kernel, iterations=1)
        lines = cv2.HoughLinesP(
            dilated_edges, 1, np.pi/180,
            threshold=50,
            minLineLength=max(roi.shape[0], roi.shape[1]) // 10,
            maxLineGap=20
        )
        if lines is not None and len(lines) > 0:
            if DEBUG:
                lines_debug = roi.copy() if len(roi.shape) == 3 else cv2.cvtColor(roi, cv2.COLOR_GRAY2RGB)
                for line in lines:
                    x1_l, y1_l, x2_l, y2_l = line[0]
                    cv2.line(lines_debug, (x1_l, y1_l), (x2_l, y2_l), (0, 255, 255), 2)
                cv2.imwrite(os.path.join(DEBUG_DIR, f"hough_lines_{int(time.time())}.png"), lines_debug)
            longest_line = max(
                lines,
                key=lambda line: np.linalg.norm(
                    (line[0][2] - line[0][0], line[0][3] - line[0][1])
                )
            )
            x1_l, y1_l, x2_l, y2_l = longest_line[0]
            dx = x2_l - x1_l
            dy = y2_l - y1_l
            angle = degrees(atan2(dy, dx))
            # Treat a near-vertical segment as the paper's other edge.
            if angle > 45:
                angle -= 90
            elif angle < -45:
                angle += 90
            logging.debug(f"Detected paper angle using Hough lines: {angle} degrees")
            return angle
    except Exception as e:
        logging.warning(f"Error in Hough lines method: {str(e)}")

    logging.warning("All paper angle detection methods failed, defaulting to 0 degrees")
    return 0
# -------------------------------------------------------------------------
# ✂ Trim whitespace from handwriting image
# -------------------------------------------------------------------------
def extract_text_from_handwriting(image_path):
    """Crop a handwriting image down to its ink, plus 20 px of padding.

    Pixels darker than 240 (after flattening any transparency onto white) are
    treated as ink; the bounding box of that ink is padded, clamped to the
    image and saved as a new PNG in a fresh temporary directory.

    Args:
        image_path: Path of the handwriting image to trim.

    Returns:
        A tuple ``(path, temp_dir, text_dimensions)``:
        - on success, ``path`` is the trimmed PNG and ``temp_dir`` the
          directory holding it (the caller is responsible for removing it);
        - when no ink is found or any error occurs, ``path`` is the original
          ``image_path``, ``temp_dir`` is ``None`` and ``text_dimensions``
          carries an ``'error'`` entry.
    """
    temp_dir = None
    try:
        temp_dir = tempfile.mkdtemp()
        temp_image_path = os.path.join(temp_dir, "trimmed_handwriting.png")
        debug_image_path = os.path.join(temp_dir, "debug_extraction.png")
        img = Image.open(image_path).convert("RGBA")
        if DEBUG:
            debug_img = img.copy()
            draw = ImageDraw.Draw(debug_img)
            draw.text(
                (10, 10),
                f"Original Handwriting: {img.width}x{img.height}",
                fill=(255, 0, 0, 255)
            )
            debug_img.save(os.path.join(DEBUG_DIR, "original_handwriting.png"))
        original_width, original_height = img.width, img.height
        # Flatten onto white first: converting RGBA straight to 'L' ignores
        # alpha, so fully transparent pixels could otherwise be read as ink.
        white = Image.new("RGBA", img.size, (255, 255, 255, 255))
        gray_img = Image.alpha_composite(white, img).convert('L')
        thresh = 240
        ink_mask = gray_img.point(lambda p: 255 if p < thresh else 0)
        # getbbox() bounds the NON-ZERO pixels, so it must run on the ink mask
        # itself; inverting it first bounded the background (the whole image).
        bbox = ink_mask.getbbox()
        text_dimensions = {}
        text_dimensions['original'] = {'width': original_width, 'height': original_height}
        if not bbox:
            text_dimensions['error'] = "No text content detected"
            logging.warning("No text content detected in handwriting image")
            # Nothing was produced, so do not leave the temp directory behind.
            shutil.rmtree(temp_dir, ignore_errors=True)
            return image_path, None, text_dimensions

        padding = 20
        left, upper, right, lower = bbox
        text_width = right - left
        text_height = lower - upper
        text_dimensions['text_only'] = {'width': text_width, 'height': text_height}
        text_dimensions['text_percentage'] = {
            'width': (text_width / original_width) * 100,
            'height': (text_height / original_height) * 100
        }
        bbox = (
            max(0, left-padding),
            max(0, upper-padding),
            min(img.width, right+padding),
            min(img.height, lower+padding)
        )
        trimmed_img = img.crop(bbox)
        trimmed_img.save(temp_image_path)
        trimmed_width, trimmed_height = trimmed_img.width, trimmed_img.height
        text_dimensions['trimmed'] = {'width': trimmed_width, 'height': trimmed_height}
        if DEBUG:
            debug_img = img.copy()
            draw = ImageDraw.Draw(debug_img)
            draw.rectangle(bbox, outline=(255, 0, 0, 255), width=2)
            draw.text(
                (bbox[0], bbox[1] - 15),
                (
                    f"Original: {original_width}x{original_height}, "
                    f"Text: {text_width}x{text_height} "
                    f"({text_dimensions['text_percentage']['width']:.1f}%)"
                ),
                fill=(255, 0, 0, 255)
            )
            debug_img.save(debug_image_path)
            debug_img.save(os.path.join(DEBUG_DIR, "text_extraction.png"))
        logging.debug(f"Text extraction: {text_dimensions}")
        return temp_image_path, temp_dir, text_dimensions
    except Exception as e:
        logging.error(f"Error extracting text from image: {str(e)}")
        if temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)
        return image_path, None, {'error': str(e)}
# -------------------------------------------------------------------------
# 🖼 Main processing function
# -------------------------------------------------------------------------
def process_image(image, text, style, bias, color, stroke_width):
    """Detect papers in ``image`` and overlay ``text`` on each as handwriting.

    Steps: detect papers with Roboflow, wrap the text to the first paper's
    width, render it with the handwriting model, trim the rendering, then for
    every detected paper scale the handwriting into the inner 60% of the
    paper, rotate it by the detected paper angle and composite it onto the
    image. Debug artefacts are written to ``DEBUG_DIR`` throughout.

    Args:
        image: PIL image uploaded by the user.
        text: Text to render.
        style, bias, color, stroke_width: Forwarded unchanged to the
            handwriting model's ``/generate_handwriting_wrapper`` endpoint.

    Returns:
        ``None`` when no paper is detected, otherwise a dict with
        ``'output_path'`` (the composited PNG) and ``'debug_report'``.

    Raises:
        Exception: Any processing error is logged and re-raised.
    """
    temp_dirs = []
    input_image_path = None
    try:
        timestamp = int(time.time())
        # JPEG cannot store alpha or palette data, so normalise once and use
        # the RGB copy for both JPEG writes below.
        rgb_image = image.convert("RGB")
        input_debug_path = os.path.join(DEBUG_DIR, f"{timestamp}_input.jpg")
        rgb_image.save(input_debug_path)

        # Roboflow detection
        rf_local = Roboflow(api_key=ROBOFLOW_API_KEY)
        project_local = rf_local.workspace().project(PROJECT_NAME)
        model_local = project_local.version(VERSION_NUMBER).model
        # A unique file per call: a fixed /tmp name lets concurrent requests
        # overwrite each other's input.
        fd, input_image_path = tempfile.mkstemp(prefix="input_image_", suffix=".jpg")
        os.close(fd)
        rgb_image.save(input_image_path)
        prediction = model_local.predict(input_image_path, confidence=70, overlap=50).json()
        predictions = prediction['predictions']
        num_papers = len(predictions)
        logging.debug(f"Detected {num_papers} papers")
        if num_papers == 0:
            logging.error("No papers detected in the image")
            return None

        # Format text using first paper width
        obj0 = predictions[0]
        paper_width = obj0['width']
        padding_x = int(paper_width * 0.1)
        usable_width = paper_width - 2 * padding_x
        formatted_text = format_text_for_paper(text, usable_width)
        logging.debug(f"Formatted text for paper width {usable_width}px: \n{formatted_text}")

        # Call handwriting model
        logging.debug(f"Calling handwriting model with formatted text: '{formatted_text}'")
        handwriting_client = get_handwriting_client()
        result = handwriting_client.predict(
            formatted_text,
            style,
            bias,
            color,
            stroke_width,
            api_name="/generate_handwriting_wrapper"
        )
        svg_content, png_output = result
        logging.debug(f"Handwriting model raw PNG output: {png_output}")
        png_path = ensure_local_png(png_output)
        logging.debug(f"Using PNG path: {png_path}")

        # Save original handwriting for reference
        orig_hw_debug_path = os.path.join(DEBUG_DIR, f"{timestamp}_original_handwriting.png")
        try:
            shutil.copy(png_path, orig_hw_debug_path)
            logging.debug(f"Saved original handwriting to {orig_hw_debug_path}")
        except Exception as e:
            logging.error(f"Error saving original handwriting: {str(e)}")

        # Extract text and dimensions
        trimmed_path, temp_dir, text_dimensions = extract_text_from_handwriting(png_path)
        if temp_dir:
            temp_dirs.append(temp_dir)
        logging.debug(f"Handwriting dimensions: {text_dimensions}")
        handwriting_img = Image.open(trimmed_path).convert("RGBA")
        logging.debug(f"Loaded trimmed handwriting image: {handwriting_img.width}x{handwriting_img.height}")
        trimmed_hw_debug_path = os.path.join(DEBUG_DIR, f"{timestamp}_trimmed_handwriting.png")
        handwriting_img.save(trimmed_hw_debug_path)

        pil_image = image.convert("RGBA")
        debug_image = pil_image.copy()
        debug_draw = ImageDraw.Draw(debug_image)
        # Converted once and shared by every angle detection below.
        image_np = np.array(image)
        # Angle per paper, reused by the debug report so detection (and its
        # debug image writes) is not repeated.
        detected_angles = []

        # Process each detected paper
        for i, obj in enumerate(predictions):
            paper_width = obj['width']
            paper_height = obj['height']
            logging.debug(f"Paper {i+1} dimensions: {paper_width}x{paper_height} at position ({obj['x']}, {obj['y']})")
            # Keep a 20% margin on every side of the paper.
            padding_x = int(paper_width * 0.20)
            padding_y = int(paper_height * 0.20)
            box_width = paper_width - 2 * padding_x
            box_height = paper_height - 2 * padding_y
            x1 = int(obj['x'] - paper_width / 2 + padding_x)
            y1 = int(obj['y'] - paper_height / 2 + padding_y)
            x2 = int(obj['x'] + paper_width / 2 - padding_x)
            y2 = int(obj['y'] + paper_height / 2 - padding_y)
            paper_box = [
                (obj['x'] - paper_width/2, obj['y'] - paper_height/2),
                (obj['x'] + paper_width/2, obj['y'] + paper_height/2)
            ]
            debug_draw.rectangle(paper_box, outline=(0, 255, 0, 255), width=3)
            debug_draw.text(
                (paper_box[0][0], paper_box[0][1] - 15),
                f"Paper {i+1}: {paper_width}x{paper_height}",
                fill=(0, 255, 0, 255)
            )
            usable_box = [(x1, y1), (x2, y2)]
            debug_draw.rectangle(usable_box, outline=(255, 255, 0, 255), width=2)
            debug_draw.text(
                (x1, y1 - 15),
                f"Usable: {box_width}x{box_height}",
                fill=(255, 255, 0, 255)
            )

            paper_x1 = int(obj['x'] - paper_width / 2)
            paper_y1 = int(obj['y'] - paper_height / 2)
            paper_x2 = int(obj['x'] + paper_width / 2)
            paper_y2 = int(obj['y'] + paper_height / 2)
            angle = detect_paper_angle(
                image_np,
                (paper_x1, paper_y1, paper_x2, paper_y2)
            )
            detected_angles.append(angle)
            logging.debug(f"Paper {i+1} angle: {angle} degrees")
            debug_draw.line(
                [
                    (obj['x'], obj['y']),
                    (
                        obj['x'] + 50 * np.cos(np.radians(angle)),
                        obj['y'] + 50 * np.sin(np.radians(angle))
                    )
                ],
                fill=(255, 0, 0, 255),
                width=3
            )
            debug_draw.text(
                (obj['x'] + 60, obj['y']),
                f"Angle: {angle:.1f}°",
                fill=(255, 0, 0, 255)
            )

            # Size the handwriting: scale by TEXT_SCALE_FACTOR (capped at twice
            # the box width), fit to the box height, then enforce the minimum
            # width and re-fit to the height if that overshoots.
            handwriting_aspect = handwriting_img.width / handwriting_img.height
            target_width = min(int(box_width * TEXT_SCALE_FACTOR), int(box_width * 2))
            target_height = int(target_width / handwriting_aspect)
            if target_height > box_height:
                target_height = int(box_height)
                target_width = int(target_height * handwriting_aspect)
            min_width = int(box_width * MIN_WIDTH_PERCENTAGE)
            if target_width < min_width:
                target_width = min_width
                target_height = int(target_width / handwriting_aspect)
                if target_height > box_height:
                    target_height = int(box_height)
                    target_width = int(target_height * handwriting_aspect)
            # resize() rejects zero-sized targets (possible for tiny detections).
            target_width = max(1, target_width)
            target_height = max(1, target_height)
            logging.debug(
                f"Paper {i+1} usable area: {box_width}x{box_height}"
            )
            logging.debug(
                "Text resizing: original="
                f"{handwriting_img.width}x{handwriting_img.height}, "
                f"target={target_width}x{target_height} "
                f"(scale factor={TEXT_SCALE_FACTOR})"
            )

            text_center_x = x1 + box_width // 2
            text_center_y = y1 + box_height // 2
            text_box = [
                (text_center_x - target_width // 2, text_center_y - target_height // 2),
                (text_center_x + target_width // 2, text_center_y + target_height // 2)
            ]
            debug_draw.rectangle(text_box, outline=(255, 0, 255, 255), width=2)
            debug_draw.text(
                (text_box[0][0], text_box[0][1] - 15),
                f"Text: {target_width}x{target_height}",
                fill=(255, 0, 255, 255)
            )

            resized_handwriting = handwriting_img.resize(
                (target_width, target_height),
                Image.LANCZOS
            )
            resized_hw_debug_path = os.path.join(
                DEBUG_DIR,
                f"{timestamp}_resized_handwriting_{i+1}.png"
            )
            resized_handwriting.save(resized_hw_debug_path)

            handwriting_layer = Image.new("RGBA", pil_image.size, (0, 0, 0, 0))
            # Detection sizes may be floats; paste needs integer pixel offsets.
            paste_x = int(x1 + (box_width - target_width) // 2)
            paste_y = int(y1 + (box_height - target_height) // 2)
            handwriting_layer.paste(resized_handwriting, (paste_x, paste_y), resized_handwriting)
            debug_paste_box = [
                (paste_x, paste_y),
                (paste_x + target_width, paste_y + target_height)
            ]
            debug_draw.rectangle(debug_paste_box, outline=(0, 0, 255, 255), width=1)

            rotation_debug_path = os.path.join(
                DEBUG_DIR,
                f"{timestamp}_rotation_paper_{i+1}.png"
            )
            rotation_debug = handwriting_layer.copy()
            rotation_debug_draw = ImageDraw.Draw(rotation_debug)
            rotation_debug_draw.line(
                [(obj['x'] - 50, obj['y']), (obj['x'] + 50, obj['y'])],
                fill=(255, 0, 0, 255),
                width=1
            )
            rotation_debug_draw.line(
                [(obj['x'], obj['y'] - 50), (obj['x'], obj['y'] + 50)],
                fill=(255, 0, 0, 255),
                width=1
            )
            rotation_debug_draw.ellipse(
                [(obj['x'] - 5, obj['y'] - 5), (obj['x'] + 5, obj['y'] + 5)],
                fill=(255, 0, 0, 255)
            )
            rotation_debug_draw.text(
                (obj['x'] + 10, obj['y'] + 10),
                (
                    f"Rotation center: ({obj['x']}, {obj['y']})\n"
                    f"Angle: {angle:.1f}°"
                ),
                fill=(255, 0, 0, 255)
            )
            rotation_debug.save(rotation_debug_path)

            # Rotate the transparent layer about the paper centre, then blend.
            rotated_layer = handwriting_layer.rotate(
                -angle,
                resample=Image.BICUBIC,
                center=(obj['x'], obj['y'])
            )
            pil_image = Image.alpha_composite(pil_image, rotated_layer)

        debug_path = os.path.join(DEBUG_DIR, f"{timestamp}_debug_overlay.png")
        debug_image.save(debug_path)
        logging.debug(f"Saved debug overlay image to {debug_path}")

        # Unique output file so parallel requests never serve each other's result.
        fd, output_path = tempfile.mkstemp(prefix="output_image_", suffix=".png")
        os.close(fd)
        pil_image.convert("RGB").save(output_path)
        final_debug_path = os.path.join(DEBUG_DIR, f"{timestamp}_final_output.png")
        pil_image.save(final_debug_path)

        debug_report = {
            'timestamp': timestamp,
            'input_image': input_debug_path,
            'original_handwriting': orig_hw_debug_path,
            'trimmed_handwriting': trimmed_hw_debug_path,
            'text_dimensions': text_dimensions,
            'detected_papers': len(predictions),
            'paper_dimensions': [
                {
                    'index': i,
                    'width': obj['width'],
                    'height': obj['height'],
                    'position': (obj['x'], obj['y']),
                    'detected_angle': detected_angles[i]
                } for i, obj in enumerate(predictions)
            ],
            'debug_overlay': debug_path,
            'final_output': final_debug_path
        }
        logging.debug(f"Debug report: {debug_report}")
        return {
            'output_path': output_path,
            'debug_report': debug_report
        }
    except Exception as e:
        logging.error(f"Error: {str(e)}")
        raise
    finally:
        # Runs on success, early return and failure alike.
        for dir_path in temp_dirs:
            try:
                shutil.rmtree(dir_path)
            except Exception as e:
                logging.warning(f"Failed to clean up temporary directory {dir_path}: {str(e)}")
        if input_image_path:
            try:
                os.remove(input_image_path)
            except OSError:
                pass
# -------------------------------------------------------------------------
# 🎛 Gradio interface wrapper
# -------------------------------------------------------------------------
def gradio_process(image, text, style, bias, color, stroke_width, text_size):
    """Gradio callback: validate inputs, run ``process_image`` and build the UI outputs.

    Args:
        image: Uploaded PIL image, or ``None`` when nothing was uploaded.
        text: Text to render as handwriting.
        style, bias, color, stroke_width: Forwarded to ``process_image``.
        text_size: Stored in the module-level ``TEXT_SCALE_FACTOR`` before
            processing.

    Returns:
        A tuple ``(preview_path, download_path, message)``. Both paths are
        ``None`` when validation fails, no paper is detected or processing
        raises; ``message`` then explains why.
    """
    # NOTE: the scale is shared module state, so concurrent requests overwrite
    # each other's value.
    global TEXT_SCALE_FACTOR
    TEXT_SCALE_FACTOR = text_size
    if image is None:
        return None, None, "Please upload an image with paper."
    # Whitespace-only input has nothing to render; reject it like empty text.
    if not text or not text.strip():
        return None, None, "Please enter text to write on the paper."
    try:
        result = process_image(image, text, style, bias, color, stroke_width)
        if result is None:
            return None, None, "No papers detected in the image."
        output_path = result['output_path']
        debug_report = result['debug_report']

        parts = [
            "Processing complete!\n\n",
            f"Debug information in: {DEBUG_DIR}\n",
        ]
        if 'text_dimensions' in debug_report:
            td = debug_report['text_dimensions']
            if 'original' in td:
                parts.append(f"Original handwriting: {td['original']['width']}x{td['original']['height']} px\n")
            if 'text_only' in td:
                parts.append(f"Text content size: {td['text_only']['width']}x{td['text_only']['height']} px\n")
            if 'text_percentage' in td:
                parts.append(f"Text uses {td['text_percentage']['width']:.1f}% of image width\n")
            if 'trimmed' in td:
                parts.append(f"Trimmed size: {td['trimmed']['width']}x{td['trimmed']['height']} px\n")
        if 'paper_dimensions' in debug_report and len(debug_report['paper_dimensions']) > 0:
            # Only the first detected paper is summarised in the message.
            paper = debug_report['paper_dimensions'][0]
            parts.append(f"Detected paper: {paper['width']}x{paper['height']} px\n")
            parts.append(f"Paper angle: {paper['detected_angle']:.1f} degrees\n")
        parts.append(f"\nCheck {DEBUG_DIR} for all debug images.")
        debug_msg = "".join(parts)
        # The same file feeds both the preview and the download component.
        return output_path, output_path, debug_msg
    except Exception as e:
        logging.exception("Processing error")
        return None, None, f"Error: {str(e)}"
# -------------------------------------------------------------------------
# 🚀 Gradio App
# -------------------------------------------------------------------------
# UI definition: seven inputs feeding `gradio_process`, three outputs (preview
# image, downloadable file, debug text).
interface = gr.Interface(
    title="Handwritten Text on Paper Detection - Debug Version",
    description=(
        "Upload an image with paper, enter text, and the app will detect the paper "
        "and overlay handwritten text on it. Debug info will show what's happening "
        "behind the scenes."
    ),
    fn=gradio_process,
    inputs=[
        gr.Image(type="pil", label="Upload an Image with Paper"),
        gr.Textbox(label="Enter Text to Write in Handwriting"),
        gr.Slider(minimum=0, maximum=12, step=1, value=9, label="Handwriting Style"),
        gr.Slider(minimum=0.5, maximum=1.0, step=0.05, value=0.75, label="Neatness (Higher = Neater)"),
        gr.ColorPicker(label="Ink Color", value="#000000"),
        gr.Slider(minimum=1, maximum=4, step=0.5, value=2, label="Stroke Width"),
        gr.Slider(minimum=1.0, maximum=3.0, step=0.1, value=1.5, label="Text Size Multiplier"),
    ],
    outputs=[
        gr.Image(label="Processed Image Preview"),
        gr.File(label="Download Processed Image"),
        gr.Textbox(label="Debug Info", lines=10),
    ],
)

# Launch only when run as a script, not when imported.
if __name__ == "__main__":
    interface.launch(share=True)