3morrrrr committed on
Commit
e9156bf
·
verified ·
1 Parent(s): 85f5561

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -74
app.py CHANGED
@@ -76,7 +76,7 @@ logging.basicConfig(
76
  # -------------------------------------------------------------------------
77
  # 🤖 Roboflow configuration
78
  # -------------------------------------------------------------------------
79
- ROBOFLOW_API_KEY = "[REDACTED — leaked key, rotate it]" # ⚠️ never hardcode secrets; load via os.environ["ROBOFLOW_API_KEY"]
80
  PROJECT_NAME = "model_verification_project"
81
  VERSION_NUMBER = 2
82
 
@@ -88,6 +88,41 @@ os.environ["ROBOFLOW_API_KEY"] = ROBOFLOW_API_KEY
88
  # -------------------------------------------------------------------------
89
  HANDWRITING_MODEL_ENDPOINT = "3morrrrr/Handwriting_Model_Inf"
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  # -------------------------------------------------------------------------
92
  # ⚙️ General configuration
93
  # -------------------------------------------------------------------------
@@ -137,12 +172,12 @@ def save_debug_image(image, filename, text=None):
137
  return path
138
 
139
  # -------------------------------------------------------------------------
140
- # 🧠 Load Roboflow + HuggingFace models
141
  # -------------------------------------------------------------------------
142
  rf = Roboflow(api_key=ROBOFLOW_API_KEY)
143
  project = rf.workspace().project(PROJECT_NAME)
144
  model = project.version(VERSION_NUMBER).model
145
- client = Client(HANDWRITING_MODEL_ENDPOINT)
146
  # Improved function to detect paper angle
147
  def detect_paper_angle(image, bounding_box):
148
  """
@@ -242,8 +277,6 @@ def detect_paper_angle(image, bounding_box):
242
  except Exception as e:
243
  logging.warning(f"Error in adaptive threshold method: {str(e)}")
244
 
245
-
246
-
247
  # Method 3: Fall back to Canny edge detection with improved parameters
248
  try:
249
  # Apply Gaussian blur to reduce noise
@@ -278,18 +311,22 @@ def detect_paper_angle(image, bounding_box):
278
  if DEBUG:
279
  lines_debug = roi.copy() if len(roi.shape) == 3 else cv2.cvtColor(roi, cv2.COLOR_GRAY2RGB)
280
  for line in lines:
281
- x1, y1, x2, y2 = line[0]
282
- cv2.line(lines_debug, (x1, y1), (x2, y2), (0, 255, 255), 2)
283
  cv2.imwrite(os.path.join(DEBUG_DIR, f"hough_lines_{int(time.time())}.png"), lines_debug)
284
 
285
  # Find the longest line
286
- longest_line = max(lines, key=lambda line:
287
- np.linalg.norm((line[0][2] - line[0][0], line[0][3] - line[0][1])))
288
- x1, y1, x2, y2 = longest_line[0]
 
 
 
 
289
 
290
  # Calculate the angle of the line
291
- dx = x2 - x1
292
- dy = y2 - y1
293
  angle = degrees(atan2(dy, dx))
294
 
295
  # Normalize angle to be between -45 and 45 degrees
@@ -323,16 +360,17 @@ def extract_text_from_handwriting(image_path):
323
  if DEBUG:
324
  debug_img = img.copy()
325
  draw = ImageDraw.Draw(debug_img)
326
- draw.text((10, 10), f"Original Handwriting: {img.width}x{img.height}",
327
- fill=(255, 0, 0, 255))
 
 
 
328
  debug_img.save(os.path.join(DEBUG_DIR, "original_handwriting.png"))
329
 
330
  # Get the original dimensions
331
  original_width, original_height = img.width, img.height
332
 
333
  # Get the bounding box of non-zero areas (text content)
334
- # Convert to grayscale first to better detect handwriting
335
- # This is a more aggressive approach to find the text content
336
  gray_img = img.convert('L')
337
  # Normalize the image to make text stand out
338
  thresh = 240 # Higher threshold to catch light text
@@ -380,9 +418,15 @@ def extract_text_from_handwriting(image_path):
380
  # Draw original bounding box
381
  draw.rectangle(bbox, outline=(255, 0, 0, 255), width=2)
382
  # Add text annotation
383
- draw.text((bbox[0], bbox[1] - 15),
384
- f"Original: {original_width}x{original_height}, Text: {text_width}x{text_height} ({text_dimensions['text_percentage']['width']:.1f}%)",
385
- fill=(255, 0, 0, 255))
 
 
 
 
 
 
386
  debug_img.save(debug_image_path)
387
  # Save for reference
388
  debug_img.save(os.path.join(DEBUG_DIR, "text_extraction.png"))
@@ -393,7 +437,7 @@ def extract_text_from_handwriting(image_path):
393
  # If no content found, just return the original
394
  shutil.copy(image_path, temp_image_path)
395
  text_dimensions['error'] = "No text content detected"
396
- logging.warning(f"No text content detected in handwriting image")
397
  return image_path, None, text_dimensions
398
  except Exception as e:
399
  logging.error(f"Error extracting text from image: {str(e)}")
@@ -411,16 +455,16 @@ def process_image(image, text, style, bias, color, stroke_width):
411
  image.save(input_debug_path)
412
 
413
  # Detect papers using Roboflow first to get paper dimensions for text formatting
414
- rf = Roboflow(api_key=ROBOFLOW_API_KEY)
415
- project = rf.workspace().project(PROJECT_NAME)
416
- model = project.version(VERSION_NUMBER).model
417
 
418
  # Save input image temporarily
419
  input_image_path = "/tmp/input_image.jpg"
420
  image.save(input_image_path)
421
 
422
  # Perform inference to detect papers
423
- prediction = model.predict(input_image_path, confidence=70, overlap=50).json()
424
  num_papers = len(prediction['predictions'])
425
  logging.debug(f"Detected {num_papers} papers")
426
 
@@ -430,8 +474,8 @@ def process_image(image, text, style, bias, color, stroke_width):
430
 
431
  # Format text based on the first detected paper dimensions
432
  if len(prediction['predictions']) > 0:
433
- obj = prediction['predictions'][0]
434
- paper_width = obj['width']
435
 
436
  # Calculate usable width (accounting for padding)
437
  padding_x = int(paper_width * 0.1)
@@ -446,7 +490,7 @@ def process_image(image, text, style, bias, color, stroke_width):
446
 
447
  # 1. Generate handwritten text using the Hugging Face model with formatted text
448
  logging.debug(f"Calling handwriting model with formatted text: '{formatted_text}'")
449
- handwriting_client = Client(HANDWRITING_MODEL_ENDPOINT)
450
  result = handwriting_client.predict(
451
  formatted_text, # Use formatted text instead of original
452
  style, # handwriting style
@@ -499,7 +543,7 @@ def process_image(image, text, style, bias, color, stroke_width):
499
  # Log paper dimensions
500
  logging.debug(f"Paper {i+1} dimensions: {paper_width}x{paper_height} at position ({obj['x']}, {obj['y']})")
501
 
502
- # Add padding (10%)
503
  padding_x = int(paper_width * 0.20)
504
  padding_y = int(paper_height * 0.20)
505
 
@@ -507,7 +551,7 @@ def process_image(image, text, style, bias, color, stroke_width):
507
  box_width = paper_width - 2 * padding_x
508
  box_height = paper_height - 2 * padding_y
509
 
510
- # Calculate paper coordinates
511
  x1 = int(obj['x'] - paper_width / 2 + padding_x)
512
  y1 = int(obj['y'] - paper_height / 2 + padding_y)
513
  x2 = int(obj['x'] + paper_width / 2 - padding_x)
@@ -515,15 +559,22 @@ def process_image(image, text, style, bias, color, stroke_width):
515
 
516
  # Draw paper boundary on debug image
517
  paper_box = [(obj['x'] - paper_width/2, obj['y'] - paper_height/2),
518
- (obj['x'] + paper_width/2, obj['y'] + paper_height/2)]
519
  debug_draw.rectangle(paper_box, outline=(0, 255, 0, 255), width=3)
520
- debug_draw.text((paper_box[0][0], paper_box[0][1] - 15),
521
- f"Paper {i+1}: {paper_width}x{paper_height}", fill=(0, 255, 0, 255))
 
 
 
522
 
523
  # Draw usable area on debug image
524
  usable_box = [(x1, y1), (x2, y2)]
525
  debug_draw.rectangle(usable_box, outline=(255, 255, 0, 255), width=2)
526
- debug_draw.text((x1, y1 - 15), f"Usable: {box_width}x{box_height}", fill=(255, 255, 0, 255))
 
 
 
 
527
 
528
  # Paper coordinates for detecting the actual paper orientation
529
  paper_x1 = int(obj['x'] - paper_width / 2)
@@ -532,35 +583,37 @@ def process_image(image, text, style, bias, color, stroke_width):
532
  paper_y2 = int(obj['y'] + paper_height / 2)
533
 
534
  # Detect the actual paper angle (not just the bounding box)
535
- angle = detect_paper_angle(np.array(image), (paper_x1, paper_y1, paper_x2, paper_y2))
 
 
 
536
  logging.debug(f"Paper {i+1} angle: {angle} degrees")
537
 
538
  # Add a debug visualization of the detected angle
539
  debug_draw.line(
540
- [(obj['x'], obj['y']),
541
- (obj['x'] + 50 * np.cos(np.radians(angle)),
542
- obj['y'] + 50 * np.sin(np.radians(angle)))],
543
- fill=(255, 0, 0, 255), width=3
 
 
 
 
 
544
  )
545
  debug_draw.text(
546
- (obj['x'] + 60, obj['y']),
547
- f"Angle: {angle:.1f}°",
548
  fill=(255, 0, 0, 255)
549
  )
550
 
551
  # Calculate the initial size while maintaining aspect ratio
552
  handwriting_aspect = handwriting_img.width / handwriting_img.height
553
 
554
- # ENHANCED SCALING APPROACH:
555
- # 1. Start with the full paper width for maximum readability
556
- # 2. Then apply the scale factor to make it even larger
557
- # 3. Ensure minimum width percentage
558
- # 4. Only adjust if it doesn't fit the height
559
-
560
  # Start with the full usable width
561
  target_width = box_width
562
 
563
- # Apply scale factor to make text larger (but don't exceed usable width)
564
  target_width = min(int(target_width * TEXT_SCALE_FACTOR), box_width * 2)
565
 
566
  # Calculate height based on aspect ratio
@@ -582,24 +635,41 @@ def process_image(image, text, style, bias, color, stroke_width):
582
  target_width = int(target_height * handwriting_aspect)
583
 
584
  # Log sizing calculations
585
- logging.debug(f"Paper {i+1} usable area: {box_width}x{box_height}")
586
- logging.debug(f"Text resizing: original={handwriting_img.width}x{handwriting_img.height}, " +
587
- f"target={target_width}x{target_height} (scale factor={TEXT_SCALE_FACTOR})")
 
 
 
 
 
 
588
 
589
  # Draw text area on debug image
590
  text_center_x = x1 + box_width // 2
591
  text_center_y = y1 + box_height // 2
592
- text_box = [(text_center_x - target_width // 2, text_center_y - target_height // 2),
593
- (text_center_x + target_width // 2, text_center_y + target_height // 2)]
 
 
594
  debug_draw.rectangle(text_box, outline=(255, 0, 255, 255), width=2)
595
- debug_draw.text((text_box[0][0], text_box[0][1] - 15),
596
- f"Text: {target_width}x{target_height}", fill=(255, 0, 255, 255))
 
 
 
597
 
598
  # Resize the handwriting with the calculated dimensions
599
- resized_handwriting = handwriting_img.resize((target_width, target_height), Image.LANCZOS)
 
 
 
600
 
601
  # Save resized handwriting for reference
602
- resized_hw_debug_path = os.path.join(DEBUG_DIR, f"{timestamp}_resized_handwriting_{i+1}.png")
 
 
 
603
  resized_handwriting.save(resized_hw_debug_path)
604
 
605
  # Create a transparent layer for the handwriting
@@ -613,35 +683,48 @@ def process_image(image, text, style, bias, color, stroke_width):
613
  handwriting_layer.paste(resized_handwriting, (paste_x, paste_y), resized_handwriting)
614
 
615
  # Add to debug image
616
- debug_paste_box = [(paste_x, paste_y), (paste_x + target_width, paste_y + target_height)]
 
 
 
617
  debug_draw.rectangle(debug_paste_box, outline=(0, 0, 255, 255), width=1)
618
 
619
  # Create another debug visualization showing rotation center and angle
620
- rotation_debug_path = os.path.join(DEBUG_DIR, f"{timestamp}_rotation_paper_{i+1}.png")
 
 
 
621
  rotation_debug = handwriting_layer.copy()
622
  rotation_debug_draw = ImageDraw.Draw(rotation_debug)
623
  rotation_debug_draw.line(
624
- [(obj['x'] - 50, obj['y']), (obj['x'] + 50, obj['y'])],
625
- fill=(255, 0, 0, 255), width=1
 
626
  )
627
  rotation_debug_draw.line(
628
- [(obj['x'], obj['y'] - 50), (obj['x'], obj['y'] + 50)],
629
- fill=(255, 0, 0, 255), width=1
 
630
  )
631
  rotation_debug_draw.ellipse(
632
- [(obj['x'] - 5, obj['y'] - 5), (obj['x'] + 5, obj['y'] + 5)],
633
  fill=(255, 0, 0, 255)
634
  )
635
  rotation_debug_draw.text(
636
- (obj['x'] + 10, obj['y'] + 10),
637
- f"Rotation center: ({obj['x']}, {obj['y']})\nAngle: {angle:.1f}°",
 
 
 
638
  fill=(255, 0, 0, 255)
639
  )
640
  rotation_debug.save(rotation_debug_path)
641
 
642
  # Rotate to match paper angle
643
  rotated_layer = handwriting_layer.rotate(
644
- -angle, resample=Image.BICUBIC, center=(obj['x'], obj['y'])
 
 
645
  )
646
 
647
  # Composite onto original image
@@ -681,12 +764,15 @@ def process_image(image, text, style, bias, color, stroke_width):
681
  'width': obj['width'],
682
  'height': obj['height'],
683
  'position': (obj['x'], obj['y']),
684
- 'detected_angle': detect_paper_angle(np.array(image), (
685
- int(obj['x'] - obj['width'] / 2),
686
- int(obj['y'] - obj['height'] / 2),
687
- int(obj['x'] + obj['width'] / 2),
688
- int(obj['y'] + obj['height'] / 2)
689
- ))
 
 
 
690
  } for i, obj in enumerate(prediction['predictions'])
691
  ],
692
  'debug_overlay': debug_path,
@@ -779,9 +865,13 @@ interface = gr.Interface(
779
  gr.Textbox(label="Debug Info", lines=10)
780
  ],
781
  title="Handwritten Text on Paper Detection - Debug Version",
782
- description="Upload an image with paper, enter text, and the app will detect the paper and overlay handwritten text on it. Debug info will show what's happening behind the scenes."
 
 
 
 
783
  )
784
 
785
  # Launch app
786
  if __name__ == "__main__":
787
- interface.launch(share=True)
 
76
  # -------------------------------------------------------------------------
77
  # 🤖 Roboflow configuration
78
  # -------------------------------------------------------------------------
79
+ ROBOFLOW_API_KEY = "[REDACTED — leaked key, rotate it]" # ⚠️ this commit replaced one plaintext key with another; read it from the environment (e.g. HF Spaces secrets) instead
80
  PROJECT_NAME = "model_verification_project"
81
  VERSION_NUMBER = 2
82
 
 
88
  # -------------------------------------------------------------------------
89
  HANDWRITING_MODEL_ENDPOINT = "3morrrrr/Handwriting_Model_Inf"
90
 
91
+ # Cached client instance (lazy init)
92
+ _handwriting_client = None
93
+
94
+ def get_handwriting_client(max_retries=5, retry_delay=3):
95
+ """
96
+ Lazily initialize and cache the handwriting Client.
97
+
98
+ Retries a few times in case the Hugging Face Space is cold-starting,
99
+ to avoid crashing the whole app on startup.
100
+ """
101
+ global _handwriting_client
102
+ if _handwriting_client is not None:
103
+ return _handwriting_client
104
+
105
+ last_error = None
106
+ for attempt in range(1, max_retries + 1):
107
+ try:
108
+ logging.info(
109
+ f"Initializing handwriting client "
110
+ f"(attempt {attempt}/{max_retries}) for {HANDWRITING_MODEL_ENDPOINT}"
111
+ )
112
+ _handwriting_client = Client(HANDWRITING_MODEL_ENDPOINT)
113
+ logging.info("Handwriting client initialized successfully.")
114
+ return _handwriting_client
115
+ except Exception as e:
116
+ last_error = e
117
+ logging.warning(
118
+ f"Failed to initialize handwriting client "
119
+ f"(attempt {attempt}/{max_retries}): {e}"
120
+ )
121
+ time.sleep(retry_delay)
122
+
123
+ logging.error("Exceeded max retries while initializing handwriting client.")
124
+ raise last_error
125
+
126
  # -------------------------------------------------------------------------
127
  # ⚙️ General configuration
128
  # -------------------------------------------------------------------------
 
172
  return path
173
 
174
  # -------------------------------------------------------------------------
175
+ # 🧠 Load Roboflow models
176
  # -------------------------------------------------------------------------
177
  rf = Roboflow(api_key=ROBOFLOW_API_KEY)
178
  project = rf.workspace().project(PROJECT_NAME)
179
  model = project.version(VERSION_NUMBER).model
180
+
181
  # Improved function to detect paper angle
182
  def detect_paper_angle(image, bounding_box):
183
  """
 
277
  except Exception as e:
278
  logging.warning(f"Error in adaptive threshold method: {str(e)}")
279
 
 
 
280
  # Method 3: Fall back to Canny edge detection with improved parameters
281
  try:
282
  # Apply Gaussian blur to reduce noise
 
311
  if DEBUG:
312
  lines_debug = roi.copy() if len(roi.shape) == 3 else cv2.cvtColor(roi, cv2.COLOR_GRAY2RGB)
313
  for line in lines:
314
+ x1_l, y1_l, x2_l, y2_l = line[0]
315
+ cv2.line(lines_debug, (x1_l, y1_l), (x2_l, y2_l), (0, 255, 255), 2)
316
  cv2.imwrite(os.path.join(DEBUG_DIR, f"hough_lines_{int(time.time())}.png"), lines_debug)
317
 
318
  # Find the longest line
319
+ longest_line = max(
320
+ lines,
321
+ key=lambda line: np.linalg.norm(
322
+ (line[0][2] - line[0][0], line[0][3] - line[0][1])
323
+ )
324
+ )
325
+ x1_l, y1_l, x2_l, y2_l = longest_line[0]
326
 
327
  # Calculate the angle of the line
328
+ dx = x2_l - x1_l
329
+ dy = y2_l - y1_l
330
  angle = degrees(atan2(dy, dx))
331
 
332
  # Normalize angle to be between -45 and 45 degrees
 
360
  if DEBUG:
361
  debug_img = img.copy()
362
  draw = ImageDraw.Draw(debug_img)
363
+ draw.text(
364
+ (10, 10),
365
+ f"Original Handwriting: {img.width}x{img.height}",
366
+ fill=(255, 0, 0, 255)
367
+ )
368
  debug_img.save(os.path.join(DEBUG_DIR, "original_handwriting.png"))
369
 
370
  # Get the original dimensions
371
  original_width, original_height = img.width, img.height
372
 
373
  # Get the bounding box of non-zero areas (text content)
 
 
374
  gray_img = img.convert('L')
375
  # Normalize the image to make text stand out
376
  thresh = 240 # Higher threshold to catch light text
 
418
  # Draw original bounding box
419
  draw.rectangle(bbox, outline=(255, 0, 0, 255), width=2)
420
  # Add text annotation
421
+ draw.text(
422
+ (bbox[0], bbox[1] - 15),
423
+ (
424
+ f"Original: {original_width}x{original_height}, "
425
+ f"Text: {text_width}x{text_height} "
426
+ f"({text_dimensions['text_percentage']['width']:.1f}%)"
427
+ ),
428
+ fill=(255, 0, 0, 255)
429
+ )
430
  debug_img.save(debug_image_path)
431
  # Save for reference
432
  debug_img.save(os.path.join(DEBUG_DIR, "text_extraction.png"))
 
437
  # If no content found, just return the original
438
  shutil.copy(image_path, temp_image_path)
439
  text_dimensions['error'] = "No text content detected"
440
+ logging.warning("No text content detected in handwriting image")
441
  return image_path, None, text_dimensions
442
  except Exception as e:
443
  logging.error(f"Error extracting text from image: {str(e)}")
 
455
  image.save(input_debug_path)
456
 
457
  # Detect papers using Roboflow first to get paper dimensions for text formatting
458
+ rf_local = Roboflow(api_key=ROBOFLOW_API_KEY)
459
+ project_local = rf_local.workspace().project(PROJECT_NAME)
460
+ model_local = project_local.version(VERSION_NUMBER).model
461
 
462
  # Save input image temporarily
463
  input_image_path = "/tmp/input_image.jpg"
464
  image.save(input_image_path)
465
 
466
  # Perform inference to detect papers
467
+ prediction = model_local.predict(input_image_path, confidence=70, overlap=50).json()
468
  num_papers = len(prediction['predictions'])
469
  logging.debug(f"Detected {num_papers} papers")
470
 
 
474
 
475
  # Format text based on the first detected paper dimensions
476
  if len(prediction['predictions']) > 0:
477
+ obj0 = prediction['predictions'][0]
478
+ paper_width = obj0['width']
479
 
480
  # Calculate usable width (accounting for padding)
481
  padding_x = int(paper_width * 0.1)
 
490
 
491
  # 1. Generate handwritten text using the Hugging Face model with formatted text
492
  logging.debug(f"Calling handwriting model with formatted text: '{formatted_text}'")
493
+ handwriting_client = get_handwriting_client()
494
  result = handwriting_client.predict(
495
  formatted_text, # Use formatted text instead of original
496
  style, # handwriting style
 
543
  # Log paper dimensions
544
  logging.debug(f"Paper {i+1} dimensions: {paper_width}x{paper_height} at position ({obj['x']}, {obj['y']})")
545
 
546
+ # Add padding (20%)
547
  padding_x = int(paper_width * 0.20)
548
  padding_y = int(paper_height * 0.20)
549
 
 
551
  box_width = paper_width - 2 * padding_x
552
  box_height = paper_height - 2 * padding_y
553
 
554
+ # Calculate text box coordinates
555
  x1 = int(obj['x'] - paper_width / 2 + padding_x)
556
  y1 = int(obj['y'] - paper_height / 2 + padding_y)
557
  x2 = int(obj['x'] + paper_width / 2 - padding_x)
 
559
 
560
  # Draw paper boundary on debug image
561
  paper_box = [(obj['x'] - paper_width/2, obj['y'] - paper_height/2),
562
+ (obj['x'] + paper_width/2, obj['y'] + paper_height/2)]
563
  debug_draw.rectangle(paper_box, outline=(0, 255, 0, 255), width=3)
564
+ debug_draw.text(
565
+ (paper_box[0][0], paper_box[0][1] - 15),
566
+ f"Paper {i+1}: {paper_width}x{paper_height}",
567
+ fill=(0, 255, 0, 255)
568
+ )
569
 
570
  # Draw usable area on debug image
571
  usable_box = [(x1, y1), (x2, y2)]
572
  debug_draw.rectangle(usable_box, outline=(255, 255, 0, 255), width=2)
573
+ debug_draw.text(
574
+ (x1, y1 - 15),
575
+ f"Usable: {box_width}x{box_height}",
576
+ fill=(255, 255, 0, 255)
577
+ )
578
 
579
  # Paper coordinates for detecting the actual paper orientation
580
  paper_x1 = int(obj['x'] - paper_width / 2)
 
583
  paper_y2 = int(obj['y'] + paper_height / 2)
584
 
585
  # Detect the actual paper angle (not just the bounding box)
586
+ angle = detect_paper_angle(
587
+ np.array(image),
588
+ (paper_x1, paper_y1, paper_x2, paper_y2)
589
+ )
590
  logging.debug(f"Paper {i+1} angle: {angle} degrees")
591
 
592
  # Add a debug visualization of the detected angle
593
  debug_draw.line(
594
+ [
595
+ (obj['x'], obj['y']),
596
+ (
597
+ obj['x'] + 50 * np.cos(np.radians(angle)),
598
+ obj['y'] + 50 * np.sin(np.radians(angle))
599
+ )
600
+ ],
601
+ fill=(255, 0, 0, 255),
602
+ width=3
603
  )
604
  debug_draw.text(
605
+ (obj['x'] + 60, obj['y']),
606
+ f"Angle: {angle:.1f}°",
607
  fill=(255, 0, 0, 255)
608
  )
609
 
610
  # Calculate the initial size while maintaining aspect ratio
611
  handwriting_aspect = handwriting_img.width / handwriting_img.height
612
 
 
 
 
 
 
 
613
  # Start with the full usable width
614
  target_width = box_width
615
 
616
+ # Apply scale factor to make text larger (but don't exceed 2x usable width)
617
  target_width = min(int(target_width * TEXT_SCALE_FACTOR), box_width * 2)
618
 
619
  # Calculate height based on aspect ratio
 
635
  target_width = int(target_height * handwriting_aspect)
636
 
637
  # Log sizing calculations
638
+ logging.debug(
639
+ f"Paper {i+1} usable area: {box_width}x{box_height}"
640
+ )
641
+ logging.debug(
642
+ "Text resizing: original="
643
+ f"{handwriting_img.width}x{handwriting_img.height}, "
644
+ f"target={target_width}x{target_height} "
645
+ f"(scale factor={TEXT_SCALE_FACTOR})"
646
+ )
647
 
648
  # Draw text area on debug image
649
  text_center_x = x1 + box_width // 2
650
  text_center_y = y1 + box_height // 2
651
+ text_box = [
652
+ (text_center_x - target_width // 2, text_center_y - target_height // 2),
653
+ (text_center_x + target_width // 2, text_center_y + target_height // 2)
654
+ ]
655
  debug_draw.rectangle(text_box, outline=(255, 0, 255, 255), width=2)
656
+ debug_draw.text(
657
+ (text_box[0][0], text_box[0][1] - 15),
658
+ f"Text: {target_width}x{target_height}",
659
+ fill=(255, 0, 255, 255)
660
+ )
661
 
662
  # Resize the handwriting with the calculated dimensions
663
+ resized_handwriting = handwriting_img.resize(
664
+ (target_width, target_height),
665
+ Image.LANCZOS
666
+ )
667
 
668
  # Save resized handwriting for reference
669
+ resized_hw_debug_path = os.path.join(
670
+ DEBUG_DIR,
671
+ f"{timestamp}_resized_handwriting_{i+1}.png"
672
+ )
673
  resized_handwriting.save(resized_hw_debug_path)
674
 
675
  # Create a transparent layer for the handwriting
 
683
  handwriting_layer.paste(resized_handwriting, (paste_x, paste_y), resized_handwriting)
684
 
685
  # Add to debug image
686
+ debug_paste_box = [
687
+ (paste_x, paste_y),
688
+ (paste_x + target_width, paste_y + target_height)
689
+ ]
690
  debug_draw.rectangle(debug_paste_box, outline=(0, 0, 255, 255), width=1)
691
 
692
  # Create another debug visualization showing rotation center and angle
693
+ rotation_debug_path = os.path.join(
694
+ DEBUG_DIR,
695
+ f"{timestamp}_rotation_paper_{i+1}.png"
696
+ )
697
  rotation_debug = handwriting_layer.copy()
698
  rotation_debug_draw = ImageDraw.Draw(rotation_debug)
699
  rotation_debug_draw.line(
700
+ [(obj['x'] - 50, obj['y']), (obj['x'] + 50, obj['y'])],
701
+ fill=(255, 0, 0, 255),
702
+ width=1
703
  )
704
  rotation_debug_draw.line(
705
+ [(obj['x'], obj['y'] - 50), (obj['x'], obj['y'] + 50)],
706
+ fill=(255, 0, 0, 255),
707
+ width=1
708
  )
709
  rotation_debug_draw.ellipse(
710
+ [(obj['x'] - 5, obj['y'] - 5), (obj['x'] + 5, obj['y'] + 5)],
711
  fill=(255, 0, 0, 255)
712
  )
713
  rotation_debug_draw.text(
714
+ (obj['x'] + 10, obj['y'] + 10),
715
+ (
716
+ f"Rotation center: ({obj['x']}, {obj['y']})\n"
717
+ f"Angle: {angle:.1f}°"
718
+ ),
719
  fill=(255, 0, 0, 255)
720
  )
721
  rotation_debug.save(rotation_debug_path)
722
 
723
  # Rotate to match paper angle
724
  rotated_layer = handwriting_layer.rotate(
725
+ -angle,
726
+ resample=Image.BICUBIC,
727
+ center=(obj['x'], obj['y'])
728
  )
729
 
730
  # Composite onto original image
 
764
  'width': obj['width'],
765
  'height': obj['height'],
766
  'position': (obj['x'], obj['y']),
767
+ 'detected_angle': detect_paper_angle(
768
+ np.array(image),
769
+ (
770
+ int(obj['x'] - obj['width'] / 2),
771
+ int(obj['y'] - obj['height'] / 2),
772
+ int(obj['x'] + obj['width'] / 2),
773
+ int(obj['y'] + obj['height'] / 2)
774
+ )
775
+ )
776
  } for i, obj in enumerate(prediction['predictions'])
777
  ],
778
  'debug_overlay': debug_path,
 
865
  gr.Textbox(label="Debug Info", lines=10)
866
  ],
867
  title="Handwritten Text on Paper Detection - Debug Version",
868
+ description=(
869
+ "Upload an image with paper, enter text, and the app will detect the paper "
870
+ "and overlay handwritten text on it. Debug info will show what's happening "
871
+ "behind the scenes."
872
+ )
873
  )
874
 
875
  # Launch app
876
  if __name__ == "__main__":
877
+ interface.launch(share=True)