Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -644,7 +644,112 @@ def estimate_text_density(image: Image.Image) -> float:
|
|
| 644 |
return 0.1 # Default to low density
|
| 645 |
|
| 646 |
|
| 647 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 648 |
"""
|
| 649 |
Detect average line spacing in a text region using horizontal projection analysis.
|
| 650 |
|
|
@@ -662,37 +767,32 @@ def detect_line_spacing(image: Image.Image, bbox: List[int]) -> float:
|
|
| 662 |
return None
|
| 663 |
|
| 664 |
# Horizontal projection: sum of dark pixels per row
|
| 665 |
-
|
| 666 |
-
row_sums = np.sum(img_array < 128, axis=1) # Count dark pixels per row
|
| 667 |
|
| 668 |
-
if len(row_sums) < 10:
|
| 669 |
return None
|
| 670 |
|
| 671 |
# Find peaks (text lines) and valleys (spacing between lines)
|
| 672 |
-
# Use adaptive threshold to identify text rows
|
| 673 |
mean_val = np.mean(row_sums)
|
| 674 |
std_val = np.std(row_sums)
|
| 675 |
-
threshold = max(mean_val * 0.
|
| 676 |
|
| 677 |
text_rows = np.where(row_sums > threshold)[0]
|
| 678 |
|
| 679 |
if len(text_rows) < 2:
|
| 680 |
-
return None
|
| 681 |
|
| 682 |
-
# Find gaps between text rows (line spacing)
|
| 683 |
# Group consecutive rows to find line centers
|
| 684 |
line_centers = []
|
| 685 |
current_group = [text_rows[0]]
|
| 686 |
|
| 687 |
for i in range(1, len(text_rows)):
|
| 688 |
-
if text_rows[i] - text_rows[i-1] <= 3:
|
| 689 |
current_group.append(text_rows[i])
|
| 690 |
else:
|
| 691 |
-
# End of current line, start new
|
| 692 |
line_centers.append(int(np.mean(current_group)))
|
| 693 |
current_group = [text_rows[i]]
|
| 694 |
|
| 695 |
-
# Add last group
|
| 696 |
if current_group:
|
| 697 |
line_centers.append(int(np.mean(current_group)))
|
| 698 |
|
|
@@ -703,41 +803,118 @@ def detect_line_spacing(image: Image.Image, bbox: List[int]) -> float:
|
|
| 703 |
spacings = []
|
| 704 |
for i in range(len(line_centers) - 1):
|
| 705 |
spacing = line_centers[i+1] - line_centers[i]
|
| 706 |
-
if spacing > 10:
|
| 707 |
spacings.append(spacing)
|
| 708 |
|
| 709 |
if spacings:
|
| 710 |
-
|
| 711 |
-
avg_spacing = np.median(spacings)
|
| 712 |
-
print(f" → Detected {len(line_centers)} lines with avg spacing {avg_spacing:.1f}px")
|
| 713 |
-
return float(avg_spacing)
|
| 714 |
|
| 715 |
return None
|
| 716 |
except Exception as e:
|
| 717 |
-
print(f" ⚠️ Could not detect line spacing: {e}")
|
| 718 |
return None
|
| 719 |
|
| 720 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 721 |
def split_text_regions_into_lines(
|
| 722 |
image: Image.Image,
|
| 723 |
layout_data: List[Dict[str, Any]],
|
| 724 |
-
min_line_height: int =
|
| 725 |
-
max_line_height: int =
|
| 726 |
) -> List[Dict[str, Any]]:
|
| 727 |
"""
|
| 728 |
-
|
| 729 |
|
| 730 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 731 |
|
| 732 |
Args:
|
| 733 |
image: Original image
|
| 734 |
layout_data: Layout detection results
|
| 735 |
-
min_line_height:
|
| 736 |
-
max_line_height:
|
| 737 |
|
| 738 |
Returns:
|
| 739 |
Updated layout data with lines split
|
| 740 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 741 |
result = []
|
| 742 |
split_count = 0
|
| 743 |
|
|
@@ -755,110 +932,90 @@ def split_text_regions_into_lines(
|
|
| 755 |
height = y2 - y1
|
| 756 |
width = x2 - x1
|
| 757 |
|
| 758 |
-
|
|
|
|
|
|
|
| 759 |
|
| 760 |
-
|
| 761 |
-
|
| 762 |
-
print(f"
|
| 763 |
-
|
| 764 |
-
|
| 765 |
-
|
| 766 |
-
|
| 767 |
-
|
| 768 |
-
|
| 769 |
-
# Try to detect actual line spacing from the image
|
| 770 |
-
detected_spacing = detect_line_spacing(image, bbox)
|
| 771 |
-
|
| 772 |
-
if detected_spacing and detected_spacing > min_line_height:
|
| 773 |
-
# Use detected spacing for more accurate splitting
|
| 774 |
-
estimated_lines = max(2, round(height / detected_spacing))
|
| 775 |
-
line_height = detected_spacing
|
| 776 |
-
print(f" → Detected line spacing: {detected_spacing:.1f}px, splitting into ~{estimated_lines} lines")
|
| 777 |
-
else:
|
| 778 |
-
# Fallback to estimated line height
|
| 779 |
-
# Arabic handwritten text: ~40-60px per line
|
| 780 |
-
# Arabic typed text: ~30-50px per line
|
| 781 |
-
avg_line_height = 45 # Middle ground
|
| 782 |
-
estimated_lines = max(2, round(height / avg_line_height))
|
| 783 |
-
line_height = height / estimated_lines
|
| 784 |
-
print(f" → Using estimated line height: {avg_line_height}px, splitting into {estimated_lines} lines")
|
| 785 |
-
|
| 786 |
-
# Don't split into too many lines (might be a paragraph)
|
| 787 |
-
estimated_lines = min(estimated_lines, 10)
|
| 788 |
-
|
| 789 |
-
# Calculate padding to avoid cutting through text
|
| 790 |
-
# Use 10% of line height as padding, but at least 3px
|
| 791 |
-
padding = max(3, int(line_height * 0.1))
|
| 792 |
-
|
| 793 |
-
# Split text content by newlines if available
|
| 794 |
-
text_lines = text_content.split('\n') if text_content else []
|
| 795 |
-
|
| 796 |
-
# If we have the same number of text lines as estimated, use them
|
| 797 |
-
if len(text_lines) == estimated_lines and len(text_lines) > 1:
|
| 798 |
-
for i, line_text in enumerate(text_lines):
|
| 799 |
-
if not line_text.strip():
|
| 800 |
-
continue
|
| 801 |
new_item = item.copy()
|
| 802 |
-
|
| 803 |
-
|
| 804 |
-
|
| 805 |
-
|
| 806 |
-
|
| 807 |
-
|
| 808 |
-
|
| 809 |
-
|
| 810 |
-
|
| 811 |
-
|
| 812 |
-
|
| 813 |
-
|
| 814 |
-
|
| 815 |
-
|
| 816 |
-
|
| 817 |
-
|
| 818 |
-
new_y1 = max(y1, int(new_y1))
|
| 819 |
-
new_y2 = min(y2, int(new_y2))
|
| 820 |
-
|
| 821 |
-
if new_y2 > new_y1: # Valid bbox
|
| 822 |
-
new_item['bbox'] = [x1, new_y1, x2, new_y2]
|
| 823 |
-
new_item['text'] = line_text.strip()
|
| 824 |
-
new_item['split_from_parent'] = True
|
| 825 |
-
result.append(new_item)
|
| 826 |
split_count += 1
|
| 827 |
-
|
| 828 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 829 |
for i in range(estimated_lines):
|
| 830 |
new_item = item.copy()
|
| 831 |
|
| 832 |
-
# Calculate bbox with padding to avoid cutting text
|
| 833 |
if i == 0:
|
| 834 |
-
# First line: pad bottom only
|
| 835 |
new_y1 = y1
|
| 836 |
new_y2 = y1 + line_height + padding
|
| 837 |
elif i == estimated_lines - 1:
|
| 838 |
-
# Last line: pad top only
|
| 839 |
new_y1 = y1 + (i * line_height) - padding
|
| 840 |
new_y2 = y2
|
| 841 |
else:
|
| 842 |
-
# Middle lines: pad both top and bottom
|
| 843 |
new_y1 = y1 + (i * line_height) - padding
|
| 844 |
new_y2 = y1 + ((i + 1) * line_height) + padding
|
| 845 |
|
| 846 |
-
# Ensure bbox is valid and within image bounds
|
| 847 |
new_y1 = max(y1, int(new_y1))
|
| 848 |
new_y2 = min(y2, int(new_y2))
|
| 849 |
|
| 850 |
-
if new_y2 > new_y1:
|
| 851 |
new_item['bbox'] = [x1, new_y1, x2, new_y2]
|
| 852 |
-
# Clear text - will be re-OCR'd per-line for accuracy
|
| 853 |
new_item['text'] = ""
|
| 854 |
new_item['split_from_parent'] = True
|
| 855 |
-
new_item['needs_reocr'] = True
|
| 856 |
new_item['line_number'] = i + 1
|
| 857 |
result.append(new_item)
|
|
|
|
| 858 |
split_count += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 859 |
|
| 860 |
if split_count > 0:
|
| 861 |
-
print(f"📏 Split {split_count}
|
| 862 |
|
| 863 |
return result
|
| 864 |
|
|
|
|
| 644 |
return 0.1 # Default to low density
|
| 645 |
|
| 646 |
|
| 647 |
+
def analyze_image_line_characteristics(image: Image.Image) -> Dict[str, float]:
    """
    Estimate line-height parameters for an image from its horizontal projection.

    The image is converted to grayscale and pixels darker than 128 are counted
    per row. Rows whose count exceeds an adaptive threshold are treated as text
    rows; text rows separated by a gap of at most 5 rows are merged into one
    line. Distances between consecutive line centres (only those larger than
    8 rows) provide the spacing statistics.

    Args:
        image: Image to analyse. Only ``image.size`` and ``image.convert('L')``
            are used.

    Returns:
        Dict with keys ``avg_line_height``, ``min_line_height``,
        ``max_line_height`` and ``line_spacing``. When spacing could be
        measured from the image, ``num_lines_detected`` is present as well.
        If anything raises during analysis, a fixed fallback
        (50 / 25 / 100 / 50) is returned instead of propagating the error.
    """

    def _height_based_estimate(height, line_count, min_floor):
        # Shared shape of the fallbacks used when spacing cannot be measured.
        return {
            'avg_line_height': height / line_count,
            'min_line_height': max(min_floor, height / (line_count * 2)),
            'max_line_height': height / 2,
            'line_spacing': height / line_count,
        }

    try:
        _, height = image.size
        img_array = np.array(image.convert('L'))

        # Horizontal projection: number of dark pixels in each row.
        row_sums = np.sum(img_array < 128, axis=1)

        if len(row_sums) < 10:
            # Too few rows to analyse; derive everything from the height.
            return {
                'avg_line_height': height / 10,
                'min_line_height': max(15, height / 20),
                'max_line_height': height / 3,
                'line_spacing': height / 15,
            }

        # Rows above the adaptive threshold are considered text rows.
        mean_val = np.mean(row_sums)
        std_val = np.std(row_sums)
        threshold = max(mean_val * 0.2, mean_val - std_val * 0.3)
        text_rows = np.flatnonzero(row_sums > threshold)

        if len(text_rows) < 2:
            return _height_based_estimate(height, max(5, height // 50), 15)

        # Start a new line wherever two text rows are more than 5 rows apart.
        split_at = np.flatnonzero(np.diff(text_rows) > 5) + 1
        line_centers = [int(np.mean(group)) for group in np.split(text_rows, split_at)]

        if len(line_centers) < 2:
            return _height_based_estimate(height, max(3, height // 60), 20)

        # Keep only centre-to-centre distances above the minimum of 8 rows.
        spacings = [gap for gap in np.diff(line_centers) if gap > 8]

        if spacings:
            avg_spacing = np.median(spacings)
            min_spacing = np.percentile(spacings, 25)
            max_spacing = np.percentile(spacings, 75)
            return {
                'avg_line_height': float(avg_spacing),
                'min_line_height': float(max(15, min_spacing * 0.6)),
                'max_line_height': float(max_spacing * 1.5),
                'line_spacing': float(avg_spacing),
                'num_lines_detected': len(line_centers),
            }

        return _height_based_estimate(height, max(3, height // 50), 20)

    except Exception as e:
        print(f" ⚠️ Error analyzing image: {e}")
        # The fallback must not touch `image` again: the failure may have come
        # from accessing it, and a second failure here would escape the handler.
        return {
            'avg_line_height': 50,
            'min_line_height': 25,
            'max_line_height': 100,
            'line_spacing': 50,
        }
|
| 750 |
+
|
| 751 |
+
|
| 752 |
+
def detect_line_spacing(image: Image.Image, bbox: List[int]) -> Optional[float]:
|
| 753 |
"""
|
| 754 |
Detect average line spacing in a text region using horizontal projection analysis.
|
| 755 |
|
|
|
|
| 767 |
return None
|
| 768 |
|
| 769 |
# Horizontal projection: sum of dark pixels per row
|
| 770 |
+
row_sums = np.sum(img_array < 128, axis=1)
|
|
|
|
| 771 |
|
| 772 |
+
if len(row_sums) < 10:
|
| 773 |
return None
|
| 774 |
|
| 775 |
# Find peaks (text lines) and valleys (spacing between lines)
|
|
|
|
| 776 |
mean_val = np.mean(row_sums)
|
| 777 |
std_val = np.std(row_sums)
|
| 778 |
+
threshold = max(mean_val * 0.25, mean_val - std_val * 0.4)
|
| 779 |
|
| 780 |
text_rows = np.where(row_sums > threshold)[0]
|
| 781 |
|
| 782 |
if len(text_rows) < 2:
|
| 783 |
+
return None
|
| 784 |
|
|
|
|
| 785 |
# Group consecutive rows to find line centers
|
| 786 |
line_centers = []
|
| 787 |
current_group = [text_rows[0]]
|
| 788 |
|
| 789 |
for i in range(1, len(text_rows)):
|
| 790 |
+
if text_rows[i] - text_rows[i-1] <= 3:
|
| 791 |
current_group.append(text_rows[i])
|
| 792 |
else:
|
|
|
|
| 793 |
line_centers.append(int(np.mean(current_group)))
|
| 794 |
current_group = [text_rows[i]]
|
| 795 |
|
|
|
|
| 796 |
if current_group:
|
| 797 |
line_centers.append(int(np.mean(current_group)))
|
| 798 |
|
|
|
|
| 803 |
spacings = []
|
| 804 |
for i in range(len(line_centers) - 1):
|
| 805 |
spacing = line_centers[i+1] - line_centers[i]
|
| 806 |
+
if spacing > 10:
|
| 807 |
spacings.append(spacing)
|
| 808 |
|
| 809 |
if spacings:
|
| 810 |
+
return float(np.median(spacings))
|
|
|
|
|
|
|
|
|
|
| 811 |
|
| 812 |
return None
|
| 813 |
except Exception as e:
|
|
|
|
| 814 |
return None
|
| 815 |
|
| 816 |
|
| 817 |
+
def detect_actual_line_breaks_in_region(image: Image.Image, bbox: List[int]) -> List[int]:
    """
    Locate the y-coordinates that separate text lines inside a region.

    The region is cropped, converted to grayscale, and pixels darker than 128
    are counted per row. Rows above an adaptive threshold are text rows; text
    rows at most 3 rows apart belong to the same line. A break point is placed
    midway between the last row of one line and the first row of the next.

    Args:
        image: Source image; only ``image.crop(...)`` and ``.convert('L')`` on
            the crop are used.
        bbox: Region as ``[x1, y1, x2, y2]``.

    Returns:
        Break positions as plain ``int`` values in image coordinates (``y1`` is
        added back). Empty when the crop is empty or has fewer than 5 rows,
        when fewer than two lines are found, or when any error occurs (the
        error is printed, not raised).
    """
    try:
        x1, y1, x2, y2 = bbox
        img_array = np.array(image.crop((x1, y1, x2, y2)).convert('L'))

        if img_array.size == 0:
            return []

        # Horizontal projection: number of dark pixels in each row.
        row_sums = np.sum(img_array < 128, axis=1)

        if len(row_sums) < 5:
            return []

        mean_val = np.mean(row_sums)
        std_val = np.std(row_sums)
        text_threshold = max(mean_val * 0.25, mean_val - std_val * 0.4)
        text_rows = np.flatnonzero(row_sums > text_threshold)

        if len(text_rows) < 2:
            return []

        # Start a new line wherever two text rows are more than 3 rows apart.
        split_at = np.flatnonzero(np.diff(text_rows) > 3) + 1
        line_groups = np.split(text_rows, split_at)

        if len(line_groups) < 2:
            return []  # Single line or can't detect

        # text_rows is ascending, so [-1] / [0] are each group's last / first row.
        # int() keeps numpy scalars out of the returned list.
        return [
            int(y1 + (upper[-1] + lower[0]) // 2)
            for upper, lower in zip(line_groups, line_groups[1:])
        ]

    except Exception as e:
        print(f" ⚠️ Error detecting line breaks: {e}")
        return []
|
| 881 |
+
|
| 882 |
+
|
| 883 |
def split_text_regions_into_lines(
|
| 884 |
image: Image.Image,
|
| 885 |
layout_data: List[Dict[str, Any]],
|
| 886 |
+
min_line_height: Optional[int] = None,
|
| 887 |
+
max_line_height: Optional[int] = None
|
| 888 |
) -> List[Dict[str, Any]]:
|
| 889 |
"""
|
| 890 |
+
Intelligently split text regions into individual lines.
|
| 891 |
|
| 892 |
+
ADAPTIVE APPROACH:
|
| 893 |
+
- Analyzes image to determine optimal parameters
|
| 894 |
+
- Detects actual line breaks using image analysis
|
| 895 |
+
- Works for any image type (sparse, dense, tables, forms)
|
| 896 |
+
- No hardcoded thresholds
|
| 897 |
|
| 898 |
Args:
|
| 899 |
image: Original image
|
| 900 |
layout_data: Layout detection results
|
| 901 |
+
min_line_height: Optional override (auto-detected if None)
|
| 902 |
+
max_line_height: Optional override (auto-detected if None)
|
| 903 |
|
| 904 |
Returns:
|
| 905 |
Updated layout data with lines split
|
| 906 |
"""
|
| 907 |
+
# Analyze image to get adaptive parameters
|
| 908 |
+
img_chars = analyze_image_line_characteristics(image)
|
| 909 |
+
adaptive_min = min_line_height if min_line_height else int(img_chars['min_line_height'])
|
| 910 |
+
adaptive_max = max_line_height if max_line_height else int(img_chars['max_line_height'])
|
| 911 |
+
avg_line_height = img_chars['avg_line_height']
|
| 912 |
+
|
| 913 |
+
print(f"\n📊 Image analysis: avg_line_height={avg_line_height:.1f}px, "
|
| 914 |
+
f"min={adaptive_min}px, max={adaptive_max}px")
|
| 915 |
+
if 'num_lines_detected' in img_chars:
|
| 916 |
+
print(f" Detected ~{img_chars['num_lines_detected']} lines in image")
|
| 917 |
+
|
| 918 |
result = []
|
| 919 |
split_count = 0
|
| 920 |
|
|
|
|
| 932 |
height = y2 - y1
|
| 933 |
width = x2 - x1
|
| 934 |
|
| 935 |
+
# ALWAYS check if region contains multiple lines, regardless of height
|
| 936 |
+
# Use image analysis to detect actual line breaks
|
| 937 |
+
line_breaks = detect_actual_line_breaks_in_region(image, bbox)
|
| 938 |
|
| 939 |
+
if len(line_breaks) > 0:
|
| 940 |
+
# We detected actual line breaks - split at those positions
|
| 941 |
+
print(f" Region: {category} (h={height}px) - Detected {len(line_breaks)+1} lines via image analysis")
|
| 942 |
+
|
| 943 |
+
# Create lines based on detected breaks
|
| 944 |
+
current_y = y1
|
| 945 |
+
for i, break_y in enumerate(line_breaks):
|
| 946 |
+
# Create line from current_y to break_y
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 947 |
new_item = item.copy()
|
| 948 |
+
new_item['bbox'] = [x1, int(current_y), x2, int(break_y)]
|
| 949 |
+
new_item['text'] = "" # Will be re-OCR'd
|
| 950 |
+
new_item['split_from_parent'] = True
|
| 951 |
+
new_item['needs_reocr'] = True
|
| 952 |
+
new_item['line_number'] = i + 1
|
| 953 |
+
result.append(new_item)
|
| 954 |
+
current_y = break_y
|
| 955 |
+
|
| 956 |
+
# Add last line
|
| 957 |
+
new_item = item.copy()
|
| 958 |
+
new_item['bbox'] = [x1, int(current_y), x2, y2]
|
| 959 |
+
new_item['text'] = ""
|
| 960 |
+
new_item['split_from_parent'] = True
|
| 961 |
+
new_item['needs_reocr'] = True
|
| 962 |
+
new_item['line_number'] = len(line_breaks) + 1
|
| 963 |
+
result.append(new_item)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 964 |
split_count += 1
|
| 965 |
+
|
| 966 |
+
elif height > adaptive_max:
|
| 967 |
+
# No line breaks detected but region is tall - use spacing-based split
|
| 968 |
+
print(f" Region: {category} (h={height}px) - Tall region, using spacing-based split")
|
| 969 |
+
|
| 970 |
+
# Try to detect spacing in this specific region
|
| 971 |
+
detected_spacing = detect_line_spacing(image, bbox)
|
| 972 |
+
|
| 973 |
+
if detected_spacing and detected_spacing > adaptive_min:
|
| 974 |
+
line_height = detected_spacing
|
| 975 |
+
estimated_lines = max(2, round(height / line_height))
|
| 976 |
+
else:
|
| 977 |
+
line_height = avg_line_height
|
| 978 |
+
estimated_lines = max(2, round(height / line_height))
|
| 979 |
+
|
| 980 |
+
estimated_lines = min(estimated_lines, 15) # Cap at 15 lines
|
| 981 |
+
|
| 982 |
+
# Calculate padding (adaptive: 8% of line height, min 2px)
|
| 983 |
+
padding = max(2, int(line_height * 0.08))
|
| 984 |
+
|
| 985 |
+
# Split geometrically
|
| 986 |
for i in range(estimated_lines):
|
| 987 |
new_item = item.copy()
|
| 988 |
|
|
|
|
| 989 |
if i == 0:
|
|
|
|
| 990 |
new_y1 = y1
|
| 991 |
new_y2 = y1 + line_height + padding
|
| 992 |
elif i == estimated_lines - 1:
|
|
|
|
| 993 |
new_y1 = y1 + (i * line_height) - padding
|
| 994 |
new_y2 = y2
|
| 995 |
else:
|
|
|
|
| 996 |
new_y1 = y1 + (i * line_height) - padding
|
| 997 |
new_y2 = y1 + ((i + 1) * line_height) + padding
|
| 998 |
|
|
|
|
| 999 |
new_y1 = max(y1, int(new_y1))
|
| 1000 |
new_y2 = min(y2, int(new_y2))
|
| 1001 |
|
| 1002 |
+
if new_y2 > new_y1:
|
| 1003 |
new_item['bbox'] = [x1, new_y1, x2, new_y2]
|
|
|
|
| 1004 |
new_item['text'] = ""
|
| 1005 |
new_item['split_from_parent'] = True
|
| 1006 |
+
new_item['needs_reocr'] = True
|
| 1007 |
new_item['line_number'] = i + 1
|
| 1008 |
result.append(new_item)
|
| 1009 |
+
|
| 1010 |
split_count += 1
|
| 1011 |
+
|
| 1012 |
+
else:
|
| 1013 |
+
# Region is reasonably sized - keep as is
|
| 1014 |
+
print(f" Region: {category} (h={height}px) - Keeping as single line")
|
| 1015 |
+
result.append(item)
|
| 1016 |
|
| 1017 |
if split_count > 0:
|
| 1018 |
+
print(f"📏 Split {split_count} regions into {len(result)} total lines")
|
| 1019 |
|
| 1020 |
return result
|
| 1021 |
|