modified: main.py
Browse files
main.py
CHANGED
|
@@ -354,8 +354,9 @@ class WorksheetSolver():
|
|
| 354 |
heights = [(gap[3] - gap[1]) for gap in gaps]
|
| 355 |
avg_height = sum(heights) / len(heights) if heights else 0
|
| 356 |
|
| 357 |
-
# Distance threshold:
|
| 358 |
distance_threshold = avg_height * 1.5
|
|
|
|
| 359 |
|
| 360 |
groups = []
|
| 361 |
gap_to_group = {}
|
|
@@ -410,8 +411,8 @@ class WorksheetSolver():
|
|
| 410 |
j_width = j_right - j_left
|
| 411 |
min_width = min(i_width, j_width)
|
| 412 |
|
| 413 |
-
# Check if box j is
|
| 414 |
-
if
|
| 415 |
# At least 30% overlap or 15px minimum
|
| 416 |
if h_overlap > min_width * 0.3 or h_overlap > 15:
|
| 417 |
current_group.append(j)
|
|
|
|
| 354 |
heights = [(gap[3] - gap[1]) for gap in gaps]
|
| 355 |
avg_height = sum(heights) / len(heights) if heights else 0
|
| 356 |
|
| 357 |
+
# Distance threshold: line boxes may slightly overlap or be very close
|
| 358 |
distance_threshold = avg_height * 1.5
|
| 359 |
+
overlap_tolerance = max(5, int(avg_height * 0.15))
|
| 360 |
|
| 361 |
groups = []
|
| 362 |
gap_to_group = {}
|
|
|
|
| 411 |
j_width = j_right - j_left
|
| 412 |
min_width = min(i_width, j_width)
|
| 413 |
|
| 414 |
+
# Check if box j is vertically close enough and horizontally aligned
|
| 415 |
+
if -overlap_tolerance <= vertical_distance < distance_threshold:
|
| 416 |
# At least 30% overlap or 15px minimum
|
| 417 |
if h_overlap > min_width * 0.3 or h_overlap > 15:
|
| 418 |
current_group.append(j)
|