Spaces:

VanguardAI
/

Arabic-OCR

Running

App Files Files Community

VanguardAI committed on Nov 5, 2025

Commit

6896445

verified ·

1 Parent(s): c265696

Update app.py

Browse files

Files changed (1) hide show

app.py +176 -36

app.py CHANGED Viewed

@@ -644,6 +644,80 @@ def estimate_text_density(image: Image.Image) -> float:
         return 0.1  # Default to low density
 def split_text_regions_into_lines(
     image: Image.Image,
     layout_data: List[Dict[str, Any]],
@@ -653,7 +727,7 @@ def split_text_regions_into_lines(
     """
     Post-process layout data to split large text regions into individual lines.
-    This ensures each line gets its own bounding box for easier verification.
     Args:
         image: Original image
@@ -683,53 +757,105 @@ def split_text_regions_into_lines(
         print(f"   Checking region: height={height}px, width={width}px, category={category}")
         # If region is tall enough to contain multiple lines, split it
-        if height > max_line_height:
-            print(f"   → Splitting! (height {height}px > threshold {max_line_height}px)")
-            # Estimate number of lines based on typical line height
             # Arabic handwritten text: ~40-60px per line
             # Arabic typed text: ~30-50px per line
             avg_line_height = 45  # Middle ground
-            estimated_lines = max(1, round(height / avg_line_height))
-            # Don't split into too many lines (might be a paragraph)
-            estimated_lines = min(estimated_lines, 10)
             line_height = height / estimated_lines
-            # Split text content by newlines if available
-            text_lines = text_content.split('\n') if text_content else []
-            # If we have the same number of text lines as estimated, use them
-            if len(text_lines) == estimated_lines and len(text_lines) > 1:
-                for i, line_text in enumerate(text_lines):
-                    if not line_text.strip():
-                        continue
-                    new_item = item.copy()
-                    new_y1 = y1 + (i * line_height)
-                    new_y2 = y1 + ((i + 1) * line_height)
-                    new_item['bbox'] = [x1, int(new_y1), x2, int(new_y2)]
                     new_item['text'] = line_text.strip()
                     new_item['split_from_parent'] = True
                     result.append(new_item)
-                split_count += 1
-            else:
-                # Split geometrically - mark for re-OCR to get accurate per-line text
-                for i in range(estimated_lines):
-                    new_item = item.copy()
-                    new_y1 = y1 + (i * line_height)
-                    new_y2 = y1 + ((i + 1) * line_height)
-                    new_item['bbox'] = [x1, int(new_y1), x2, int(new_y2)]
                     # Clear text - will be re-OCR'd per-line for accuracy
                     new_item['text'] = ""
                     new_item['split_from_parent'] = True
                     new_item['needs_reocr'] = True  # Flag for re-processing
                     new_item['line_number'] = i + 1
                     result.append(new_item)
-                split_count += 1
-        else:
-            # Region is already line-sized, keep as is
-            result.append(item)
     if split_count > 0:
         print(f"📏 Split {split_count} large regions into individual lines ({len(layout_data)} → {len(result)} regions)")
@@ -996,8 +1122,22 @@ def process_image(
                         if x2 <= x1 or y2 <= y1:
                             continue
                         # Crop and preprocess the line region
-                        crop_img = image.crop((x1, y1, x2, y2))
                         # Apply preprocessing to enhance handwriting quality
                         crop_img = preprocess_for_handwriting_ocr(crop_img)
@@ -1802,4 +1942,4 @@ if __name__ == "__main__":
         share=False,
         debug=True,
         show_error=True
-    )

         return 0.1  # Default to low density
+def detect_line_spacing(image: Image.Image, bbox: List[int]) -> float:  # NOTE(review): annotated as float, but every failure path below returns None
+    """
+    Detect average line spacing in a text region using horizontal projection analysis.
+    Returns estimated line height in pixels, or None if detection fails.
+    """
+    try:
+        x1, y1, x2, y2 = bbox  # bbox is unpacked as (left, top, right, bottom), the order image.crop() receives below
+        crop = image.crop((x1, y1, x2, y2))
+        # Convert to single-channel grayscale ('L') so each pixel is one intensity value
+        gray = crop.convert('L')
+        img_array = np.array(gray)
+        if img_array.size == 0:  # Empty crop: nothing to analyse
+            return None
+        # Horizontal projection profile: one count per pixel row of the crop.
+        # Rows crossing ink get high counts; rows in inter-line gaps get low counts.
+        row_sums = np.sum(img_array < 128, axis=1)  # Count pixels darker than the fixed 128 cutoff in each row
+        if len(row_sums) < 10:  # Crops shorter than 10 rows are rejected as too small to analyse
+            return None
+        # Classify each row as "text" or "gap" by comparing its dark-pixel count to a threshold.
+        # The threshold is the larger of 30% of the mean count and (mean - 0.5 * std).
+        mean_val = np.mean(row_sums)
+        std_val = np.std(row_sums)
+        threshold = max(mean_val * 0.3, mean_val - std_val * 0.5)
+        text_rows = np.where(row_sums > threshold)[0]  # Row indices (relative to the crop) classified as text
+        if len(text_rows) < 2:
+            return None  # Can't detect spacing with less than 2 text rows
+        # Cluster text rows into lines: rows whose indices differ by at most 3 join the same group.
+        # Each finished group is reduced to one center row (integer-truncated mean of its indices).
+        line_centers = []
+        current_group = [text_rows[0]]
+        for i in range(1, len(text_rows)):
+            if text_rows[i] - text_rows[i-1] <= 3:  # Gap of up to 3 rows still counts as the same line
+                current_group.append(text_rows[i])
+            else:
+                # Gap larger than 3 rows: close the current line and start a new group
+                line_centers.append(int(np.mean(current_group)))
+                current_group = [text_rows[i]]
+        # Flush the final group, which the loop above never closes
+        if current_group:  # Always non-empty here: it is seeded with text_rows[0] and only ever reassigned to a 1-element list
+            line_centers.append(int(np.mean(current_group)))
+        if len(line_centers) < 2:  # A single detected line gives no center-to-center distance
+            return None
+        # Distances between consecutive line centers, in pixels
+        spacings = []
+        for i in range(len(line_centers) - 1):
+            spacing = line_centers[i+1] - line_centers[i]
+            if spacing > 10:  # Distances of 10px or less are discarded rather than treated as line spacing
+                spacings.append(spacing)
+        if spacings:
+            # Median (not mean) of the kept distances, despite the "avg" wording in the name and log message
+            avg_spacing = np.median(spacings)
+            print(f"   → Detected {len(line_centers)} lines with avg spacing {avg_spacing:.1f}px")
+            return float(avg_spacing)
+        return None  # Every center-to-center distance was filtered out
+    except Exception as e:  # Best-effort: any failure (e.g. malformed bbox) is logged and reported as None
+        print(f"   ⚠️ Could not detect line spacing: {e}")
+        return None
 def split_text_regions_into_lines(
     image: Image.Image,
     layout_data: List[Dict[str, Any]],
     """
     Post-process layout data to split large text regions into individual lines.
+    Uses intelligent line spacing detection and padding to avoid cutting through text.
     Args:
         image: Original image
         print(f"   Checking region: height={height}px, width={width}px, category={category}")
+        # If region is already reasonably line-sized, keep it
+        if height <= max_line_height:
+            print(f"   ✓ Already line-sized (height {height}px <= {max_line_height}px)")
+            result.append(item)
+            continue
         # If region is tall enough to contain multiple lines, split it
+        print(f"   → Splitting! (height {height}px > threshold {max_line_height}px)")
+        # Try to detect actual line spacing from the image
+        detected_spacing = detect_line_spacing(image, bbox)
+        if detected_spacing and detected_spacing > min_line_height:
+            # Use detected spacing for more accurate splitting
+            estimated_lines = max(2, round(height / detected_spacing))
+            line_height = detected_spacing
+            print(f"   → Detected line spacing: {detected_spacing:.1f}px, splitting into ~{estimated_lines} lines")
+        else:
+            # Fallback to estimated line height
             # Arabic handwritten text: ~40-60px per line
             # Arabic typed text: ~30-50px per line
             avg_line_height = 45  # Middle ground
+            estimated_lines = max(2, round(height / avg_line_height))
             line_height = height / estimated_lines
+            print(f"   → Using estimated line height: {avg_line_height}px, splitting into {estimated_lines} lines")
+        # Don't split into too many lines (might be a paragraph)
+        estimated_lines = min(estimated_lines, 10)
+        # Calculate padding to avoid cutting through text
+        # Use 10% of line height as padding, but at least 3px
+        padding = max(3, int(line_height * 0.1))
+        # Split text content by newlines if available
+        text_lines = text_content.split('\n') if text_content else []
+        # If we have the same number of text lines as estimated, use them
+        if len(text_lines) == estimated_lines and len(text_lines) > 1:
+            for i, line_text in enumerate(text_lines):
+                if not line_text.strip():
+                    continue
+                new_item = item.copy()
+                # Calculate bbox with padding to avoid cutting text
+                if i == 0:
+                    # First line: pad bottom only
+                    new_y1 = y1
+                    new_y2 = y1 + line_height + padding
+                elif i == estimated_lines - 1:
+                    # Last line: pad top only
+                    new_y1 = y1 + (i * line_height) - padding
+                    new_y2 = y2
+                else:
+                    # Middle lines: pad both top and bottom
+                    new_y1 = y1 + (i * line_height) - padding
+                    new_y2 = y1 + ((i + 1) * line_height) + padding
+                # Ensure bbox is valid and within image bounds
+                new_y1 = max(y1, int(new_y1))
+                new_y2 = min(y2, int(new_y2))
+                if new_y2 > new_y1:  # Valid bbox
+                    new_item['bbox'] = [x1, new_y1, x2, new_y2]
                     new_item['text'] = line_text.strip()
                     new_item['split_from_parent'] = True
                     result.append(new_item)
+            split_count += 1
+        else:
+            # Split geometrically - mark for re-OCR to get accurate per-line text
+            for i in range(estimated_lines):
+                new_item = item.copy()
+                # Calculate bbox with padding to avoid cutting text
+                if i == 0:
+                    # First line: pad bottom only
+                    new_y1 = y1
+                    new_y2 = y1 + line_height + padding
+                elif i == estimated_lines - 1:
+                    # Last line: pad top only
+                    new_y1 = y1 + (i * line_height) - padding
+                    new_y2 = y2
+                else:
+                    # Middle lines: pad both top and bottom
+                    new_y1 = y1 + (i * line_height) - padding
+                    new_y2 = y1 + ((i + 1) * line_height) + padding
+                # Ensure bbox is valid and within image bounds
+                new_y1 = max(y1, int(new_y1))
+                new_y2 = min(y2, int(new_y2))
+                if new_y2 > new_y1:  # Valid bbox
+                    new_item['bbox'] = [x1, new_y1, x2, new_y2]
                     # Clear text - will be re-OCR'd per-line for accuracy
                     new_item['text'] = ""
                     new_item['split_from_parent'] = True
                     new_item['needs_reocr'] = True  # Flag for re-processing
                     new_item['line_number'] = i + 1
                     result.append(new_item)
+            split_count += 1
     if split_count > 0:
         print(f"📏 Split {split_count} large regions into individual lines ({len(layout_data)} → {len(result)} regions)")
                         if x2 <= x1 or y2 <= y1:
                             continue
+                        # Add small safety margin to ensure we capture full text
+                        margin = 2  # Small margin to avoid edge clipping
+                        crop_x1 = max(0, x1 - margin)
+                        crop_y1 = max(0, y1 - margin)
+                        crop_x2 = min(image.width, x2 + margin)
+                        crop_y2 = min(image.height, y2 + margin)
                         # Crop and preprocess the line region
+                        crop_img = image.crop((crop_x1, crop_y1, crop_x2, crop_y2))
+                        # Validate crop is reasonable size
+                        if crop_img.size[0] < 10 or crop_img.size[1] < 10:
+                            print(f"   ⚠️ Skipping line {idx+1}: crop too small ({crop_img.size})")
+                            item['text'] = "[Crop too small]"
+                            item['confidence'] = 0.0
+                            continue
                         # Apply preprocessing to enhance handwriting quality
                         crop_img = preprocess_for_handwriting_ocr(crop_img)
         share=False,
         debug=True,
         show_error=True
+    )