Spaces:

Curify
/

manga_translation

Runtime error

App Files Files Community

qqwjq1981 committed on Dec 8, 2025

Commit

3435684

verified ·

1 Parent(s): b164a37

Update utils/image_utils.py

Browse files

Files changed (1) hide show

utils/image_utils.py +263 -45

utils/image_utils.py CHANGED Viewed

@@ -6,39 +6,214 @@ import base64
 from io import BytesIO
-# -----------------------------
-# Find low-complexity horizontal strip
-# -----------------------------
-def find_low_complexity_row(gray, target_row, search_pct=0.2):
     """
-    Find a nearby row (within ±20%) that has low edge/text density.
     """
     h, w = gray.shape
-    search_radius = int(h * search_pct)
-    start = max(0, target_row - search_radius)
-    end   = min(h - 1, target_row + search_radius)
-    # Edge detection
-    edges = cv2.Canny(gray, 80, 160)
-    # Row-wise edge density
-    row_scores = edges[start:end].sum(axis=1)
-    # Choose min edge-density row
     best_local_idx = np.argmin(row_scores)
-    best_row = start + best_local_idx
-    return best_row
-# -----------------------------
-# Load & Split Image (Unified API)
-# -----------------------------
-def load_and_split_image(file_obj, num_chunks):
     """
-    Loads an image from a file object and splits it into num_chunks using
-    intelligent horizontal strip detection (avoids slicing through text/bubbles).
     """
     if file_obj is not None:
         image_path = file_obj.name if hasattr(file_obj, "name") else file_obj
@@ -46,52 +221,95 @@ def load_and_split_image(file_obj, num_chunks):
     else:
         image_path = "00_sample.jpg"
         filename = "00_sample.jpg"
     # Load original image
     image = Image.open(image_path).convert("RGB")
     width, height = image.size
-    # Convert to OpenCV for processing
     img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
     gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
-    # If only 1 chunk → no split needed
     if num_chunks <= 1:
         return filename, image, [image]
-    # Compute approximate uniform splits
-    approx_points = [int(i * height / num_chunks) for i in range(1, num_chunks)]
-    # Adjust each split to nearest low-complexity row
-    split_points = []
-    for pt in approx_points:
-        best = find_low_complexity_row(gray, target_row=pt)
-        split_points.append(best)
-    # Add top and bottom bounds
-    split_points = [0] + split_points + [height]
     # Produce final chunks
     chunks = []
-    for i in range(num_chunks):
         top = split_points[i]
         bottom = split_points[i + 1]
         chunk = image.crop((0, top, width, bottom))
         chunks.append(chunk)
     return filename, image, chunks
-# -----------------------------
 # Encode Image to HTML
-# -----------------------------
 def encode_image_to_html(image: Image.Image) -> str:
     buffered = BytesIO()
     image.save(buffered, format="PNG")
     encoded = base64.b64encode(buffered.getvalue()).decode()
     return f"""
     <div style="height:500px; overflow-y:auto; border:1px solid #ccc;">
         <img src="data:image/png;base64,{encoded}" style="width:100%;" />
     </div>
-    """

 from io import BytesIO
+# ---------------------------------------------------------------------
+# Find solid strips (low complexity horizontal regions)
+# ---------------------------------------------------------------------
def analyze_horizontal_complexity(gray, window_size=5):
    """
    Score how visually busy each row of a grayscale image is.

    For every row, a band of ``window_size`` rows centred on that row is
    examined. The score is the mean Canny edge response over the band plus
    the band's pixel variance divided by 255. Lower scores mark rows that
    are more suitable as split lines.

    Args:
        gray: 2-D grayscale image array (rows x columns).
        window_size: Height of the band in rows. Values below 1 are
            treated as 1. The band is truncated at the image borders.

    Returns:
        1-D float array with one score per image row. An image with no
        rows or no columns yields an empty array.
    """
    h, w = gray.shape
    if h == 0 or w == 0:
        # Nothing to analyse; also avoids handing an empty image to Canny.
        return np.zeros(0, dtype=np.float64)

    window_size = max(1, int(window_size))
    half = window_size // 2

    # Detect edges once for the whole image.
    edges = cv2.Canny(gray, 80, 160)

    scores = np.empty(h, dtype=np.float64)
    for y in range(h):
        # Slice bounds are exclusive at the top end, so the band must run to
        # (y - half + window_size) to really be window_size rows tall.
        y_start = max(0, y - half)
        y_end = min(h, y - half + window_size)

        # Edge density: mean edge response over the band.
        edge_score = np.sum(edges[y_start:y_end, :]) / (w * (y_end - y_start))
        # Texture: variance of the raw pixel values in the band.
        variance_score = np.var(gray[y_start:y_end, :])

        scores[y] = edge_score + variance_score / 255.0
    return scores
def find_solid_strips(gray, min_strip_height=10, complexity_threshold=0.1):
    """
    Find all solid/low-complexity horizontal strips suitable for splitting.

    Row scores from ``analyze_horizontal_complexity`` are scaled by their
    maximum, and every run of consecutive rows scoring below
    ``complexity_threshold`` that is at least ``min_strip_height`` rows
    tall is reported.

    Args:
        gray: 2-D grayscale image array.
        min_strip_height: Minimum consecutive rows with low complexity.
        complexity_threshold: Maximum scaled complexity score (lower = stricter).

    Returns:
        List of ``(start_y, end_y, score)`` tuples, with ``end_y`` exclusive
        and ``score`` the mean scaled complexity of the strip, sorted so the
        lowest-scoring strips come first. Empty when the image has no pixels.
    """
    if gray.size == 0:
        # An empty image (or empty search region) has no strips; bailing out
        # here avoids calling max() on an empty score array.
        return []

    h = gray.shape[0]
    complexity = analyze_horizontal_complexity(gray)

    # Scale scores to [0, 1]; a completely flat image keeps its all-zero scores.
    peak = complexity.max()
    if peak > 0:
        complexity = complexity / peak

    is_simple = complexity < complexity_threshold

    strips = []
    run_start = None
    # Iterating one index past the last row closes a run that touches the
    # bottom edge without a separate post-loop case.
    for i in range(h + 1):
        if i < h and is_simple[i]:
            if run_start is None:
                run_start = i
            continue
        if run_start is not None:
            if i - run_start >= min_strip_height:
                avg_score = float(np.mean(complexity[run_start:i]))
                strips.append((run_start, i, avg_score))
            run_start = None

    # Best (lowest-complexity) strips first.
    strips.sort(key=lambda strip: strip[2])
    return strips
def find_best_split_location(gray, target_row, search_pct=0.2, prefer_solid_strips=True):
    """
    Find the best row near ``target_row`` for splitting.

    Rows within ``search_pct`` of the image height above and below the
    target are searched. With ``prefer_solid_strips`` set, the centre of the
    low-complexity strip closest to the target is returned if the window
    contains one. Otherwise the row in the window with the smallest Canny
    edge sum is returned.

    Args:
        gray: 2-D grayscale image array.
        target_row: Desired split location (row index).
        search_pct: Search radius as a fraction of image height.
        prefer_solid_strips: If True, prefer solid strips over the
            edge-density fallback.

    Returns:
        Row index (int) for splitting. If the search window is empty, the
        target row clamped to the image is returned.
    """
    h = gray.shape[0]
    search_radius = int(h * search_pct)
    start = max(0, target_row - search_radius)
    # `end` is used as an exclusive slice bound, so the last image row is
    # never proposed as a split.
    end = min(h - 1, target_row + search_radius)

    if end <= start:
        # Nothing to search (tiny image, zero radius, or target outside the
        # image); argmin on an empty window would raise.
        return int(min(max(target_row, 0), max(h - 1, 0)))

    if prefer_solid_strips:
        # Strip coordinates come back relative to the search region.
        strips = find_solid_strips(
            gray[start:end, :], min_strip_height=5, complexity_threshold=0.15
        )
        if strips:
            local_target = target_row - start
            nearest = min(
                strips, key=lambda s: abs((s[0] + s[1]) // 2 - local_target)
            )
            # Centre of the chosen strip, mapped back to image coordinates.
            return int(start + (nearest[0] + nearest[1]) // 2)

    # Fallback: row with the lowest edge response inside the window.
    edges = cv2.Canny(gray, 80, 160)
    row_scores = edges[start:end].sum(axis=1)
    return int(start + np.argmin(row_scores))
def find_optimal_splits(gray, desired_chunks, min_chunk_height=200):
    """
    Find split locations, returning fewer chunks than requested if good
    split points don't exist.

    The image is divided at the centres of low-complexity strips nearest to
    the uniform split positions. A candidate is dropped when it would leave
    a chunk shorter than ``min_chunk_height``, either above it or between it
    and the bottom of the image. When no strips are found at all, uniform
    splits are used.

    Args:
        gray: 2-D grayscale image array.
        desired_chunks: Target number of chunks.
        min_chunk_height: Minimum height for each chunk in rows; values
            below 1 are treated as 1.

    Returns:
        List of split points (y-coordinates) starting with 0 and ending with
        the image height; consecutive pairs delimit the chunks.
    """
    h = gray.shape[0]
    # Guard against division by zero / negative heights.
    min_chunk_height = max(1, int(min_chunk_height))

    # If image too small for desired chunks, reduce.
    max_possible_chunks = max(1, h // min_chunk_height)
    actual_chunks = min(desired_chunks, max_possible_chunks)
    if actual_chunks <= 1:
        print(f"⚠️ Image too small for multiple chunks ({h}px height)")
        return [0, h]

    solid_strips = find_solid_strips(gray, min_strip_height=10, complexity_threshold=0.12)
    if not solid_strips:
        print("⚠️ No solid strips found, using uniform splits")
        return [int(i * h / actual_chunks) for i in range(actual_chunks + 1)]

    print(f"✓ Found {len(solid_strips)} solid strips")

    # Uniformly spaced targets that each split tries to approximate.
    ideal_splits = [int(i * h / actual_chunks) for i in range(1, actual_chunks)]

    actual_splits = [0]
    for target in ideal_splits:
        # Closest strip centre to this target.
        best_strip = min(solid_strips, key=lambda s: abs((s[0] + s[1]) // 2 - target))
        split_y = (best_strip[0] + best_strip[1]) // 2

        if split_y - actual_splits[-1] < min_chunk_height:
            print(f"⚠️ Skipping split at {split_y} (too close to previous)")
        elif h - split_y < min_chunk_height:
            # Without this check the final chunk could be arbitrarily short.
            print(f"⚠️ Skipping split at {split_y} (too close to image bottom)")
        else:
            actual_splits.append(split_y)
    actual_splits.append(h)

    num_resulting_chunks = len(actual_splits) - 1
    if num_resulting_chunks < desired_chunks:
        print(f"ℹ️ Returning {num_resulting_chunks} chunks (requested {desired_chunks}, but not enough good split points)")
    return actual_splits
+# ---------------------------------------------------------------------
+# Load & Split Image (Enhanced)
+# ---------------------------------------------------------------------
+def load_and_split_image(file_obj, num_chunks, min_chunk_height=200, allow_fewer_chunks=True):
     """
+    Loads an image and splits it intelligently across solid strips.
+    Can return fewer chunks than requested if good split points don't exist.
+    Args:
+        file_obj: File object or path
+        num_chunks: Desired number of chunks
+        min_chunk_height: Minimum height per chunk (pixels)
+        allow_fewer_chunks: If True, can return < num_chunks
+    Returns:
+        (filename, original_image, list_of_chunks)
     """
     if file_obj is not None:
         image_path = file_obj.name if hasattr(file_obj, "name") else file_obj
     else:
         image_path = "00_sample.jpg"
         filename = "00_sample.jpg"
     # Load original image
     image = Image.open(image_path).convert("RGB")
     width, height = image.size
+    print(f"📏 Image size: {width}x{height}")
+    # Convert to OpenCV for analysis
     img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
     gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
+    # If only 1 chunk requested, no split needed
     if num_chunks <= 1:
         return filename, image, [image]
+    # Find optimal split locations
+    if allow_fewer_chunks:
+        split_points = find_optimal_splits(gray, num_chunks, min_chunk_height)
+    else:
+        # Old behavior: always return exact number of chunks
+        approx_points = [int(i * height / num_chunks) for i in range(1, num_chunks)]
+        split_points = [0]
+        for pt in approx_points:
+            best = find_best_split_location(gray, target_row=pt, prefer_solid_strips=True)
+            split_points.append(best)
+        split_points.append(height)
     # Produce final chunks
     chunks = []
+    num_actual_chunks = len(split_points) - 1
+    for i in range(num_actual_chunks):
         top = split_points[i]
         bottom = split_points[i + 1]
         chunk = image.crop((0, top, width, bottom))
         chunks.append(chunk)
+        print(f"  Chunk {i+1}: rows {top}-{bottom} (height: {bottom-top}px)")
+    print(f"✅ Split into {len(chunks)} chunks")
     return filename, image, chunks
+# ---------------------------------------------------------------------
+# Visualization Helper
+# ---------------------------------------------------------------------
def visualize_split_analysis(gray, split_points):
    """
    Create a visualization showing complexity analysis and split points.
    Useful for debugging split decisions.

    Args:
        gray: 2-D grayscale image array.
        split_points: Split y-coordinates including the leading 0 and the
            trailing image height; only the interior points are drawn.

    Returns:
        BGR image: the input with a green line at each interior split, and a
        50-pixel-wide complexity heatmap appended on the right (green = low
        complexity, red = high).
    """
    h, w = gray.shape

    complexity = analyze_horizontal_complexity(gray)
    vis = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)

    # Scale scores to 0..255. A flat image has peak 0, which would otherwise
    # divide by zero and fill the heatmap with NaN-derived garbage.
    peak = complexity.max() if complexity.size else 0
    if peak > 0:
        level = (complexity / peak * 255).astype(np.uint8)
    else:
        level = np.zeros(h, dtype=np.uint8)

    # Build the side heatmap in BGR order: blue stays 0, green fades out and
    # red fades in as complexity rises.
    heatmap_width = 50
    heatmap = np.zeros((h, heatmap_width, 3), dtype=np.uint8)
    heatmap[:, :, 1] = (255 - level)[:, np.newaxis]
    heatmap[:, :, 2] = level[:, np.newaxis]

    # Draw split lines, skipping the first and last boundary.
    for split_y in split_points[1:-1]:
        row = int(split_y)  # plain int in case split points are NumPy integers
        cv2.line(vis, (0, row), (w, row), (0, 255, 0), 2)

    return np.hstack([vis, heatmap])
+# ---------------------------------------------------------------------
 # Encode Image to HTML
+# ---------------------------------------------------------------------
 def encode_image_to_html(image: Image.Image) -> str:
     buffered = BytesIO()
     image.save(buffered, format="PNG")
     encoded = base64.b64encode(buffered.getvalue()).decode()
     return f"""
     <div style="height:500px; overflow-y:auto; border:1px solid #ccc;">
         <img src="data:image/png;base64,{encoded}" style="width:100%;" />
     </div>
+    """