# Spaces: Curify / manga_translation (status: Sleeping)
# File size: 10,433 Bytes

import os
import numpy as np
import cv2
from PIL import Image
import base64
from io import BytesIO


# ---------------------------------------------------------------------
# Find solid strips (low complexity horizontal regions)
# ---------------------------------------------------------------------

def analyze_horizontal_complexity(gray, window_size=5):
    """
    Score how visually "busy" the neighbourhood of every image row is.

    For each row, a band of ``window_size`` rows around it (clipped at the
    top and bottom of the image) is examined and two measures are added:

    * the mean of the Canny edge map over the band (cv2.Canny marks edge
      pixels with 255, so this is 255 times the fraction of edge pixels);
    * the variance of the grayscale values in the band, divided by 255.

    Lower scores mean flatter content, i.e. better places to cut the image.
    The scores are raw (not scaled to [0, 1]); callers normalise as needed.

    Args:
        gray: 2-D grayscale image (8-bit single channel, as cv2.Canny expects)
        window_size: Height in rows of the band analysed around each row (>= 1)

    Returns:
        1-D float array holding one complexity score per image row

    Raises:
        ValueError: If window_size is smaller than 1
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    h, w = gray.shape

    # Edge map is computed once on the full image so that band borders do not
    # create artificial edges.
    edges = cv2.Canny(gray, 80, 160)

    rows_above = window_size // 2
    complexity_scores = np.empty(h, dtype=np.float64)

    for y in range(h):
        # Half-open band [y_start, y_end) spanning exactly window_size rows
        # before clipping. The end is derived from the unclipped start so the
        # band is never empty and really is window_size rows tall.
        y_start = max(0, y - rows_above)
        y_end = min(h, y - rows_above + window_size)

        window = gray[y_start:y_end, :]
        edge_window = edges[y_start:y_end, :]

        # Edge density: mean edge-map value over the band
        edge_score = np.sum(edge_window) / (w * (y_end - y_start))

        # Texture: variance of the gray levels in the band
        variance_score = np.var(window)

        complexity_scores[y] = edge_score + variance_score / 255.0

    return complexity_scores


def find_solid_strips(gray, min_strip_height=10, complexity_threshold=0.1):
    """
    Find all solid/low-complexity horizontal strips suitable for splitting.

    Row complexity comes from analyze_horizontal_complexity() and is scaled
    so the busiest row scores 1.0 (left untouched when every row scores 0).
    A strip is a maximal run of consecutive rows whose scaled score is below
    ``complexity_threshold``; runs shorter than ``min_strip_height`` are
    discarded.

    Args:
        gray: Grayscale image (2-D array)
        min_strip_height: Minimum consecutive rows with low complexity
        complexity_threshold: Maximum scaled complexity score (lower = stricter)

    Returns:
        List of (start_y, end_y, score) tuples, end_y exclusive, score being
        the mean scaled complexity of the strip. Sorted by score, flattest
        strip first. Empty list for an image without pixels.
    """
    # An image with no rows or no columns has nothing to analyse; the
    # reductions below would raise on a zero-size array.
    if gray.size == 0:
        return []

    complexity = analyze_horizontal_complexity(gray)

    # Normalize complexity scores relative to the busiest row
    peak = complexity.max()
    if peak > 0:
        complexity = complexity / peak

    is_simple = complexity < complexity_threshold

    # Pad with False on both sides so every run has a rising and a falling
    # boundary; boundaries then alternate start, end, start, end, ...
    padded = np.concatenate(([False], is_simple, [False]))
    boundaries = np.flatnonzero(padded[1:] != padded[:-1])
    run_starts = boundaries[::2]
    run_ends = boundaries[1::2]  # exclusive

    strips = [
        (int(first), int(stop), float(np.mean(complexity[first:stop])))
        for first, stop in zip(run_starts, run_ends)
        if stop - first >= min_strip_height
    ]

    # Sort by score (best strips first); the sort is stable, so equally flat
    # strips stay in top-to-bottom order.
    strips.sort(key=lambda strip: strip[2])

    return strips


def find_best_split_location(gray, target_row, search_pct=0.2, prefer_solid_strips=True):
    """
    Find the best row near target_row for splitting.

    Rows in the half-open window [start, end) around ``target_row`` are
    considered, where the window radius is ``search_pct`` of the image
    height and the window is clipped to the image. When solid strips are
    preferred and at least one exists inside the window, the centre of the
    strip closest to the target is returned. Otherwise the row with the
    fewest Canny edge pixels in the window is chosen.

    Args:
        gray: Grayscale image (2-D array)
        target_row: Desired split location
        search_pct: Search radius as a fraction of image height
        prefer_solid_strips: If True, strongly prefer solid strips

    Returns:
        Best row index for splitting (int). If the search window is empty
        (radius of zero rows, or target outside the image) the target row
        clamped into the image is returned.
    """
    h = gray.shape[0]
    search_radius = int(h * search_pct)

    start = max(0, target_row - search_radius)
    end = min(h - 1, target_row + search_radius)

    # Nothing to search: avoid argmin on an empty array, and avoid a negative
    # `end` being interpreted as "count from the bottom" by the slices below.
    if end <= start:
        return int(min(max(target_row, 0), max(h - 1, 0)))

    if prefer_solid_strips:
        # Strip coordinates returned here are relative to the window
        strips = find_solid_strips(
            gray[start:end, :], min_strip_height=5, complexity_threshold=0.15
        )

        if strips:
            local_target = target_row - start
            # Choose the strip whose centre is closest to the target
            best_strip = min(
                strips, key=lambda s: abs((s[0] + s[1]) // 2 - local_target)
            )
            return int(start + (best_strip[0] + best_strip[1]) // 2)

    # Fallback: pick the row crossing the fewest edges. Canny runs on the
    # whole image so the window borders do not influence edge detection.
    edges = cv2.Canny(gray, 80, 160)
    row_scores = edges[start:end].sum(axis=1)

    return int(start + np.argmin(row_scores))


def find_optimal_splits(gray, desired_chunks, min_chunk_height=200):
    """
    Find optimal split locations, potentially returning fewer chunks if
    good split points don't exist.

    The image is divided into equal parts to get ideal cut rows; each ideal
    row is then moved to the centre of the nearest solid strip. A cut is
    dropped when it would leave less than ``min_chunk_height`` rows between
    it and the previous cut, or between it and the bottom of the image.
    When no solid strip exists at all, uniform cuts are returned instead.

    Args:
        gray: Grayscale image (2-D array)
        desired_chunks: Target number of chunks
        min_chunk_height: Minimum height for each chunk (values below 1 are
            treated as 1)

    Returns:
        List of split points (y-coordinates) starting with 0 and ending with
        the image height; consecutive entries delimit one chunk.
    """
    h = gray.shape[0]

    # A non-positive minimum would make the integer division below fail
    min_chunk_height = max(1, min_chunk_height)

    # A single chunk was asked for: nothing to search, nothing to warn about
    if desired_chunks <= 1:
        return [0, h]

    # If image too small for desired chunks, reduce
    max_possible_chunks = max(1, h // min_chunk_height)
    actual_chunks = min(desired_chunks, max_possible_chunks)

    if actual_chunks <= 1:
        print(f"⚠️ Image too small for multiple chunks ({h}px height)")
        return [0, h]

    # Find all solid strips
    solid_strips = find_solid_strips(gray, min_strip_height=10, complexity_threshold=0.12)

    if not solid_strips:
        print("⚠️ No solid strips found, using uniform splits")
        # Fallback to uniform splits
        return [int(i * h / actual_chunks) for i in range(actual_chunks + 1)]

    print(f"✓ Found {len(solid_strips)} solid strips")

    # Candidate cut rows: the centre of every solid strip
    strip_centers = [(top + bottom) // 2 for top, bottom, _ in solid_strips]

    # Calculate ideal (uniform) split locations
    ideal_splits = [int(i * h / actual_chunks) for i in range(1, actual_chunks)]

    actual_splits = [0]  # Start

    for target in ideal_splits:
        # Snap the ideal cut to the closest solid strip centre
        split_y = min(strip_centers, key=lambda center: abs(center - target))

        if split_y - actual_splits[-1] < min_chunk_height:
            print(f"⚠️ Skipping split at {split_y} (too close to previous)")
        elif h - split_y < min_chunk_height:
            # Without this check the final chunk could end up shorter than
            # the requested minimum height.
            print(f"⚠️ Skipping split at {split_y} (too close to image end)")
        else:
            actual_splits.append(split_y)

    actual_splits.append(h)  # End

    num_resulting_chunks = len(actual_splits) - 1
    if num_resulting_chunks < desired_chunks:
        print(f"ℹ️ Returning {num_resulting_chunks} chunks (requested {desired_chunks}, but not enough good split points)")

    return actual_splits


# ---------------------------------------------------------------------
# Load & Split Image (Enhanced)
# ---------------------------------------------------------------------

def load_and_split_image(file_obj, num_chunks, min_chunk_height=200, allow_fewer_chunks=True):
    """
    Loads an image and splits it intelligently across solid strips.
    Can return fewer chunks than requested if good split points don't exist.

    Args:
        file_obj: Path, or an object exposing the path as ``.name``; when
            None the file "00_sample.jpg" is loaded
        num_chunks: Desired number of chunks
        min_chunk_height: Minimum height per chunk (pixels); only used when
            allow_fewer_chunks is True
        allow_fewer_chunks: If True, can return < num_chunks. If False, one
            cut is searched near every uniform split position; cuts that
            coincide with each other or with the image border are merged,
            so no empty chunk is ever produced.

    Returns:
        (filename, original_image, list_of_chunks), where the images are
        RGB PIL images and the chunks are ordered top to bottom
    """
    if file_obj is not None:
        image_path = file_obj.name if hasattr(file_obj, "name") else file_obj
        filename = os.path.basename(image_path)
    else:
        image_path = "00_sample.jpg"
        filename = "00_sample.jpg"

    # convert() returns an independent, fully loaded copy, so the file handle
    # opened by Image.open can be released right away.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    width, height = image.size

    print(f"📏 Image size: {width}x{height}")

    # If only 1 chunk requested, no split (and no image analysis) is needed
    if num_chunks <= 1:
        return filename, image, [image]

    # Grayscale copy for the complexity analysis
    gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)

    # Find optimal split locations
    if allow_fewer_chunks:
        split_points = find_optimal_splits(gray, num_chunks, min_chunk_height)
    else:
        # Old behavior: search one cut around each uniform split position
        approx_points = [int(i * height / num_chunks) for i in range(1, num_chunks)]
        found = [
            int(find_best_split_location(gray, target_row=pt, prefer_solid_strips=True))
            for pt in approx_points
        ]
        # Neighbouring search windows overlap, so the cuts found are not
        # guaranteed to be in ascending order; sort to keep crop boxes valid.
        split_points = [0] + sorted(found) + [height]

    # Produce final chunks
    chunks = []

    for top, bottom in zip(split_points[:-1], split_points[1:]):
        if bottom <= top:
            # Two cuts landed on the same row: skip the empty slice
            continue
        chunks.append(image.crop((0, top, width, bottom)))
        print(f"  Chunk {len(chunks)}: rows {top}-{bottom} (height: {bottom-top}px)")

    print(f"✅ Split into {len(chunks)} chunks")
    return filename, image, chunks


# ---------------------------------------------------------------------
# Visualization Helper
# ---------------------------------------------------------------------

def visualize_split_analysis(gray, split_points):
    """
    Create a visualization showing complexity analysis and split points.
    Useful for debugging split decisions.

    Args:
        gray: Grayscale image (2-D array)
        split_points: Split rows including the leading 0 and the trailing
            image height; only the interior entries are drawn

    Returns:
        BGR image: the input with green split lines, with a 50-pixel wide
        per-row complexity bar appended on the right (green = low
        complexity, red = high complexity)
    """
    h, w = gray.shape

    # Analyze complexity
    complexity = analyze_horizontal_complexity(gray)

    # Create visualization
    vis = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)

    # Scale complexity to 0..255. A completely flat image scores 0 on every
    # row; dividing by that maximum would produce NaN, so treat it as all-low.
    peak = complexity.max() if complexity.size else 0
    if peak > 0:
        level = (complexity / peak * 255).astype(np.uint8)
    else:
        level = np.zeros(h, dtype=np.uint8)

    # Draw complexity heatmap on the side (channels are B, G, R)
    heatmap_width = 50
    heatmap = np.zeros((h, heatmap_width, 3), dtype=np.uint8)
    heatmap[:, :, 1] = (255 - level)[:, np.newaxis]  # Green fades as complexity rises
    heatmap[:, :, 2] = level[:, np.newaxis]          # Red grows with complexity

    # Draw split lines
    for split_y in split_points[1:-1]:  # Skip first and last
        row = int(split_y)
        cv2.line(vis, (0, row), (w, row), (0, 255, 0), 2)

    # Combine
    return np.hstack([vis, heatmap])


# ---------------------------------------------------------------------
# Encode Image to HTML
# ---------------------------------------------------------------------

def encode_image_to_html(image: Image.Image) -> str:
    """
    Render an image as an HTML snippet with the picture embedded inline.

    The image is serialised to PNG in memory, base64-encoded and placed in a
    ``data:`` URI inside an ``<img>`` tag. The tag is wrapped in a 500px-high
    scrollable ``<div>`` with a thin grey border.

    Args:
        image: Image to embed

    Returns:
        HTML markup containing the embedded image
    """
    with BytesIO() as png_buffer:
        image.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()

    # base64 output is pure ASCII
    payload = base64.b64encode(png_bytes).decode("ascii")

    return f"""
    <div style="height:500px; overflow-y:auto; border:1px solid #ccc;">
        <img src="data:image/png;base64,{payload}" style="width:100%;" />
    </div>
    """