# manga_translation/utils/image_utils.py
# qqwjq1981's picture
# Update utils/image_utils.py
# 3435684 verified
import os
import numpy as np
import cv2
from PIL import Image
import base64
from io import BytesIO
# ---------------------------------------------------------------------
# Find solid strips (low complexity horizontal regions)
# ---------------------------------------------------------------------
def analyze_horizontal_complexity(gray, window_size=5):
    """
    Score how visually "busy" each row of a grayscale image is.

    For every row, a band of ``window_size`` rows around it is examined and
    two measurements are added together:

    * edge density: the sum of the Canny edge map over the band divided by
      the number of pixels in the band, and
    * texture: the variance of the grayscale values in the band, divided
      by 255.

    Lower scores mark rows that are more suitable for splitting.

    Args:
        gray: 2-D grayscale image array (rows x columns).
        window_size: Height, in rows, of the band analysed around each row.
            Values below 1 are treated as 1. Bands are clipped at the top
            and bottom borders of the image.

    Returns:
        1-D float array holding one complexity score per image row.
    """
    h, w = gray.shape
    window_size = max(1, int(window_size))
    half = window_size // 2

    # Edge map is computed once for the whole image and sliced per band.
    edges = cv2.Canny(gray, 80, 160)

    complexity_scores = np.empty(h, dtype=np.float64)
    for y in range(h):
        # The band [y - half, y - half + window_size) always contains row y,
        # so it is never empty and spans exactly window_size rows away from
        # the image borders.
        y_start = max(0, y - half)
        y_end = min(h, y - half + window_size)
        band_rows = y_end - y_start

        # Edge density over the band
        edge_score = np.sum(edges[y_start:y_end, :]) / (w * band_rows)
        # Variance (texture) over the band
        variance_score = np.var(gray[y_start:y_end, :])

        complexity_scores[y] = edge_score + variance_score / 255.0

    return complexity_scores
def find_solid_strips(gray, min_strip_height=10, complexity_threshold=0.1):
    """
    Find all solid/low-complexity horizontal strips suitable for splitting.

    Row complexity comes from ``analyze_horizontal_complexity`` and is
    rescaled by its maximum (when that maximum is positive), so the threshold
    is relative to the busiest row of the image that was passed in.

    Args:
        gray: 2-D grayscale image array.
        min_strip_height: Minimum number of consecutive low-complexity rows
            for a run to count as a strip.
        complexity_threshold: Rows whose rescaled score is strictly below
            this value are considered simple (lower = stricter).

    Returns:
        List of ``(start_y, end_y, score)`` tuples, ``end_y`` exclusive and
        ``score`` the mean rescaled complexity of the strip, sorted by score
        ascending (best strips first). Empty when the image has no rows.
    """
    h = gray.shape[0]
    if h == 0:
        # Nothing to analyse; also avoids calling max() on an empty array.
        return []

    complexity = analyze_horizontal_complexity(gray)

    # Normalize complexity scores
    peak = complexity.max()
    if peak > 0:
        complexity = complexity / peak

    is_simple = complexity < complexity_threshold

    # Pad with False on both sides so every run of simple rows has both a
    # rising and a falling boundary, including runs touching the borders.
    padded = np.concatenate(([False], is_simple, [False]))
    boundaries = np.flatnonzero(padded[1:] != padded[:-1])
    run_starts = boundaries[0::2]
    run_ends = boundaries[1::2]

    strips = [
        (int(first), int(stop), np.mean(complexity[first:stop]))
        for first, stop in zip(run_starts, run_ends)
        if stop - first >= min_strip_height
    ]

    # Sort by score (best strips first); the sort is stable, so ties keep
    # top-to-bottom order.
    strips.sort(key=lambda strip: strip[2])
    return strips
def find_best_split_location(gray, target_row, search_pct=0.2, prefer_solid_strips=True):
    """
    Find the best row near ``target_row`` for splitting.

    The search window is ``target_row`` plus/minus ``search_pct`` of the
    image height, clipped to the image. Within it, the centre of the solid
    strip closest to ``target_row`` is preferred; if no strip is found (or
    ``prefer_solid_strips`` is False) the row with the smallest Canny edge
    sum is used instead.

    Args:
        gray: 2-D grayscale image array.
        target_row: Desired split location (row index).
        search_pct: Search radius as a fraction of the image height.
        prefer_solid_strips: If True, try solid strips before falling back
            to edge density.

    Returns:
        Row index (plain ``int``) to split at. When the search window is
        empty (very small image, zero radius, or out-of-range target),
        ``target_row`` clamped to the image is returned.
    """
    h = gray.shape[0]
    search_radius = int(h * search_pct)
    start = max(0, target_row - search_radius)
    end = min(h - 1, target_row + search_radius)

    if end <= start:
        # No rows to search: both the strip finder and argmin would fail on
        # an empty slice, so fall back to the requested row.
        return int(max(0, min(h - 1, target_row)))

    if prefer_solid_strips:
        # Strip coordinates are relative to the top of the search region.
        strips = find_solid_strips(
            gray[start:end, :], min_strip_height=5, complexity_threshold=0.15
        )
        if strips:
            local_target = target_row - start
            # Choose the strip whose centre is closest to the target
            best_strip = min(
                strips,
                key=lambda s: abs((s[0] + s[1]) // 2 - local_target),
            )
            return int(start + (best_strip[0] + best_strip[1]) // 2)

    # Fallback: row with the lowest edge sum inside the window
    edges = cv2.Canny(gray, 80, 160)
    row_scores = edges[start:end].sum(axis=1)
    return int(start + np.argmin(row_scores))
def find_optimal_splits(gray, desired_chunks, min_chunk_height=200):
    """
    Find optimal split locations, potentially returning fewer chunks if
    good split points don't exist.

    Ideal split rows are spaced uniformly; each is snapped to the centre of
    the nearest solid strip. A snapped split is dropped when it would leave
    less than ``min_chunk_height`` rows either above it (to the previous
    accepted split) or below it (to the bottom of the image). If no solid
    strips are found at all, uniform splits are returned.

    Args:
        gray: 2-D grayscale image array.
        desired_chunks: Target number of chunks.
        min_chunk_height: Minimum height for each chunk in rows. Values
            below 1 are treated as 1.

    Returns:
        List of split points (y-coordinates) starting with 0 and ending with
        the image height; consecutive pairs delimit the chunks.
    """
    h = gray.shape[0]
    # Guard against division by zero / negative heights.
    min_chunk_height = max(1, min_chunk_height)

    # If image too small for desired chunks, reduce
    max_possible_chunks = max(1, h // min_chunk_height)
    actual_chunks = min(desired_chunks, max_possible_chunks)
    if actual_chunks <= 1:
        print(f"⚠️ Image too small for multiple chunks ({h}px height)")
        return [0, h]

    # Find all solid strips
    solid_strips = find_solid_strips(gray, min_strip_height=10, complexity_threshold=0.12)
    if not solid_strips:
        print("⚠️ No solid strips found, using uniform splits")
        # Fallback to uniform splits
        return [int(i * h / actual_chunks) for i in range(actual_chunks + 1)]

    print(f"✓ Found {len(solid_strips)} solid strips")

    # Calculate ideal split locations
    ideal_splits = [int(i * h / actual_chunks) for i in range(1, actual_chunks)]

    # Match each ideal split to nearest solid strip
    actual_splits = [0]  # Start
    for target in ideal_splits:
        # Find closest solid strip center
        best_strip = min(solid_strips, key=lambda s: abs((s[0] + s[1]) // 2 - target))
        split_y = (best_strip[0] + best_strip[1]) // 2

        if split_y - actual_splits[-1] < min_chunk_height:
            print(f"⚠️ Skipping split at {split_y} (too close to previous)")
        elif h - split_y < min_chunk_height:
            # Accepting this split would make the final chunk too short.
            print(f"⚠️ Skipping split at {split_y} (too close to image end)")
        else:
            actual_splits.append(split_y)

    actual_splits.append(h)  # End

    num_resulting_chunks = len(actual_splits) - 1
    if num_resulting_chunks < desired_chunks:
        print(f"ℹ️ Returning {num_resulting_chunks} chunks (requested {desired_chunks}, but not enough good split points)")
    return actual_splits
# ---------------------------------------------------------------------
# Load & Split Image (Enhanced)
# ---------------------------------------------------------------------
def load_and_split_image(file_obj, num_chunks, min_chunk_height=200, allow_fewer_chunks=True):
    """
    Load an image and split it into horizontal chunks along solid strips.

    Args:
        file_obj: Object with a ``name`` attribute holding the path, or the
            path itself. ``None`` falls back to ``"00_sample.jpg"``.
        num_chunks: Desired number of chunks. Values <= 1 return the whole
            image as a single chunk.
        min_chunk_height: Minimum height per chunk (pixels); only used when
            ``allow_fewer_chunks`` is True.
        allow_fewer_chunks: If True, split points come from
            ``find_optimal_splits`` and fewer than ``num_chunks`` chunks may
            be returned. If False, exactly ``num_chunks`` chunks are
            produced, one split per uniformly spaced target row.

    Returns:
        ``(filename, original_image, list_of_chunks)`` where the image and
        chunks are RGB PIL images.
    """
    if file_obj is not None:
        image_path = file_obj.name if hasattr(file_obj, "name") else file_obj
        filename = os.path.basename(image_path)
    else:
        image_path = "00_sample.jpg"
        filename = "00_sample.jpg"

    # Load original image; convert() returns an independent copy, so the
    # underlying file can be closed as soon as the conversion is done.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    width, height = image.size
    print(f"📐 Image size: {width}x{height}")

    # If only 1 chunk requested, no split (and no analysis) is needed
    if num_chunks <= 1:
        return filename, image, [image]

    # Grayscale copy for the complexity analysis
    gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)

    # Find optimal split locations
    if allow_fewer_chunks:
        split_points = find_optimal_splits(gray, num_chunks, min_chunk_height)
    else:
        # Old behavior: always return exact number of chunks
        approx_points = [int(i * height / num_chunks) for i in range(1, num_chunks)]
        split_points = [0]
        for pt in approx_points:
            best = int(find_best_split_location(gray, target_row=pt, prefer_solid_strips=True))
            previous = split_points[-1]
            if best <= previous:
                # Search windows of neighbouring targets overlap, so the
                # chosen row can land on or above the previous split. Fall
                # back to the uniform target (never moving upwards) so the
                # crop boxes below are never inverted.
                best = max(pt, previous)
            split_points.append(best)
        split_points.append(height)

    # Produce final chunks
    chunks = []
    for i, (top, bottom) in enumerate(zip(split_points, split_points[1:])):
        chunks.append(image.crop((0, top, width, bottom)))
        print(f" Chunk {i+1}: rows {top}-{bottom} (height: {bottom-top}px)")

    print(f"✅ Split into {len(chunks)} chunks")
    return filename, image, chunks
# ---------------------------------------------------------------------
# Visualization Helper
# ---------------------------------------------------------------------
def visualize_split_analysis(gray, split_points):
    """
    Create a visualization showing complexity analysis and split points.
    Useful for debugging split decisions.

    Args:
        gray: 2-D grayscale image array.
        split_points: Sequence of split rows; the first and last entries are
            skipped and every other entry is drawn as a horizontal line.

    Returns:
        3-channel image: the grayscale input (converted with
        ``COLOR_GRAY2BGR``) with split lines drawn on it, followed on the
        right by a 50-pixel-wide per-row complexity heatmap.
    """
    h, w = gray.shape

    # Analyze complexity
    complexity = analyze_horizontal_complexity(gray)

    # Create visualization
    vis = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)

    # Rescale scores to 0..255; a perfectly flat image has max 0, in which
    # case every row is shown at the "low" end instead of dividing by zero.
    peak = complexity.max() if complexity.size else 0
    if peak > 0:
        normalized_complexity = (complexity / peak * 255).astype(np.uint8)
    else:
        normalized_complexity = np.zeros(h, dtype=np.uint8)

    # Draw complexity heatmap on the side: channel 0 stays 0, channel 1
    # fades out and channel 2 grows with complexity (Green=low, Red=high).
    heatmap_width = 50
    heatmap = np.zeros((h, heatmap_width, 3), dtype=np.uint8)
    heatmap[:, :, 1] = (255 - normalized_complexity)[:, np.newaxis]
    heatmap[:, :, 2] = normalized_complexity[:, np.newaxis]

    # Draw split lines
    for split_y in split_points[1:-1]:  # Skip first and last
        row = int(split_y)  # plain int for the OpenCV drawing call
        cv2.line(vis, (0, row), (w, row), (0, 255, 0), 2)

    # Combine
    return np.hstack([vis, heatmap])
# ---------------------------------------------------------------------
# Encode Image to HTML
# ---------------------------------------------------------------------
def encode_image_to_html(image: Image.Image) -> str:
    """
    Wrap an image in an HTML snippet with the picture embedded inline.

    The image is serialised to PNG in memory, base64-encoded, and placed in
    an ``<img>`` tag (as a ``data:image/png;base64`` URI) inside a ``<div>``
    with a fixed 500px height and vertical scrolling.

    Args:
        image: PIL image to embed.

    Returns:
        HTML markup string.
    """
    png_buffer = BytesIO()
    image.save(png_buffer, format="PNG")
    png_bytes = png_buffer.getvalue()
    b64_payload = base64.b64encode(png_bytes).decode()
    return f"""
<div style="height:500px; overflow-y:auto; border:1px solid #ccc;">
<img src="data:image/png;base64,{b64_payload}" style="width:100%;" />
</div>
"""