# gazou / app.py
# (Hugging Face page-header residue removed: user "mook84816", "Update app.py", commit 32b8282)
import gradio as gr
import numpy as np
from PIL import Image, ImageDraw
import tempfile
import zipfile
import os
from typing import Tuple, List, Dict
# =========================
# 基本
# =========================
def to_rgba(image: Image.Image) -> Image.Image:
    """Return an RGBA version of *image*: a copy when already RGBA, else a conversion."""
    if image.mode == "RGBA":
        return image.copy()
    return image.convert("RGBA")
def estimate_bg_color(arr: np.ndarray) -> Tuple[int, int, int]:
    """Estimate the background RGB color by averaging the four image corners.

    Corner patch size scales with the image (at least 8 px, ~5% of each side).
    """
    h, w = arr.shape[:2]
    pad_y = max(8, h // 20)
    pad_x = max(8, w // 20)
    row_slices = (slice(None, pad_y), slice(-pad_y, None))
    col_slices = (slice(None, pad_x), slice(-pad_x, None))
    patches = [arr[r, c, :3].reshape(-1, 3) for r in row_slices for c in col_slices]
    mean_rgb = np.concatenate(patches, axis=0).mean(axis=0)
    return tuple(int(channel) for channel in mean_rgb)
def build_subject_mask(arr: np.ndarray, bg_color: Tuple[int, int, int], tolerance: int = 28) -> np.ndarray:
    """Boolean foreground mask.

    Prefers the alpha channel when it carries real transparency information;
    otherwise keys out pixels whose summed RGB distance from *bg_color* is
    within *tolerance*.
    """
    if arr.shape[2] == 4:
        alpha = arr[:, :, 3]
        # Only trust alpha when it is actually used (not a fully opaque dummy channel).
        if np.any(alpha < 250):
            return alpha > 10
    bg = np.asarray(bg_color, dtype=np.int16).reshape(1, 1, 3)
    color_dist = np.abs(arr[:, :, :3].astype(np.int16) - bg).sum(axis=2)
    return color_dist > tolerance
def get_bbox(mask: np.ndarray) -> Tuple[int, int, int, int]:
    """Tight inclusive (x1, y1, x2, y2) bounds of True pixels.

    Falls back to the whole image when the mask is empty.
    """
    ys, xs = np.nonzero(mask)
    if xs.size == 0:
        h, w = mask.shape[:2]
        return 0, 0, w - 1, h - 1
    return int(xs.min()), int(ys.min()), int(xs.max()), int(ys.max())
def add_margin_to_bbox(
    bbox: Tuple[int, int, int, int],
    w: int,
    h: int,
    margin_ratio: float = 0.04
) -> Tuple[int, int, int, int]:
    """Expand an inclusive bbox by *margin_ratio* of its size, clamped to the image."""
    left, top, right, bottom = bbox
    pad_x = int((right - left + 1) * margin_ratio)
    pad_y = int((bottom - top + 1) * margin_ratio)
    new_left = max(0, left - pad_x)
    new_top = max(0, top - pad_y)
    new_right = min(w - 1, right + pad_x)
    new_bottom = min(h - 1, bottom + pad_y)
    return (new_left, new_top, new_right, new_bottom)
def bbox_center(bbox: Tuple[int, int, int, int]) -> Tuple[float, float]:
    """Midpoint (cx, cy) of an inclusive bbox, as floats."""
    left, top, right, bottom = bbox
    return ((left + right) / 2.0, (top + bottom) / 2.0)
def bbox_size(bbox: Tuple[int, int, int, int]) -> Tuple[int, int]:
    """(width, height) of an inclusive bbox."""
    left, top, right, bottom = bbox
    return (right - left + 1, bottom - top + 1)
def crop_safe(arr: np.ndarray, x1: int, y1: int, x2: int, y2: int) -> np.ndarray:
    """Crop with bounds clamping; returns a 1x1 zero array if the window collapses.

    Note: x2/y2 are treated as exclusive slice ends here.
    """
    h, w = arr.shape[:2]
    x1, x2 = (max(0, min(w, v)) for v in (x1, x2))
    y1, y2 = (max(0, min(h, v)) for v in (y1, y2))
    if x1 >= x2 or y1 >= y2:
        # Degenerate window: return a minimal dummy so callers never crash.
        return np.zeros((1, 1, arr.shape[2]), dtype=arr.dtype)
    return arr[y1:y2, x1:x2]
def region_diff_ratio(arr_a: np.ndarray, arr_b: np.ndarray, threshold: int = 20) -> float:
    """Fraction of pixels whose summed RGB difference exceeds *threshold*.

    Shape mismatch is treated as fully different (1.0).
    """
    if arr_a.shape != arr_b.shape:
        return 1.0
    delta = arr_a[:, :, :3].astype(np.int16) - arr_b[:, :, :3].astype(np.int16)
    changed = np.abs(delta).sum(axis=2) > threshold
    return float(changed.mean())
# =========================
# 自動パラメータ推定
# =========================
def auto_params(image: Image.Image, out_size: int = 1024) -> Dict[str, int]:
    """Derive generation parameters from the subject's footprint in the frame.

    The image is resized to a square canvas, the subject bbox is measured, and
    sway amplitude / eyelid strength are picked from its width / height ratios.
    """
    canvas = to_rgba(image).resize((out_size, out_size), Image.LANCZOS)
    pixels = np.array(canvas)
    background = estimate_bg_color(pixels)
    subject = build_subject_mask(pixels, background, tolerance=28)
    x1, y1, x2, y2 = add_margin_to_bbox(get_bbox(subject), out_size, out_size, margin_ratio=0.03)
    width_ratio = (x2 - x1 + 1) / out_size
    height_ratio = (y2 - y1 + 1) / out_size
    # Wider subjects get a larger sway amplitude.
    if width_ratio > 0.55:
        sway = 14
    elif width_ratio > 0.38:
        sway = 11
    else:
        sway = 8
    eyelid = 4 if height_ratio > 0.42 else 3
    return {
        "sway_px": int(sway),
        "eyelid_strength": int(eyelid),
        "bg_tolerance": 28,
        "out_size": int(out_size),
    }
# =========================
# 触手中心推定
# =========================
def detect_strand_centers(
    subject_mask: np.ndarray,
    bbox: Tuple[int, int, int, int],
    lower_start_ratio: float = 0.56
) -> List[int]:
    """
    Estimate tentacle-like strand center x coordinates from column occupancy
    in the lower portion of the subject bbox.
    """
    x1, y1, x2, y2 = bbox
    bw = x2 - x1 + 1
    bh = y2 - y1 + 1
    fallback = [x1 + bw // 2]
    start_y = y1 + int(bh * lower_start_ratio)
    lower = subject_mask[start_y:y2 + 1, x1:x2 + 1]
    if lower.size == 0:
        return fallback
    occupancy = lower.sum(axis=0).astype(np.float32)
    if occupancy.max() <= 0:
        return fallback
    # Light smoothing so single-column noise does not create spurious peaks.
    kernel = np.array([1, 2, 3, 2, 1], dtype=np.float32)
    smooth = np.convolve(occupancy, kernel / kernel.sum(), mode="same")
    threshold = max(2.0, smooth.max() * 0.30)
    peaks = [
        i for i in range(1, len(smooth) - 1)
        if smooth[i] >= threshold and smooth[i] >= smooth[i - 1] and smooth[i] >= smooth[i + 1]
    ]
    # Collapse peaks that sit closer together than min_gap, keeping the taller one.
    min_gap = max(10, bw // 18)
    merged: List[int] = []
    for peak in peaks:
        if merged and peak - merged[-1] < min_gap:
            if smooth[peak] > smooth[merged[-1]]:
                merged[-1] = peak
        else:
            merged.append(peak)
    if not merged:
        # No usable peaks: assume five evenly spaced strands across the bbox.
        merged = np.linspace(int(bw * 0.18), int(bw * 0.82), 5).astype(int).tolist()
    return [x1 + c for c in merged]
# =========================
# 差分生成
# =========================
def warp_tentacles_sway(
    base_rgba: Image.Image,
    subject_mask: np.ndarray,
    bbox: Tuple[int, int, int, int],
    sway_px: int = 12,
    direction: int = 1,
    lower_start_ratio: float = 0.56,
    bg_color: Tuple[int, int, int] = (255, 255, 255)
) -> Image.Image:
    """
    Sway the lower part of the subject ("tentacles") sideways.

    For each detected strand center a Gaussian-weighted horizontal offset is
    accumulated, with alternating sign per strand and amplitude growing toward
    the bottom (roots stay put, tips move most). Pixels are remapped with
    backward mapping, restricted to subject pixels in the lower bbox band.

    Args:
        base_rgba: source frame (RGBA).
        subject_mask: boolean foreground mask, same HxW as the image.
        bbox: subject bounding box (x1, y1, x2, y2), inclusive.
        sway_px: maximum horizontal displacement in pixels.
        direction: +1 / -1 to flip the overall sway direction.
        lower_start_ratio: fraction of bbox height where swaying begins.
        bg_color: retained for interface compatibility; no longer used (the
            original pre-filled active pixels with the background color and
            then immediately overwrote all of them — a dead store, removed).

    Returns:
        A new RGBA image with the warped lower region.
    """
    arr = np.array(base_rgba).copy()
    out = arr.copy()
    h, w = arr.shape[:2]
    x1, y1, x2, y2 = bbox
    bw = x2 - x1 + 1
    bh = y2 - y1 + 1
    start_y = y1 + int(bh * lower_start_ratio)
    centers = detect_strand_centers(subject_mask, bbox, lower_start_ratio=lower_start_ratio)
    if len(centers) == 0:
        centers = [x1 + bw // 2]
    # Gaussian width: narrower when many strands, floored at 8 px.
    sigma = max(8.0, bw / max(6, len(centers) * 2))
    yy, xx = np.mgrid[0:h, 0:w]
    # Vertical falloff: 0 at start_y, ~1 at the bbox bottom.
    # Loop-invariant, so it is computed once (hoisted out of the strand loop).
    t = np.clip((yy - start_y) / max(1, (y2 - start_y)), 0.0, 1.0)
    wy = t ** 1.65
    offset = np.zeros((h, w), dtype=np.float32)
    for idx, cx in enumerate(centers):
        # Alternate sway direction per strand for a more organic motion.
        sign = 1.0 if idx % 2 == 0 else -1.0
        amp = direction * sway_px * sign
        dx = xx - cx
        wx = np.exp(-(dx * dx) / (2.0 * sigma * sigma))
        offset += amp * wx * wy
    # Limit the effect to subject pixels in the lower band (bbox +/- 12 px horizontally).
    active = np.zeros((h, w), dtype=bool)
    active[start_y:y2 + 1, max(0, x1 - 12):min(w, x2 + 13)] = True
    active &= subject_mask
    # Backward mapping: each destination pixel samples from x - offset.
    src_x = np.clip(np.round(xx - offset).astype(np.int32), 0, w - 1)
    moved_pixels = arr[yy, src_x]
    out[active] = moved_pixels[active]
    return Image.fromarray(out, mode="RGBA")
def sample_head_fill_color(arr: np.ndarray, bbox):
x1, y1, x2, y2 = bbox
bw = x2 - x1 + 1
bh = y2 - y1 + 1
sx1 = x1 + int(bw * 0.30)
sx2 = x1 + int(bw * 0.70)
sy1 = y1 + int(bh * 0.18)
sy2 = y1 + int(bh * 0.32)
crop = arr[sy1:sy2, sx1:sx2, :3]
if crop.size == 0:
return (170, 225, 220, 255)
mean = crop.reshape(-1, 3).mean(axis=0)
return (int(mean[0]), int(mean[1]), int(mean[2]), 255)
def add_heavy_eyelids(
    base_rgba: Image.Image,
    bbox: Tuple[int, int, int, int],
    line_strength: int = 4
) -> Image.Image:
    """
    Subtle expression variant for half-closed-eye characters.

    Does not drastically alter the existing eyes; only makes the upper
    eyelids look slightly heavier by drawing short dark lines at
    hard-coded positions relative to the subject bbox.
    NOTE(review): the eye positions are fixed ratios of the bbox — assumes a
    roughly centered, front-facing face; confirm against typical inputs.
    """
    img = base_rgba.copy()
    draw = ImageDraw.Draw(img)
    x1, y1, x2, y2 = bbox
    bw = x2 - x1 + 1
    bh = y2 - y1 + 1
    # Line color chosen to match a dark outline tone of the source art.
    line_color = (32, 49, 65, 255)
    # Keep the change weak: main line width clamped to [2, 4].
    main_width = max(2, min(4, line_strength))
    sub_width = max(1, main_width - 1)
    # Left eye: short upper-eyelid emphasis close to the original half-closed line.
    l_start = (x1 + int(bw * 0.21), y1 + int(bh * 0.465))
    l_mid = (x1 + int(bw * 0.30), y1 + int(bh * 0.455))
    l_end = (x1 + int(bw * 0.39), y1 + int(bh * 0.470))
    # Right eye.
    r_start = (x1 + int(bw * 0.61), y1 + int(bh * 0.470))
    r_mid = (x1 + int(bw * 0.70), y1 + int(bh * 0.455))
    r_end = (x1 + int(bw * 0.79), y1 + int(bh * 0.465))
    # Main eyelid lines.
    draw.line([l_start, l_mid, l_end], fill=line_color, width=main_width)
    draw.line([r_start, r_mid, r_end], fill=line_color, width=main_width)
    # Very thin secondary lines to add just a hint of "slightly more closed".
    l2_start = (x1 + int(bw * 0.24), y1 + int(bh * 0.485))
    l2_end = (x1 + int(bw * 0.37), y1 + int(bh * 0.490))
    r2_start = (x1 + int(bw * 0.63), y1 + int(bh * 0.490))
    r2_end = (x1 + int(bw * 0.76), y1 + int(bh * 0.485))
    draw.line([l2_start, l2_end], fill=line_color, width=sub_width)
    draw.line([r2_start, r2_end], fill=line_color, width=sub_width)
    return img
# =========================
# QC
# =========================
def judge_level(pass_ok: bool, warn_ok: bool) -> str:
    """Map two threshold checks onto a PASS / WARN / FAIL grade."""
    return "PASS" if pass_ok else ("WARN" if warn_ok else "FAIL")
def worst_level(levels: List[str]) -> str:
    """Return the most severe grade present in *levels* (FAIL > WARN > PASS)."""
    for grade in ("FAIL", "WARN"):
        if grade in levels:
            return grade
    return "PASS"
def compute_qc(
    frame_01: Image.Image,
    frame_02: Image.Image,
    frame_03: Image.Image,
    frame_04: Image.Image,
    bg_color: Tuple[int, int, int],
    bg_tolerance: int
) -> Dict[str, object]:
    """
    Quality-check the generated frames against the base frame (frame_01).

    Checks: subject position drift, subject size drift, upper-body
    contamination by the sway frames, eye-region change (frame_03 must change
    the eyes, frames 02/04 must not), and lower-body change (sway frames must
    actually move something). Returns per-check levels, metrics and bboxes.
    """
    arr1 = np.array(frame_01)
    arr2 = np.array(frame_02)
    arr3 = np.array(frame_03)
    arr4 = np.array(frame_04)
    # Subject bbox per frame, using the same background keying as generation.
    bboxes = []
    for arr in [arr1, arr2, arr3, arr4]:
        mask = build_subject_mask(arr, bg_color, tolerance=bg_tolerance)
        bboxes.append(get_bbox(mask))
    bbox1, bbox2, bbox3, bbox4 = bboxes
    # Position drift: subject-center displacement relative to frame_01.
    c1, c2, c3, c4 = map(bbox_center, bboxes)
    shift_02 = float(np.hypot(c2[0] - c1[0], c2[1] - c1[1]))
    shift_03 = float(np.hypot(c3[0] - c1[0], c3[1] - c1[1]))
    shift_04 = float(np.hypot(c4[0] - c1[0], c4[1] - c1[1]))
    max_shift = max(shift_02, shift_03, shift_04)
    pos_level = judge_level(max_shift <= 3.0, max_shift <= 8.0)
    # Size drift: bbox width/height change (percent) relative to frame_01.
    w1, h1 = bbox_size(bbox1)

    def size_diff_pct(bbox: Tuple[int, int, int, int]) -> float:
        # Worst of width/height deviation, in percent of frame_01's size.
        w, h = bbox_size(bbox)
        dw = abs(w - w1) / max(1, w1) * 100.0
        dh = abs(h - h1) / max(1, h1) * 100.0
        return max(dw, dh)

    size_02 = size_diff_pct(bbox2)
    size_03 = size_diff_pct(bbox3)
    size_04 = size_diff_pct(bbox4)
    max_size_diff = max(size_02, size_03, size_04)
    size_level = judge_level(max_size_diff <= 2.0, max_size_diff <= 5.0)
    # All region crops below are taken relative to frame_01's bbox.
    x1, y1, x2, y2 = bbox1
    bw = x2 - x1 + 1
    bh = y2 - y1 + 1

    def upper_crop(arr: np.ndarray) -> np.ndarray:
        # Top 45% of the subject: must stay untouched by the sway frames.
        return crop_safe(arr, x1, y1, x2 + 1, y1 + int(bh * 0.45))

    upper_diff_02 = region_diff_ratio(upper_crop(arr1), upper_crop(arr2), threshold=20) * 100.0
    upper_diff_04 = region_diff_ratio(upper_crop(arr1), upper_crop(arr4), threshold=20) * 100.0
    max_upper_diff = max(upper_diff_02, upper_diff_04)
    upper_level = judge_level(max_upper_diff <= 1.0, max_upper_diff <= 3.0)

    def eye_crop(arr: np.ndarray) -> np.ndarray:
        # Shared eye-band window (was copy-pasted four times before).
        return crop_safe(
            arr,
            x1 + int(bw * 0.15),
            y1 + int(bh * 0.36),
            x1 + int(bw * 0.85),
            y1 + int(bh * 0.60),
        )

    eye1 = eye_crop(arr1)
    eye_diff_02 = region_diff_ratio(eye1, eye_crop(arr2), threshold=20) * 100.0
    eye_diff_03 = region_diff_ratio(eye1, eye_crop(arr3), threshold=20) * 100.0
    eye_diff_04 = region_diff_ratio(eye1, eye_crop(arr4), threshold=20) * 100.0
    # frame_03 (eyelid variant) must change the eyes; sway frames must not.
    eye_level = "PASS"
    if eye_diff_03 < 0.8:
        eye_level = "FAIL"
    elif eye_diff_02 > 5.0 or eye_diff_04 > 5.0:
        eye_level = "FAIL"
    elif eye_diff_03 < 1.6 or eye_diff_02 > 2.0 or eye_diff_04 > 2.0:
        eye_level = "WARN"

    def lower_crop(arr: np.ndarray) -> np.ndarray:
        # Lower ~half of the subject: the sway frames must visibly differ here.
        return crop_safe(arr, x1, y1 + int(bh * 0.52), x2 + 1, y2 + 1)

    lower1 = lower_crop(arr1)
    lower_diff_02 = region_diff_ratio(lower1, lower_crop(arr2), threshold=20) * 100.0
    lower_diff_04 = region_diff_ratio(lower1, lower_crop(arr4), threshold=20) * 100.0
    min_lower_diff = min(lower_diff_02, lower_diff_04)
    lower_level = judge_level(min_lower_diff >= 1.8, min_lower_diff >= 0.8)
    overall = worst_level([pos_level, size_level, upper_level, eye_level, lower_level])
    return {
        "overall": overall,
        "position_level": pos_level,
        "position_max_shift_px": round(max_shift, 2),
        "size_level": size_level,
        "size_max_diff_pct": round(max_size_diff, 2),
        "upper_level": upper_level,
        "upper_max_diff_pct": round(max_upper_diff, 2),
        "eye_level": eye_level,
        "eye_diff_02_pct": round(eye_diff_02, 2),
        "eye_diff_03_pct": round(eye_diff_03, 2),
        "eye_diff_04_pct": round(eye_diff_04, 2),
        "lower_level": lower_level,
        "lower_diff_02_pct": round(lower_diff_02, 2),
        "lower_diff_04_pct": round(lower_diff_04, 2),
        "bbox1": bbox1,
        "bbox2": bbox2,
        "bbox3": bbox3,
        "bbox4": bbox4,
    }
def qc_to_text(qc: Dict[str, object], bg_color: Tuple[int, int, int], params: Dict[str, int], retry_count: int) -> str:
    """Render the QC result, generation parameters and bboxes as a plain-text report."""
    param_line = (
        f"使用パラメータ: sway_px={params['sway_px']} / "
        f"eyelid_strength={params['eyelid_strength']} / "
        f"bg_tolerance={params['bg_tolerance']} / "
        f"out_size={params['out_size']}"
    )
    eye_line = (
        f"目差分: {qc['eye_level']} / "
        f"frame_02={qc['eye_diff_02_pct']} % / "
        f"frame_03={qc['eye_diff_03_pct']} % / "
        f"frame_04={qc['eye_diff_04_pct']} %"
    )
    lower_line = (
        f"下半分差分: {qc['lower_level']} / "
        f"frame_02={qc['lower_diff_02_pct']} % / "
        f"frame_04={qc['lower_diff_04_pct']} %"
    )
    report = [
        f"総合QC: {qc['overall']}",
        f"自動再試行回数: {retry_count}",
        "",
        param_line,
        "",
        f"位置ズレ: {qc['position_level']} / 最大 {qc['position_max_shift_px']} px",
        f"サイズズレ: {qc['size_level']} / 最大 {qc['size_max_diff_pct']} %",
        f"上半分汚染: {qc['upper_level']} / 最大差分 {qc['upper_max_diff_pct']} %",
        eye_line,
        lower_line,
        "",
        f"背景推定色: {bg_color}",
        f"bbox_01: {qc['bbox1']}",
        f"bbox_02: {qc['bbox2']}",
        f"bbox_03: {qc['bbox3']}",
        f"bbox_04: {qc['bbox4']}",
    ]
    return "\n".join(report)
# =========================
# 保存
# =========================
def save_png_frames(frames: List[Image.Image], out_dir: str) -> List[str]:
    """Save each frame as frame_NN.png (1-based) under *out_dir*; return the paths in order."""
    saved = []
    for index, frame in enumerate(frames, start=1):
        target = os.path.join(out_dir, f"frame_{index:02d}.png")
        frame.save(target)
        saved.append(target)
    return saved
def make_animation_webp(frames: List[Image.Image], out_path: str, duration_ms: int = 180) -> None:
    """Write *frames* as a lossless, infinitely looping animated WebP."""
    converted = [frame.convert("RGBA") for frame in frames]
    first, rest = converted[0], converted[1:]
    first.save(
        out_path,
        format="WEBP",
        save_all=True,
        append_images=rest,
        duration=duration_ms,
        loop=0,          # 0 = loop forever
        lossless=True,
        quality=100,
    )
def make_zip(file_paths: List[str], zip_path: str) -> None:
    """Bundle the given files into a deflate-compressed ZIP, flattened to basenames."""
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as archive:
        for file_path in file_paths:
            archive.write(file_path, arcname=os.path.basename(file_path))
# =========================
# 生成本体
# =========================
def render_frames_with_params(image: Image.Image, params: Dict[str, int]):
    """
    Produce the base frame plus three derived difference frames and QC them.

    Returns ([frame_01, frame_02, frame_03, frame_04], qc_dict, bg_color):
    frame_01 is the untouched base, frame_02/04 sway the tentacles in opposite
    directions, frame_03 is the heavy-eyelid expression variant.
    """
    size = params["out_size"]
    canvas = to_rgba(image).resize((size, size), Image.LANCZOS)
    pixels = np.array(canvas)
    bg_color = estimate_bg_color(pixels)
    subject_mask = build_subject_mask(pixels, bg_color, tolerance=params["bg_tolerance"])
    bbox = add_margin_to_bbox(get_bbox(subject_mask), size, size, margin_ratio=0.03)
    frame_01 = canvas.copy()
    # Shared keyword arguments for both sway frames.
    sway_common = dict(
        base_rgba=canvas,
        subject_mask=subject_mask,
        bbox=bbox,
        sway_px=params["sway_px"],
        lower_start_ratio=0.56,
        bg_color=bg_color,
    )
    frame_02 = warp_tentacles_sway(direction=1, **sway_common)
    frame_03 = add_heavy_eyelids(
        base_rgba=canvas,
        bbox=bbox,
        line_strength=params["eyelid_strength"],
    )
    frame_04 = warp_tentacles_sway(direction=-1, **sway_common)
    qc = compute_qc(frame_01, frame_02, frame_03, frame_04, bg_color, params["bg_tolerance"])
    return [frame_01, frame_02, frame_03, frame_04], qc, bg_color
def auto_generate(image: Image.Image):
    """
    Full pipeline for the UI button: estimate parameters, render the four
    frames, QC them, and retry with adjusted parameters (up to 5 renders)
    until QC passes. Writes PNGs, an animated WebP preview, a QC text report
    and a ZIP bundle into a fresh temp directory.

    Returns (png_paths, webp_path, zip_path, qc_text, status) matching the
    five Gradio outputs.
    """
    if image is None:
        # Error path must still return all five UI outputs.
        return None, None, None, "画像をアップロードしてください", "未実行"
    params = auto_params(image, out_size=1024)
    retry_count = 0
    # NOTE(review): despite the names, "best_*" holds the *latest* attempt,
    # not the best-scoring one — each iteration overwrites unconditionally.
    best_frames = None
    best_qc = None
    best_bg = None
    best_params = params.copy()
    for _ in range(5):
        frames, qc, bg_color = render_frames_with_params(image, params)
        best_frames = frames
        best_qc = qc
        best_bg = bg_color
        best_params = params.copy()
        if qc["overall"] == "PASS":
            break
        retry_count += 1
        # Too little lower-body motion -> sway harder (capped at 26 px).
        if qc["lower_level"] in ["FAIL", "WARN"]:
            params["sway_px"] = min(26, params["sway_px"] + 2)
        # Upper body contaminated -> sway softer (floored at 6 px).
        # Note: can partially cancel the adjustment above in the same round.
        if qc["upper_level"] in ["FAIL", "WARN"]:
            params["sway_px"] = max(6, params["sway_px"] - 1)
        # Eyelid frame not distinct enough -> draw stronger lines (capped at 8).
        if qc["eye_level"] == "FAIL":
            params["eyelid_strength"] = min(8, params["eyelid_strength"] + 1)
        elif qc["eye_level"] == "WARN" and qc["eye_diff_03_pct"] < 1.0:
            params["eyelid_strength"] = min(8, params["eyelid_strength"] + 1)
    qc_text = qc_to_text(best_qc, best_bg, best_params, retry_count)
    # All outputs go into one fresh temp directory per request.
    workdir = tempfile.mkdtemp(prefix="auto_diff_frames_")
    png_paths = save_png_frames(best_frames, workdir)
    # Preview loop: base -> sway right -> base -> sway left -> base -> eyelids -> base.
    anim_frames = [
        best_frames[0], best_frames[1], best_frames[0],
        best_frames[3], best_frames[0], best_frames[2], best_frames[0]
    ]
    webp_path = os.path.join(workdir, "preview_loop.webp")
    make_animation_webp(anim_frames, webp_path, duration_ms=180)
    qc_path = os.path.join(workdir, "qc_report.txt")
    with open(qc_path, "w", encoding="utf-8") as f:
        f.write(qc_text)
    zip_path = os.path.join(workdir, "frames_png_with_qc.zip")
    make_zip(png_paths + [webp_path, qc_path], zip_path)
    status = f"完了 / 総合QC={best_qc['overall']} / 自動再試行={retry_count}回"
    return png_paths, webp_path, zip_path, qc_text, status
# =========================
# UI
# =========================
# Gradio UI: one source image in; four difference frames, a looping preview,
# a ZIP bundle and a QC report out.
with gr.Blocks(title="自動差分フレーム生成 + QC") as demo:
    gr.Markdown("## 自動差分フレーム生成 + QC")
    gr.Markdown(
        "画像を1枚読み込み、アプリ内部で自動処理して差分4枚を別ファイルで生成します。 \n"
        "出力: frame_01.png / frame_02.png / frame_03.png / frame_04.png / preview_loop.webp / qc_report.txt / ZIP"
    )
    with gr.Row():
        with gr.Column(scale=1):
            # Input side: source image and the single action button.
            image_input = gr.Image(type="pil", image_mode="RGBA", label="元画像")
            run_btn = gr.Button("自動生成する", variant="primary")
        with gr.Column(scale=1):
            # Output side: the four frames, the animated preview, and the ZIP.
            gallery = gr.Gallery(label="生成された4枚", columns=2, height="auto")
            preview = gr.Image(label="アニメプレビュー(WebP)", type="filepath")
            zip_file = gr.File(label="ZIPダウンロード")
    # Full-width text outputs below the main row.
    qc_report = gr.Textbox(label="QCレポート", lines=16, interactive=False)
    status = gr.Textbox(label="ステータス", interactive=False)
    # Output order must match auto_generate's 5-tuple return.
    run_btn.click(
        fn=auto_generate,
        inputs=[image_input],
        outputs=[gallery, preview, zip_file, qc_report, status]
    )
# Serve on all interfaces at port 7860 (the port Hugging Face Spaces expects).
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)