# app.py — Photo → Art Styles (color pencil / oil painting / cartoon) Gradio Space
import os
import sys
import urllib.request
from typing import Optional
import cv2
import numpy as np
import gradio as gr
# ==============================
# Optional x2 Super-Resolution
# ==============================
# Pretrained EDSR x2 super-resolution weights, fetched on demand from the
# opencv_contrib samples; stored next to the app under EDSR_X2_PATH.
EDSR_X2_URL = "https://github.com/opencv/opencv_contrib/raw/4.x/modules/dnn_superres/samples/EDSR_x2.pb"
EDSR_X2_PATH = "EDSR_x2.pb"
def _ensure_edsr_model():
    """Download the EDSR x2 weights once if they are not on disk (best-effort)."""
    if os.path.exists(EDSR_X2_PATH):
        return
    try:
        urllib.request.urlretrieve(EDSR_X2_URL, EDSR_X2_PATH)
    except Exception as e:
        # Download is optional; upscaling is simply skipped later if absent.
        print(f"[warn] Failed to download EDSR model: {e}", file=sys.stderr)
def upscale_x2(rgb: np.ndarray) -> np.ndarray:
    """Upscale an RGB image 2x via OpenCV's EDSR model; return input unchanged on any failure."""
    _ensure_edsr_model()
    if not os.path.exists(EDSR_X2_PATH):
        return rgb
    try:
        engine = cv2.dnn_superres.DnnSuperResImpl_create()
        engine.readModel(EDSR_X2_PATH)
        engine.setModel("edsr", 2)
        # dnn_superres operates in BGR; convert on the way in and out.
        upscaled_bgr = engine.upsample(cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR))
        return cv2.cvtColor(upscaled_bgr, cv2.COLOR_BGR2RGB)
    except Exception as e:
        print(f"[warn] SR failed: {e}", file=sys.stderr)
        return rgb
# ==============================
# Light helpers (color & texture)
# ==============================
def vivid_boost(rgb: np.ndarray, sat_gain=1.15, clahe_clip=1.5) -> np.ndarray:
    """Slight saturation + CLAHE on V channel (portrait-friendly)."""
    hue, sat, val = cv2.split(cv2.cvtColor(rgb, cv2.COLOR_RGB2HSV))
    # Scale saturation in float space, then clamp back into uint8 range.
    sat = np.clip(sat.astype(np.float32) * float(sat_gain), 0, 255).astype(np.uint8)
    # Local contrast on brightness only, so hues stay untouched.
    val = cv2.createCLAHE(clipLimit=float(clahe_clip), tileGridSize=(8, 8)).apply(val)
    return cv2.cvtColor(cv2.merge([hue, sat, val]), cv2.COLOR_HSV2RGB)
def add_paper_texture(rgb: np.ndarray, strength=0.08, scale=6) -> np.ndarray:
    """Subtle paper grain."""
    height, width = rgb.shape[:2]
    grain = np.zeros((height, width, 1), dtype=np.float32)
    cv2.randn(grain, 0, 25)
    # Blur kernel grows with image size and the requested fiber scale; must be odd.
    ksize = int(max(3, round(min(height, width) / (100 / max(1, scale)))))
    ksize += 1 - ksize % 2
    grain = cv2.GaussianBlur(grain, (ksize, ksize), 0)
    grain = cv2.normalize(grain, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    grain = cv2.cvtColor(grain, cv2.COLOR_GRAY2RGB)
    # addWeighted saturates at 255, so strength only brightens, never wraps.
    return cv2.addWeighted(rgb, 1.0, grain, float(strength), 0)
# ==============================
# Large-model subject mask (DeeplabV3 → graceful fallback)
# ==============================
# Flag recording whether the optional torch stack imported successfully;
# all neural features check it before running.
_torch_ok = False
try:
    import torch
    import torchvision
    from torchvision import transforms as T
    _torch_ok = True
except Exception as e:
    # Segmentation / neural styles degrade gracefully without torch.
    print(f"[warn] PyTorch/torchvision not available: {e}", file=sys.stderr)
# Lazily populated DeeplabV3 model and its preprocessing transform (see _load_deeplab).
_DEEPLAB = None
_DEEPLAB_TRANSFORM = None
def _load_deeplab() -> bool:
    """Lazily load DeeplabV3-ResNet50 and its preprocessing; return True when usable."""
    global _DEEPLAB, _DEEPLAB_TRANSFORM
    if not _torch_ok:
        return False
    if _DEEPLAB is not None:
        # Already loaded on a previous call.
        return True
    try:
        _DEEPLAB = torchvision.models.segmentation.deeplabv3_resnet50(weights="DEFAULT").eval()
        # ImageNet normalization matches the pretrained weights.
        _DEEPLAB_TRANSFORM = T.Compose([
            T.ToPILImage(),
            T.Resize(512, interpolation=T.InterpolationMode.BILINEAR),
            T.ToTensor(),
            T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ])
        return True
    except Exception as e:
        print(f"[warn] Failed to load DeeplabV3: {e}", file=sys.stderr)
        return False
def subject_mask_rgb(rgb: np.ndarray) -> Optional[np.ndarray]:
    """Return float mask [H,W]∈[0,1] for 'person' if available, else None."""
    if not _load_deeplab():
        return None
    try:
        with torch.no_grad():
            batch = _DEEPLAB_TRANSFORM(rgb).unsqueeze(0)
            logits = _DEEPLAB(batch)["out"][0]               # [C,H,W]
            person_prob = torch.softmax(logits, dim=0)[15]   # class 15 = person (VOC)
        prob_map = person_prob.cpu().numpy()
        height, width = rgb.shape[:2]
        # Back to original resolution, then feather for soft blending.
        prob_map = cv2.resize(prob_map, (width, height), interpolation=cv2.INTER_LINEAR)
        prob_map = cv2.GaussianBlur(prob_map, (21, 21), 0)
        return np.clip(prob_map, 0.0, 1.0)
    except Exception as e:
        print(f"[warn] Deeplab inference failed: {e}", file=sys.stderr)
        return None
def subject_aware_blend(orig_rgb: np.ndarray, stylized_rgb: np.ndarray,
                        mask: Optional[np.ndarray], strength=0.25) -> np.ndarray:
    """Blend some original back on subject (face/hair/clothes)."""
    if mask is None:
        # Fallback: MediaPipe selfie segmentation
        try:
            import mediapipe as mp
            with mp.solutions.selfie_segmentation.SelfieSegmentation(model_selection=1) as selfie:
                raw = selfie.process(cv2.cvtColor(orig_rgb, cv2.COLOR_RGB2BGR)).segmentation_mask
                mask = np.clip(cv2.GaussianBlur(raw, (21, 21), 0), 0.0, 1.0)
        except Exception:
            mask = None
    if mask is None:
        # No segmentation available at all: keep the stylized result as-is.
        return stylized_rgb
    weight = (mask * float(strength)).astype(np.float32)[..., None]
    blended = (orig_rgb.astype(np.float32) * weight +
               stylized_rgb.astype(np.float32) * (1.0 - weight))
    return np.clip(blended, 0, 255).astype(np.uint8)
# ==============================
# Style 1: Color Pencil (optimized for portraits)
# ==============================
def color_pencil_core(img: np.ndarray, sigma_s=80, sigma_r=0.10, shade_factor=0.04) -> np.ndarray:
    """Run OpenCV's pencilSketch on an RGB image and keep only the colored sketch."""
    sketch_pair = cv2.pencilSketch(
        cv2.cvtColor(img, cv2.COLOR_RGB2BGR),
        sigma_s=float(sigma_s),
        sigma_r=float(sigma_r),
        shade_factor=float(shade_factor),
    )
    # pencilSketch returns (gray_sketch, color_sketch); we want the color one.
    return cv2.cvtColor(sketch_pair[1], cv2.COLOR_BGR2RGB)
def color_pencil_pipeline(
    img: np.ndarray,
    sigma_s=80, sigma_r=0.10, shade_factor=0.04,
    vivid=True, sat_gain=1.15, clahe_clip=1.5,
    subject_aware=True, subject_blend=0.25,
    paper=False, paper_strength=0.08, paper_scale=6,
    upscale=False
) -> np.ndarray:
    """Full pencil chain: core sketch → vivid pop → subject blend → paper grain → x2 SR."""
    result = color_pencil_core(img, sigma_s, sigma_r, shade_factor)
    if vivid:
        result = vivid_boost(result, sat_gain=sat_gain, clahe_clip=clahe_clip)
    if subject_aware:
        # Blend some of the untouched photo back over the detected person.
        result = subject_aware_blend(img, result, subject_mask_rgb(img), strength=subject_blend)
    if paper:
        result = add_paper_texture(result, strength=paper_strength, scale=paper_scale)
    return upscale_x2(result) if upscale else result
# ==============================
# Style 2: Oil Painting (classic or neural fast style)
# ==============================
# Cached fast-neural-style network and the style name it was loaded for,
# so switching styles reloads only when the name changes.
_fast_style = None
_fast_style_name = None
def _load_fast_style(model_name="mosaic") -> bool:
    """Torch hub fast neural style (mosaic/udnie)."""
    global _fast_style, _fast_style_name
    if not _torch_ok:
        return False
    cached = _fast_style is not None and _fast_style_name == model_name
    if cached:
        return True
    try:
        _fast_style = torch.hub.load("pytorch/vision", "fast_neural_style",
                                     model=model_name, trust_repo=True).eval()
    except Exception as e:
        print(f"[warn] fast_neural_style '{model_name}' unavailable: {e}", file=sys.stderr)
        return False
    _fast_style_name = model_name
    return True
def oil_painting_pipeline(img: np.ndarray, engine="classic", brush=7, dyn_ratio=15,
                          bilateral=False, fast_model="mosaic", upscale=False) -> np.ndarray:
    """Oil-paint an RGB image.

    engine="neural" runs a torch-hub fast-style network (selected by fast_model);
    engine="classic" uses OpenCV's oilPainting. Any neural failure — load or
    inference — falls back to classic. Set upscale=True for a final x2 EDSR pass.
    """
    # BUG FIX: previously, when the neural model failed to *load*, `engine`
    # stayed "neural", the classic branch was skipped, and `rgb` was never
    # assigned → UnboundLocalError. Downgrade to classic explicitly instead.
    if engine == "neural" and not _load_fast_style(fast_model):
        engine = "classic"
    if engine == "neural":
        try:
            from torchvision import transforms as T
            with torch.no_grad():
                x = T.ToTensor()(T.ToPILImage()(img)).unsqueeze(0)
                y = _fast_style(x).clamp(0, 1)[0].permute(1, 2, 0).cpu().numpy()
            rgb = (y * 255).astype(np.uint8)
        except Exception as e:
            print(f"[warn] Neural style failed, fallback to classic: {e}", file=sys.stderr)
            engine = "classic"
    if engine != "neural":
        bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        if bilateral:
            # Optional pre-smoothing keeps brush strokes cleaner on noisy input.
            bgr = cv2.bilateralFilter(bgr, d=7, sigmaColor=75, sigmaSpace=75)
        try:
            oil = cv2.xphoto.oilPainting(bgr, size=int(brush), dynRatio=int(dyn_ratio))
        except Exception as e:
            # opencv-contrib may be absent; edgePreservingFilter is a painterly stand-in.
            print(f"[warn] xphoto.oilPainting unavailable, using edgePreservingFilter: {e}", file=sys.stderr)
            oil = cv2.edgePreservingFilter(bgr, sigma_s=60, sigma_r=0.4)
        rgb = cv2.cvtColor(oil, cv2.COLOR_BGR2RGB)
    if upscale:
        rgb = upscale_x2(rgb)
    return rgb
# ==============================
# Style 3: Cartoon (bright ink overlay + AI-light option)
# ==============================
def cartoon_classic(
    img: np.ndarray,
    bilateral_iter=6, sigma_color=75, sigma_space=75,
    edge_blur=3, mode="adaptive",
    line_thickness=2,
) -> np.ndarray:
    """Flat colors via bilateral; draw BLACK ink lines on top (no dark background).

    mode="canny" uses Canny edges; anything else uses an adaptive threshold.
    In both cases the edge map uses 255 = ink line, 0 = background.
    """
    bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    # 1) Color smoothing: repeated bilateral passes flatten regions, keep edges.
    color = bgr.copy()
    for _ in range(int(bilateral_iter)):
        color = cv2.bilateralFilter(color, d=9,
                                    sigmaColor=float(sigma_color),
                                    sigmaSpace=float(sigma_space))
    # 2) Edge detection on gray (median blur suppresses speckle first).
    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    if int(edge_blur) > 0:
        k = int(edge_blur) if int(edge_blur) % 2 == 1 else int(edge_blur) + 1
        gray = cv2.medianBlur(gray, k)
    if mode == "canny":
        edges = cv2.Canny(gray, 80, 160)  # 255 = edge (white)
    else:
        # BUG FIX: THRESH_BINARY put the *background* at 255 and the lines at 0,
        # so `edges == 255` below selected nearly the whole frame and painted it
        # black. THRESH_BINARY_INV puts the ink lines at 255, matching the Canny
        # convention and the "no dark background" contract.
        edges = cv2.adaptiveThreshold(gray, 255,
                                      cv2.ADAPTIVE_THRESH_MEAN_C,
                                      cv2.THRESH_BINARY_INV, 9, 2)
    # 3) Clean/densify lines: dilate thickens strokes, opening removes speckle.
    if line_thickness > 1:
        kernel = np.ones((line_thickness, line_thickness), np.uint8)
        edges = cv2.dilate(edges, kernel, iterations=1)
    edges = cv2.morphologyEx(edges, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8))
    # 4) Draw black ink on smoothed color where edges==255
    ink = color.copy()
    ink[edges == 255] = (0, 0, 0)
    return cv2.cvtColor(ink, cv2.COLOR_BGR2RGB)
def cartoon_ai_light(img: np.ndarray) -> np.ndarray:
    """Portable 'anime-ish' look: OpenCV stylization + vivid pop."""
    stylized_bgr = cv2.stylization(cv2.cvtColor(img, cv2.COLOR_RGB2BGR),
                                   sigma_s=60, sigma_r=0.07)
    # Re-saturate and add local contrast after stylization washes colors out.
    return vivid_boost(cv2.cvtColor(stylized_bgr, cv2.COLOR_BGR2RGB),
                       sat_gain=1.2, clahe_clip=1.6)
def cartoon_pipeline(
    img: np.ndarray,
    engine="classic",
    bilateral_iter=6, sigma_color=75, sigma_space=75,
    edge_blur=3, mode="adaptive",
    line_thickness=2,
    portrait_soften=True, portrait_blend=0.18,
    upscale=False
) -> np.ndarray:
    """Cartoonize, optionally soften the detected subject, optionally upscale x2."""
    if engine == "ai-light":
        result = cartoon_ai_light(img)
    else:
        result = cartoon_classic(img,
                                 bilateral_iter=bilateral_iter,
                                 sigma_color=sigma_color,
                                 sigma_space=sigma_space,
                                 edge_blur=edge_blur,
                                 mode=mode,
                                 line_thickness=line_thickness)
    if portrait_soften:
        # Mix some of the original back over the person for a gentler face.
        result = subject_aware_blend(img, result, subject_mask_rgb(img), strength=portrait_blend)
    return upscale_x2(result) if upscale else result
# ==============================
# Dispatcher
# ==============================
# Style names shown in the UI dropdown; run_pipeline dispatches on these exact strings.
STYLES = ["Color Pencil (optimized)", "Oil Painting", "Cartoon"]
def run_pipeline(
    img, style,
    # Pencil
    p_sigma_s, p_sigma_r, p_shade, p_vivid, p_sat, p_clahe,
    p_subject_aware, p_subject_blend, p_paper, p_paper_strength, p_paper_scale,
    # Oil
    o_engine, o_brush, o_dyn, o_bilateral, o_fast_model,
    # Cartoon
    c_engine, c_bi_iter, c_sigma_color, c_sigma_space, c_edge_blur, c_edge_mode,
    c_line_thickness, c_portrait_soften, c_portrait_blend,
    # Global
    g_upscale
):
    """Route the flat list of Gradio control values to the pipeline for `style`."""
    if img is None:
        raise gr.Error("No image received. Please upload or use the webcam.")
    if style == "Color Pencil (optimized)":
        result = color_pencil_pipeline(
            img,
            sigma_s=p_sigma_s, sigma_r=p_sigma_r, shade_factor=p_shade,
            vivid=p_vivid, sat_gain=p_sat, clahe_clip=p_clahe,
            subject_aware=p_subject_aware, subject_blend=p_subject_blend,
            paper=p_paper, paper_strength=p_paper_strength, paper_scale=p_paper_scale,
            upscale=g_upscale,
        )
    elif style == "Oil Painting":
        result = oil_painting_pipeline(
            img,
            engine=o_engine, brush=o_brush, dyn_ratio=o_dyn,
            bilateral=o_bilateral, fast_model=o_fast_model, upscale=g_upscale,
        )
    else:
        # Any other selection falls through to the cartoon pipeline.
        result = cartoon_pipeline(
            img,
            engine=c_engine, bilateral_iter=c_bi_iter, sigma_color=c_sigma_color,
            sigma_space=c_sigma_space, edge_blur=c_edge_blur, mode=c_edge_mode,
            line_thickness=c_line_thickness,
            portrait_soften=c_portrait_soften, portrait_blend=c_portrait_blend,
            upscale=g_upscale,
        )
    return result
# ==============================
# Gradio UI
# ==============================
# Build the Gradio UI: one column of controls (per-style accordions) feeding
# run_pipeline, one column showing the stylized output.
with gr.Blocks(title="Photo → Art Styles (AI-Upgraded)") as demo:
    gr.Markdown(
        "Choose **Color Pencil**, **Oil Painting**, or **Cartoon**. "
        "Optional **subject-aware** AI keeps faces natural using a segmentation LMM (DeeplabV3)."
    )
    with gr.Row():
        with gr.Column(scale=1):
            # Input image (upload or webcam) plus the style selector.
            inp = gr.Image(type="numpy", sources=["upload", "webcam"], label="Input")
            style = gr.Dropdown(STYLES, value="Color Pencil (optimized)", label="Style")
            # ----- Pencil -----
            with gr.Accordion("Color Pencil Controls", open=True):
                p_sigma_s = gr.Slider(10, 200, value=80, step=1, label="sigma_s (spatial smoothing)")
                p_sigma_r = gr.Slider(0.0, 1.0, value=0.10, step=0.01, label="sigma_r (range sensitivity)")
                p_shade = gr.Slider(0.00, 0.10, value=0.04, step=0.005, label="shade_factor (shading strength)")
                p_vivid = gr.Checkbox(True, label="Vivid mode (saturation + local contrast)")
                p_sat = gr.Slider(1.0, 2.0, value=1.15, step=0.05, label="Saturation gain")
                p_clahe = gr.Slider(1.0, 4.0, value=1.5, step=0.1, label="CLAHE clip limit")
                p_subject_aware = gr.Checkbox(True, label="AI subject-aware (gentler faces)")
                p_subject_blend = gr.Slider(0.0, 0.8, value=0.25, step=0.05, label="Subject blend strength")
                p_paper = gr.Checkbox(False, label="Add paper texture")
                p_paper_strength = gr.Slider(0.0, 0.3, value=0.08, step=0.01, label="Paper strength")
                p_paper_scale = gr.Slider(1, 12, value=6, step=1, label="Paper fiber scale")
            # ----- Oil -----
            with gr.Accordion("Oil Painting Controls", open=False):
                o_engine = gr.Radio(choices=["classic", "neural"], value="classic", label="Engine")
                o_brush = gr.Slider(3, 15, value=7, step=1, label="Brush size (classic)")
                o_dyn = gr.Slider(5, 50, value=15, step=1, label="Dynamics ratio (classic)")
                o_bilateral = gr.Checkbox(False, label="Pre-smooth with bilateral (classic)")
                o_fast_model = gr.Radio(choices=["mosaic", "udnie"], value="mosaic", label="Neural style model")
            # ----- Cartoon -----
            with gr.Accordion("Cartoon Controls", open=False):
                c_engine = gr.Radio(choices=["classic", "ai-light"], value="classic", label="Engine")
                c_bi_iter = gr.Slider(1, 10, value=6, step=1, label="Bilateral iterations (classic)")
                c_sigma_color = gr.Slider(10, 150, value=75, step=1, label="Bilateral sigmaColor (classic)")
                c_sigma_space = gr.Slider(10, 150, value=75, step=1, label="Bilateral sigmaSpace (classic)")
                c_edge_blur = gr.Slider(0, 11, value=3, step=2, label="Median blur for edges (odd, 0=off)")
                c_edge_mode = gr.Radio(choices=["adaptive", "canny"], value="adaptive", label="Edge mode (classic)")
                c_line_thickness = gr.Slider(1, 5, value=2, step=1, label="Line thickness (classic)")
                c_portrait_soften = gr.Checkbox(True, label="Soften face with AI blend")
                c_portrait_blend = gr.Slider(0.0, 0.8, value=0.18, step=0.02, label="Face blend strength")
            # ----- Global -----
            with gr.Accordion("Global Options", open=False):
                g_upscale = gr.Checkbox(False, label="AI Upscale x2 (OpenCV EDSR)")
            btn = gr.Button("Transform")
        with gr.Column(scale=1):
            out = gr.Image(type="numpy", label="Output")
            gr.Markdown(
                "**Tips** \n"
                "- For portraits, **Color Pencil** with *AI subject-aware ON* keeps skin clean. \n"
                "- Try **Oil → neural (mosaic/udnie)** for painterly canvas looks. \n"
                "- **Cartoon** uses bright ink overlay; adjust *Line thickness* for bolder lines."
            )
    # Order must match run_pipeline's positional parameters exactly.
    inputs = [
        inp, style,
        p_sigma_s, p_sigma_r, p_shade, p_vivid, p_sat, p_clahe,
        p_subject_aware, p_subject_blend, p_paper, p_paper_strength, p_paper_scale,
        o_engine, o_brush, o_dyn, o_bilateral, o_fast_model,
        c_engine, c_bi_iter, c_sigma_color, c_sigma_space, c_edge_blur, c_edge_mode,
        c_line_thickness, c_portrait_soften, c_portrait_blend,
        g_upscale
    ]
    btn.click(run_pipeline, inputs=inputs, outputs=out)
    # Live preview: every control change re-runs the whole pipeline.
    # NOTE(review): this fires a full (potentially heavy) render per slider tick —
    # confirm this is acceptable, or consider debouncing / button-only triggering.
    for w in inputs:
        w.change(run_pipeline, inputs=inputs, outputs=out)

if __name__ == "__main__":
    demo.launch()