seawolf2357 commited on
Commit
b1e5e5b
Β·
verified Β·
1 Parent(s): e8db4b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +486 -605
app.py CHANGED
@@ -1,266 +1,255 @@
1
- import gradio as gr
2
- import cv2
3
- import numpy as np
4
- import tempfile
5
- import os
6
  from pathlib import Path
7
- from typing import Optional, Tuple
8
  import torch
9
- from PIL import Image
10
-
11
- # ==============================
12
- # Model loader
13
- # ==============================
14
- def load_model(model_path: str = "yolov8-face-hf.pt", device: Optional[str] = None):
15
- from ultralytics import YOLO
16
- if device is None:
17
- if torch.cuda.is_available():
18
- device = "cuda"
19
- elif torch.backends.mps.is_available():
20
- device = "mps"
21
- else:
22
- device = "cpu"
23
- model = YOLO(model_path)
24
- model.to(device)
25
- return model, device
26
-
27
- # Load model globally
28
- model, device = load_model()
29
-
30
- # ==============================
31
- # Helper functions
32
- # ==============================
33
- def _ensure_odd(x: int) -> int:
34
- return x if x % 2 == 1 else x + 1
35
-
36
- def _choose_writer_size(w: int, h: int) -> Tuple[int, int]:
37
- return (w if w % 2 == 0 else w - 1, h if h % 2 == 0 else h - 1)
38
-
39
- def _apply_anonymization(face_roi: np.ndarray, mode: str, blur_kernel: int, mosaic: int = 15) -> np.ndarray:
40
- if face_roi.size == 0:
41
- return face_roi
42
- if mode == "Gaussian Blur":
43
- k = _ensure_odd(max(blur_kernel, 15))
44
- return cv2.GaussianBlur(face_roi, (k, k), 0)
45
- else:
46
- m = max(2, mosaic)
47
- h, w = face_roi.shape[:2]
48
- face_small = cv2.resize(face_roi, (max(1, w // m), max(1, h // m)), interpolation=cv2.INTER_LINEAR)
49
- return cv2.resize(face_small, (w, h), interpolation=cv2.INTER_NEAREST)
50
-
51
- def blur_faces_image(image_bgr, conf, iou, expand_ratio, mode, blur_kernel, mosaic):
52
- h, w = image_bgr.shape[:2]
53
- face_count = 0
54
-
55
- with torch.no_grad():
56
- results = model.predict(image_bgr, conf=conf, iou=iou, verbose=False, device=device)
57
-
58
- for r in results:
59
- boxes = r.boxes.xyxy.cpu().numpy() if hasattr(r.boxes, "xyxy") else []
60
- face_count = len(boxes)
61
- for x1, y1, x2, y2 in boxes:
62
- x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
63
-
64
- if expand_ratio > 0:
65
- bw = x2 - x1
66
- bh = y2 - y1
67
- dx = int(bw * expand_ratio)
68
- dy = int(bh * expand_ratio)
69
- x1 -= dx; y1 -= dy; x2 += dx; y2 += dy
70
-
71
- x1 = max(0, min(w, x1))
72
- x2 = max(0, min(w, x2))
73
- y1 = max(0, min(h, y1))
74
- y2 = max(0, min(h, y2))
75
- if x2 <= x1 or y2 <= y1:
76
- continue
77
-
78
- roi = image_bgr[y1:y2, x1:x2]
79
- image_bgr[y1:y2, x1:x2] = _apply_anonymization(roi, mode, blur_kernel, mosaic)
80
-
81
- return image_bgr, face_count
82
-
83
- def blur_faces_video(input_path, conf, iou, expand_ratio, mode, blur_kernel, mosaic, progress=gr.Progress()):
84
- from moviepy.editor import VideoFileClip
85
-
86
- cap = cv2.VideoCapture(input_path)
87
- if not cap.isOpened():
88
- raise IOError("Cannot open video")
89
-
90
- in_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
91
- in_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
92
- fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
93
- frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or 0
94
-
95
- out_w, out_h = _choose_writer_size(in_w, in_h)
96
- fourcc = cv2.VideoWriter_fourcc(*"mp4v")
97
-
98
- temp_video_path = tempfile.NamedTemporaryFile(delete=False, suffix="_temp.mp4").name
99
- output_path = tempfile.NamedTemporaryFile(delete=False, suffix="_blurred.mp4").name
100
-
101
- out = cv2.VideoWriter(temp_video_path, fourcc, fps, (out_w, out_h))
102
-
103
- idx = 0
104
- total_faces = 0
105
-
106
  try:
107
- while True:
108
- ret, frame = cap.read()
109
- if not ret:
110
- break
111
- frame = cv2.resize(frame, (out_w, out_h))
112
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  with torch.no_grad():
114
- results = model.predict(frame, conf=conf, iou=iou, verbose=False, device=device)
115
-
116
- h, w = frame.shape[:2]
117
- r0 = results[0] if len(results) else None
118
- boxes = r0.boxes.xyxy if (r0 and hasattr(r0, "boxes")) else []
119
- total_faces += len(boxes)
120
-
121
- for b in boxes:
122
- x1, y1, x2, y2 = map(int, b)
123
- if expand_ratio > 0:
124
- bw = x2 - x1
125
- bh = y2 - y1
126
- dx = int(bw * expand_ratio)
127
- dy = int(bh * expand_ratio)
128
- x1 -= dx; y1 -= dy; x2 += dx; y2 += dy
129
-
130
- x1 = max(0, min(w, x1))
131
- x2 = max(0, min(w, x2))
132
- y1 = max(0, min(h, y1))
133
- y2 = max(0, min(h, y2))
134
- if x2 <= x1 or y2 <= y1:
135
- continue
136
-
137
- roi = frame[y1:y2, x1:x2]
138
- frame[y1:y2, x1:x2] = _apply_anonymization(roi, mode, blur_kernel, mosaic)
139
-
140
- out.write(frame)
141
- idx += 1
142
- if frames > 0:
143
- progress(idx / frames, desc=f"Processing frame {idx}/{frames}")
144
- finally:
145
- cap.release()
146
- out.release()
147
 
 
 
148
  try:
149
- progress(0.95, desc="Merging audio...")
150
- original = VideoFileClip(input_path)
151
- processed = VideoFileClip(temp_video_path).set_audio(original.audio)
152
- processed.write_videofile(
153
- output_path,
154
- codec="libx264",
155
- audio_codec="aac",
156
- threads=1,
157
- logger=None
158
- )
159
- original.close()
160
- processed.close()
161
- return output_path, total_faces, frames
162
  except Exception as e:
163
- print("Audio merging failed:", e)
164
- return temp_video_path, total_faces, frames
165
-
166
 
167
- # ==============================
168
- # Main Processing Functions
169
- # ==============================
170
- def process_image(image, conf, iou, expand_ratio, mode_choice, blur_intensity, mosaic_size):
171
- if image is None:
172
- return None, "⚠️ Please upload an image first!"
173
-
174
- # Convert PIL to BGR
175
- image_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
176
- h, w = image_bgr.shape[:2]
177
-
178
- # Determine blur settings
179
- if mode_choice == "Gaussian Blur":
180
- blur_kernel = blur_intensity
181
- mosaic = 15
182
- else:
183
- blur_kernel = 51
184
- mosaic = mosaic_size
185
-
186
- # Process
187
- result_bgr, face_count = blur_faces_image(
188
- image_bgr.copy(), conf, iou, expand_ratio,
189
- mode_choice, blur_kernel, mosaic
190
- )
191
-
192
- # Convert back to RGB
193
- result_rgb = cv2.cvtColor(result_bgr, cv2.COLOR_BGR2RGB)
194
- result_pil = Image.fromarray(result_rgb)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
 
196
- # Generate log
197
- info_log = f"""βœ… IMAGE PROCESSING COMPLETE!
198
- {'=' * 50}
199
- πŸ–ΌοΈ Image Info:
200
- β€’ Size: {w} x {h} pixels
201
- β€’ Format: RGB
202
  {'=' * 50}
203
- πŸ” Detection Settings:
204
- β€’ Confidence: {conf}
205
- β€’ IoU Threshold: {iou}
206
- β€’ Box Expansion: {expand_ratio}
207
  {'=' * 50}
208
- 🎨 Blur Settings:
209
- β€’ Style: {mode_choice}
210
- β€’ Intensity: {blur_intensity if mode_choice == "Gaussian Blur" else mosaic_size}
 
211
  {'=' * 50}
212
- πŸ‘€ Results:
213
- β€’ Faces Detected: {face_count}
214
- β€’ Faces Blurred: {face_count}
215
  {'=' * 50}
216
- πŸ’Ύ Ready to download!"""
217
 
218
- return result_pil, info_log
219
-
220
-
221
- def process_video(video, conf, iou, expand_ratio, mode_choice, blur_intensity, mosaic_size, progress=gr.Progress()):
222
- if video is None:
223
- return None, "⚠️ Please upload a video first!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
 
225
- # Determine blur settings
226
- if mode_choice == "Gaussian Blur":
227
- blur_kernel = blur_intensity
228
- mosaic = 15
229
- else:
230
- blur_kernel = 51
231
- mosaic = mosaic_size
232
-
233
- try:
234
- output_path, total_faces, total_frames = blur_faces_video(
235
- video, conf, iou, expand_ratio,
236
- mode_choice, blur_kernel, mosaic, progress
237
- )
238
-
239
- info_log = f"""βœ… VIDEO PROCESSING COMPLETE!
240
- {'=' * 50}
241
- πŸŽ₯ Video Info:
242
- β€’ Total Frames: {total_frames}
243
- β€’ Output Path: {os.path.basename(output_path)}
244
  {'=' * 50}
245
- πŸ” Detection Settings:
246
- β€’ Confidence: {conf}
247
- β€’ IoU Threshold: {iou}
248
- β€’ Box Expansion: {expand_ratio}
249
  {'=' * 50}
250
- 🎨 Blur Settings:
251
- β€’ Style: {mode_choice}
252
- β€’ Intensity: {blur_intensity if mode_choice == "Gaussian Blur" else mosaic_size}
 
253
  {'=' * 50}
254
- πŸ‘€ Results:
255
- β€’ Total Faces Detected: {total_faces}
256
- β€’ Frames Processed: {total_frames}
257
  {'=' * 50}
258
- πŸ’Ύ Ready to download!"""
259
-
260
- return output_path, info_log
261
-
262
- except Exception as e:
263
- return None, f"❌ Error: {str(e)}"
264
 
265
 
266
  # ============================================
@@ -324,7 +313,7 @@ a[href*="huggingface.co/spaces"] {
324
 
325
  /* ===== 메인 μ»¨ν…Œμ΄λ„ˆ ===== */
326
  #col-container {
327
- max-width: 1400px;
328
  margin: 0 auto;
329
  }
330
 
@@ -353,36 +342,42 @@ a[href*="huggingface.co/spaces"] {
353
  font-weight: 700 !important;
354
  }
355
 
356
- /* ===== 🎨 Stats μΉ΄λ“œ ===== */
357
- .stats-row {
358
- display: flex !important;
359
- justify-content: center !important;
360
- gap: 1rem !important;
361
- margin: 1.5rem 0 !important;
362
- flex-wrap: wrap !important;
363
- }
364
-
365
- .stat-card {
366
- background: linear-gradient(135deg, #3B82F6 0%, #8B5CF6 100%) !important;
367
  border: 3px solid #1F2937 !important;
368
  border-radius: 12px !important;
369
- padding: 1rem 1.5rem !important;
370
- text-align: center !important;
371
- box-shadow: 4px 4px 0px #1F2937 !important;
372
- min-width: 120px !important;
373
  }
374
 
375
- .stat-card .emoji {
376
- font-size: 2rem !important;
377
- display: block !important;
378
- margin-bottom: 0.3rem !important;
 
379
  }
380
 
381
- .stat-card .label {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
  color: #FFFFFF !important;
383
- font-family: 'Comic Neue', cursive !important;
384
- font-weight: 700 !important;
385
- font-size: 0.9rem !important;
386
  }
387
 
388
  /* ===== 🎨 μΉ΄λ“œ/νŒ¨λ„ - λ§Œν™” ν”„λ ˆμž„ μŠ€νƒ€μΌ ===== */
@@ -404,41 +399,7 @@ a[href*="huggingface.co/spaces"] {
404
  box-shadow: 8px 8px 0px #1F2937 !important;
405
  }
406
 
407
- /* ===== 🎨 νƒ­ μŠ€νƒ€μΌ ===== */
408
- .gr-tabs {
409
- border: 3px solid #1F2937 !important;
410
- border-radius: 12px !important;
411
- overflow: hidden !important;
412
- box-shadow: 6px 6px 0px #1F2937 !important;
413
- }
414
-
415
- .gr-tab-nav {
416
- background: #FACC15 !important;
417
- border-bottom: 3px solid #1F2937 !important;
418
- }
419
-
420
- .gr-tab-nav button {
421
- font-family: 'Bangers', cursive !important;
422
- font-size: 1.2rem !important;
423
- letter-spacing: 1px !important;
424
- color: #1F2937 !important;
425
- padding: 12px 24px !important;
426
- border: none !important;
427
- background: transparent !important;
428
- transition: all 0.2s ease !important;
429
- }
430
-
431
- .gr-tab-nav button:hover {
432
- background: #FDE68A !important;
433
- }
434
-
435
- .gr-tab-nav button.selected {
436
- background: #3B82F6 !important;
437
- color: #FFFFFF !important;
438
- text-shadow: 1px 1px 0px #1F2937 !important;
439
- }
440
-
441
- /* ===== 🎨 μž…λ ₯ ν•„λ“œ ===== */
442
  textarea,
443
  input[type="text"],
444
  input[type="number"] {
@@ -460,54 +421,15 @@ input[type="number"]:focus {
460
  outline: none !important;
461
  }
462
 
463
- /* ===== 🎨 λ“œλ‘­λ‹€μš΄ μŠ€νƒ€μΌ ===== */
464
- .gr-dropdown {
465
- background: #FFFFFF !important;
466
- border: 3px solid #1F2937 !important;
467
- border-radius: 8px !important;
468
- box-shadow: 3px 3px 0px #1F2937 !important;
469
- }
470
-
471
- .gr-dropdown > div {
472
- background: #FFFFFF !important;
473
- border: none !important;
474
- }
475
-
476
- .gr-dropdown input {
477
- color: #1F2937 !important;
478
- font-family: 'Comic Neue', cursive !important;
479
- font-weight: 700 !important;
480
- }
481
-
482
- .gr-dropdown ul {
483
- background: #FFFFFF !important;
484
- border: 3px solid #1F2937 !important;
485
- border-radius: 8px !important;
486
- box-shadow: 4px 4px 0px #1F2937 !important;
487
- }
488
-
489
- .gr-dropdown ul li {
490
- color: #1F2937 !important;
491
- font-family: 'Comic Neue', cursive !important;
492
- font-weight: 700 !important;
493
- padding: 8px 12px !important;
494
- }
495
-
496
- .gr-dropdown ul li:hover {
497
- background: #FACC15 !important;
498
- color: #1F2937 !important;
499
- }
500
-
501
- .gr-dropdown ul li.selected {
502
- background: #3B82F6 !important;
503
- color: #FFFFFF !important;
504
  }
505
 
506
- /* ===== 🎨 Primary λ²„νŠΌ ===== */
507
  .gr-button-primary,
508
  button.primary,
509
- .gr-button.primary,
510
- .process-btn {
511
  background: #3B82F6 !important;
512
  border: 3px solid #1F2937 !important;
513
  border-radius: 8px !important;
@@ -524,8 +446,7 @@ button.primary,
524
 
525
  .gr-button-primary:hover,
526
  button.primary:hover,
527
- .gr-button.primary:hover,
528
- .process-btn:hover {
529
  background: #2563EB !important;
530
  transform: translate(-2px, -2px) !important;
531
  box-shadow: 7px 7px 0px #1F2937 !important;
@@ -533,12 +454,34 @@ button.primary:hover,
533
 
534
  .gr-button-primary:active,
535
  button.primary:active,
536
- .gr-button.primary:active,
537
- .process-btn:active {
538
  transform: translate(3px, 3px) !important;
539
  box-shadow: 2px 2px 0px #1F2937 !important;
540
  }
541
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
542
  /* ===== 🎨 둜그 좜λ ₯ μ˜μ—­ ===== */
543
  .info-log textarea {
544
  background: #1F2937 !important;
@@ -551,11 +494,11 @@ button.primary:active,
551
  box-shadow: 4px 4px 0px #10B981 !important;
552
  }
553
 
554
- /* ===== 🎨 이미지/λΉ„λ””μ˜€ μ˜μ—­ ===== */
555
- .gr-image,
556
  .gr-video,
557
- .image-container,
558
- .video-container {
 
559
  border: 4px solid #1F2937 !important;
560
  border-radius: 8px !important;
561
  box-shadow: 8px 8px 0px #1F2937 !important;
@@ -563,6 +506,27 @@ button.primary:active,
563
  background: #FFFFFF !important;
564
  }
565
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
566
  /* ===== 🎨 μŠ¬λΌμ΄λ” μŠ€νƒ€μΌ ===== */
567
  input[type="range"] {
568
  accent-color: #3B82F6 !important;
@@ -572,21 +536,6 @@ input[type="range"] {
572
  background: #FFFFFF !important;
573
  }
574
 
575
- /* ===== 🎨 μ•„μ½”λ””μ–Έ ===== */
576
- .gr-accordion {
577
- background: #FACC15 !important;
578
- border: 3px solid #1F2937 !important;
579
- border-radius: 8px !important;
580
- box-shadow: 4px 4px 0px #1F2937 !important;
581
- }
582
-
583
- .gr-accordion-header {
584
- color: #1F2937 !important;
585
- font-family: 'Comic Neue', cursive !important;
586
- font-weight: 700 !important;
587
- font-size: 1.1rem !important;
588
- }
589
-
590
  /* ===== 🎨 라벨 μŠ€νƒ€μΌ ===== */
591
  label,
592
  .gr-input-label,
@@ -597,15 +546,30 @@ label,
597
  font-size: 1rem !important;
598
  }
599
 
600
- /* ===== 🎨 ν”„λ‘œκ·Έλ ˆμŠ€ λ°” ===== */
601
- .progress-bar,
602
- .gr-progress-bar {
603
- background: #3B82F6 !important;
604
- border: 2px solid #1F2937 !important;
605
- border-radius: 4px !important;
 
 
 
 
606
  }
607
 
608
- /* ===== 🎨 μŠ€ν¬λ‘€λ°” ===== */
 
 
 
 
 
 
 
 
 
 
 
609
  ::-webkit-scrollbar {
610
  width: 12px;
611
  height: 12px;
@@ -672,9 +636,9 @@ a:hover {
672
  box-shadow: 4px 4px 0px #1F2937 !important;
673
  }
674
 
675
- .stat-card {
676
- min-width: 100px !important;
677
- padding: 0.8rem 1rem !important;
678
  }
679
  }
680
 
@@ -687,10 +651,8 @@ a:hover {
687
  """
688
 
689
 
690
- # ============================================
691
- # Build the Gradio Interface
692
- # ============================================
693
- with gr.Blocks(fill_height=True, css=css, title="Ansim Blur - Face Privacy Protection") as demo:
694
 
695
  # HOME Badge
696
  gr.HTML("""
@@ -698,270 +660,189 @@ with gr.Blocks(fill_height=True, css=css, title="Ansim Blur - Face Privacy Prote
698
  <a href="https://www.humangen.ai" target="_blank" style="text-decoration: none;">
699
  <img src="https://img.shields.io/static/v1?label=🏠 HOME&message=HUMANGEN.AI&color=0000ff&labelColor=ffcc00&style=for-the-badge" alt="HOME">
700
  </a>
701
- <a href="https://discord.gg/openfreeai" target="_blank" style="text-decoration: none; margin-left: 10px;">
702
- <img src="https://img.shields.io/static/v1?label=Discord&message=OpenFree%20AI&color=5865F2&labelColor=1F2937&logo=discord&logoColor=white&style=for-the-badge" alt="Discord">
703
- </a>
704
  </div>
705
  """)
706
 
707
  # Header Title
708
  gr.Markdown(
709
  """
710
- # πŸ”’ ANSIM BLUR - FACE PRIVACY πŸ›‘οΈ
711
  """,
712
  elem_classes="header-text"
713
  )
714
 
715
  gr.Markdown(
716
  """
717
- <p class="subtitle">🎭 Advanced AI-Powered Face Detection & Privacy Protection! ✨</p>
718
  """,
719
  )
720
 
721
- # Stats Cards
722
- gr.HTML("""
723
- <div class="stats-row">
724
- <div class="stat-card">
725
- <span class="emoji">πŸ–ΌοΈ</span>
726
- <span class="label">Image Support</span>
727
- </div>
728
- <div class="stat-card">
729
- <span class="emoji">πŸŽ₯</span>
730
- <span class="label">Video Processing</span>
731
- </div>
732
- <div class="stat-card">
733
- <span class="emoji">⚑</span>
734
- <span class="label">Real-time AI</span>
735
- </div>
736
- <div class="stat-card">
737
- <span class="emoji">πŸ›‘οΈ</span>
738
- <span class="label">Privacy First</span>
739
- </div>
740
- </div>
741
- """)
742
-
743
- # Device Info
744
- gr.Markdown(f"""
745
- <p style="text-align: center; font-family: 'Comic Neue', cursive; font-weight: 700; color: #1F2937; margin: 1rem 0;">
746
- πŸ–₯️ Running on: <span style="color: #3B82F6;">{device.upper()}</span>
747
- </p>
748
- """)
749
-
750
- # Main Tabs
751
  with gr.Tabs():
752
- # ===== IMAGE TAB =====
753
- with gr.Tab("πŸ“Έ Image Processing"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
754
  with gr.Row(equal_height=False):
755
- # Left Column - Input & Settings
756
- with gr.Column(scale=1, min_width=400):
757
- input_image = gr.Image(
758
- label="πŸ–ΌοΈ Upload Image",
759
- type="pil",
760
- height=350
 
 
 
 
 
761
  )
762
 
763
- with gr.Accordion("βš™οΈ Detection Settings", open=True):
764
- conf_img = gr.Slider(
765
- minimum=0.05,
766
- maximum=0.9,
767
- value=0.25,
768
- step=0.01,
769
- label="🎯 Confidence Threshold"
770
- )
771
- iou_img = gr.Slider(
772
- minimum=0.1,
773
- maximum=0.9,
774
- value=0.45,
775
- step=0.01,
776
- label="πŸ“ NMS IoU"
777
- )
778
- expand_img = gr.Slider(
779
- minimum=0.0,
780
- maximum=0.5,
781
- value=0.05,
782
- step=0.01,
783
- label="πŸ”² Box Expansion"
784
- )
785
 
786
- with gr.Accordion("🎨 Blur Settings", open=True):
787
- mode_img = gr.Dropdown(
788
- choices=["Gaussian Blur", "Mosaic Effect"],
789
- value="Gaussian Blur",
790
- label="πŸ–ŒοΈ Style"
791
- )
792
- blur_intensity_img = gr.Slider(
793
- minimum=15,
794
- maximum=151,
795
- value=51,
796
- step=2,
797
- label="πŸ’¨ Blur Intensity"
798
- )
799
- mosaic_size_img = gr.Slider(
800
- minimum=5,
801
- maximum=40,
802
- value=15,
803
- step=1,
804
- label="🧩 Mosaic Size"
805
- )
806
 
807
- process_img_btn = gr.Button(
808
- "πŸ” PROCESS IMAGE! 🎭",
809
  variant="primary",
810
- size="lg",
811
- elem_classes="process-btn"
812
- )
813
-
814
- # Right Column - Output
815
- with gr.Column(scale=1, min_width=400):
816
- output_image = gr.Image(
817
- label="πŸ–ΌοΈ Processed Result",
818
- type="pil",
819
- height=350
820
  )
821
 
822
- with gr.Accordion("πŸ“œ Processing Log", open=True):
823
- info_log_img = gr.Textbox(
824
  label="",
825
- placeholder="Upload an image and click process...",
826
  lines=12,
827
- max_lines=18,
828
  interactive=False,
829
  elem_classes="info-log"
830
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
831
 
832
- # ===== VIDEO TAB =====
833
- with gr.Tab("🎬 Video Processing"):
 
 
 
 
 
 
 
 
834
  with gr.Row(equal_height=False):
835
- # Left Column - Input & Settings
836
- with gr.Column(scale=1, min_width=400):
837
- input_video = gr.Video(
838
- label="πŸŽ₯ Upload Video",
839
- height=350
 
 
 
 
 
840
  )
841
 
842
- with gr.Accordion("βš™οΈ Detection Settings", open=True):
843
- conf_vid = gr.Slider(
844
- minimum=0.05,
845
- maximum=0.9,
846
- value=0.25,
847
- step=0.01,
848
- label="🎯 Confidence Threshold"
849
- )
850
- iou_vid = gr.Slider(
851
- minimum=0.1,
852
- maximum=0.9,
853
- value=0.45,
854
- step=0.01,
855
- label="πŸ“ NMS IoU"
856
- )
857
- expand_vid = gr.Slider(
858
- minimum=0.0,
859
- maximum=0.5,
860
- value=0.05,
861
- step=0.01,
862
- label="πŸ”² Box Expansion"
863
- )
864
 
865
- with gr.Accordion("🎨 Blur Settings", open=True):
866
- mode_vid = gr.Dropdown(
867
- choices=["Gaussian Blur", "Mosaic Effect"],
868
- value="Gaussian Blur",
869
- label="πŸ–ŒοΈ Style"
870
- )
871
- blur_intensity_vid = gr.Slider(
872
- minimum=15,
873
- maximum=151,
874
- value=51,
875
- step=2,
876
- label="πŸ’¨ Blur Intensity"
877
- )
878
- mosaic_size_vid = gr.Slider(
879
- minimum=5,
880
- maximum=40,
881
- value=15,
882
- step=1,
883
- label="🧩 Mosaic Size"
884
- )
885
 
886
- process_vid_btn = gr.Button(
887
- "🎬 PROCESS VIDEO! πŸ›‘οΈ",
888
  variant="primary",
889
- size="lg",
890
- elem_classes="process-btn"
891
- )
892
-
893
- # Right Column - Output
894
- with gr.Column(scale=1, min_width=400):
895
- output_video = gr.Video(
896
- label="πŸŽ₯ Processed Result",
897
- height=350
898
  )
899
 
900
- with gr.Accordion("πŸ“œ Processing Log", open=True):
901
- info_log_vid = gr.Textbox(
902
  label="",
903
- placeholder="Upload a video and click process...",
904
  lines=12,
905
- max_lines=18,
906
  interactive=False,
907
  elem_classes="info-log"
908
  )
909
-
910
- # Instructions
911
- gr.Markdown(
912
- """
913
- <div style="background: linear-gradient(135deg, #EFF6FF 0%, #DBEAFE 100%); border: 3px solid #3B82F6; border-radius: 12px; padding: 1.5rem; box-shadow: 6px 6px 0px #1F2937; margin-top: 2rem;">
914
- <h3 style="font-family: 'Bangers', cursive; color: #1F2937; font-size: 1.3rem; margin-bottom: 0.5rem;">πŸ“ HOW TO USE</h3>
915
- <ol style="font-family: 'Comic Neue', cursive; color: #1F2937; font-weight: 700;">
916
- <li>Upload an image or video containing faces</li>
917
- <li>Adjust detection settings (confidence, IoU, expansion)</li>
918
- <li>Choose blur style (Gaussian or Mosaic)</li>
919
- <li>Click the Process button and wait for results</li>
920
- <li>Download your privacy-protected media!</li>
921
- </ol>
922
- </div>
923
-
924
- <div style="background: linear-gradient(135deg, #FEF3C7 0%, #FDE68A 100%); border: 3px solid #F59E0B; border-radius: 12px; padding: 1.5rem; box-shadow: 6px 6px 0px #1F2937; margin-top: 1rem;">
925
- <h3 style="font-family: 'Bangers', cursive; color: #1F2937; font-size: 1.3rem; margin-bottom: 0.5rem;">πŸ’‘ TIPS</h3>
926
- <ul style="font-family: 'Comic Neue', cursive; color: #1F2937; font-weight: 700;">
927
- <li>Lower confidence = more faces detected (may include false positives)</li>
928
- <li>Higher blur intensity = stronger privacy protection</li>
929
- <li>Mosaic effect works better for artistic results</li>
930
- <li>Video processing may take time depending on length</li>
931
- </ul>
932
- </div>
933
- """
934
- )
935
-
936
- # Event Handlers
937
- process_img_btn.click(
938
- fn=process_image,
939
- inputs=[
940
- input_image,
941
- conf_img,
942
- iou_img,
943
- expand_img,
944
- mode_img,
945
- blur_intensity_img,
946
- mosaic_size_img
947
- ],
948
- outputs=[output_image, info_log_img]
949
- )
950
-
951
- process_vid_btn.click(
952
- fn=process_video,
953
- inputs=[
954
- input_video,
955
- conf_vid,
956
- iou_vid,
957
- expand_vid,
958
- mode_vid,
959
- blur_intensity_vid,
960
- mosaic_size_vid
961
- ],
962
- outputs=[output_video, info_log_vid]
963
- )
964
 
965
 
 
966
  if __name__ == "__main__":
967
- demo.launch()
 
 
 
 
1
+ import spaces
2
+ import logging
3
+ from datetime import datetime
 
 
4
  from pathlib import Path
5
+ import gradio as gr
6
  import torch
7
+ import torchaudio
8
+ import os
9
+ import requests
10
+ from transformers import pipeline
11
+ import tempfile
12
+ import numpy as np
13
+ from einops import rearrange
14
+ import cv2
15
+ from scipy.io import wavfile
16
+ import librosa
17
+ import json
18
+ from typing import Optional, Tuple, List
19
+ import atexit
20
+
21
+ # ν™˜κ²½ λ³€μˆ˜ μ„€μ •μœΌλ‘œ torch.load 체크 우회 (μž„μ‹œ ν•΄κ²°μ±…)
22
+ os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1"
23
+
24
+ try:
25
+ import mmaudio
26
+ except ImportError:
27
+ os.system("pip install -e .")
28
+ import mmaudio
29
+
30
+ from mmaudio.eval_utils import (ModelConfig, all_model_cfg, generate, load_video, make_video,
31
+ setup_eval_logging)
32
+ from mmaudio.model.flow_matching import FlowMatching
33
+ from mmaudio.model.networks import MMAudio, get_my_mmaudio
34
+ from mmaudio.model.sequence_config import SequenceConfig
35
+ from mmaudio.model.utils.features_utils import FeaturesUtils
36
+
37
+ # λ‘œκΉ… μ„€μ •
38
+ logging.basicConfig(
39
+ level=logging.INFO,
40
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
41
+ )
42
+ log = logging.getLogger()
43
+
44
+ # CUDA μ„€μ •
45
+ if torch.cuda.is_available():
46
+ device = torch.device("cuda")
47
+ torch.backends.cuda.matmul.allow_tf32 = True
48
+ torch.backends.cudnn.allow_tf32 = True
49
+ torch.backends.cudnn.benchmark = True
50
+ else:
51
+ device = torch.device("cpu")
52
+
53
+ dtype = torch.bfloat16
54
+
55
+ # λͺ¨λΈ μ„€μ •
56
+ model: ModelConfig = all_model_cfg['large_44k_v2']
57
+ model.download_if_needed()
58
+ output_dir = Path('./output/gradio')
59
+
60
+ setup_eval_logging()
61
+
62
+ # λ²ˆμ—­κΈ° μ„€μ •
63
+ try:
64
+ translator = pipeline("translation",
65
+ model="Helsinki-NLP/opus-mt-ko-en",
66
+ device="cpu",
67
+ use_fast=True,
68
+ trust_remote_code=False)
69
+ except Exception as e:
70
+ log.warning(f"Failed to load translation model with safetensors: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  try:
72
+ translator = pipeline("translation",
73
+ model="Helsinki-NLP/opus-mt-ko-en",
74
+ device="cpu")
75
+ except Exception as e2:
76
+ log.error(f"Failed to load translation model: {e2}")
77
+ translator = None
78
+
79
+ PIXABAY_API_KEY = "33492762-a28a596ec4f286f84cd328b17"
80
+
81
+ def cleanup_temp_files():
82
+ temp_dir = tempfile.gettempdir()
83
+ for file in os.listdir(temp_dir):
84
+ if file.endswith(('.mp4', '.flac')):
85
+ try:
86
+ os.remove(os.path.join(temp_dir, file))
87
+ except:
88
+ pass
89
+
90
+ atexit.register(cleanup_temp_files)
91
+
92
+ def get_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
93
+ with torch.cuda.device(device):
94
+ seq_cfg = model.seq_cfg
95
+ net: MMAudio = get_my_mmaudio(model.model_name).to(device, dtype).eval()
96
+ net.load_weights(torch.load(model.model_path, map_location=device, weights_only=True))
97
+ log.info(f'Loaded weights from {model.model_path}')
98
+
99
+ feature_utils = FeaturesUtils(
100
+ tod_vae_ckpt=model.vae_path,
101
+ synchformer_ckpt=model.synchformer_ckpt,
102
+ enable_conditions=True,
103
+ mode=model.mode,
104
+ bigvgan_vocoder_ckpt=model.bigvgan_16k_path,
105
+ need_vae_encoder=False
106
+ ).to(device, dtype).eval()
107
+
108
+ return net, feature_utils, seq_cfg
109
+
110
+ net, feature_utils, seq_cfg = get_model()
111
+
112
+ def translate_prompt(text):
113
+ try:
114
+ if translator is None:
115
+ return text
116
+
117
+ if text and any(ord(char) >= 0x3131 and ord(char) <= 0xD7A3 for char in text):
118
  with torch.no_grad():
119
+ translation = translator(text)[0]['translation_text']
120
+ return translation
121
+ return text
122
+ except Exception as e:
123
+ logging.error(f"Translation error: {e}")
124
+ return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
+ @torch.no_grad()
127
+ def search_videos(query):
128
  try:
129
+ query = translate_prompt(query)
130
+ return search_pixabay_videos(query, PIXABAY_API_KEY)
 
 
 
 
 
 
 
 
 
 
 
131
  except Exception as e:
132
+ logging.error(f"Video search error: {e}")
133
+ return []
 
134
 
135
+ def search_pixabay_videos(query, api_key):
136
+ try:
137
+ base_url = "https://pixabay.com/api/videos/"
138
+ params = {
139
+ "key": api_key,
140
+ "q": query,
141
+ "per_page": 40
142
+ }
143
+
144
+ response = requests.get(base_url, params=params)
145
+ if response.status_code == 200:
146
+ data = response.json()
147
+ return [video['videos']['large']['url'] for video in data.get('hits', [])]
148
+ return []
149
+ except Exception as e:
150
+ logging.error(f"Pixabay API error: {e}")
151
+ return []
152
+
153
+ @spaces.GPU
154
+ @torch.inference_mode()
155
+ def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
156
+ cfg_strength: float, duration: float):
157
+ prompt = translate_prompt(prompt)
158
+ negative_prompt = translate_prompt(negative_prompt)
159
+
160
+ rng = torch.Generator(device=device)
161
+ rng.manual_seed(seed)
162
+ fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
163
+
164
+ clip_frames, sync_frames, duration = load_video(video, duration)
165
+ clip_frames = clip_frames.unsqueeze(0)
166
+ sync_frames = sync_frames.unsqueeze(0)
167
+ seq_cfg.duration = duration
168
+ net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)
169
+
170
+ audios = generate(clip_frames,
171
+ sync_frames, [prompt],
172
+ negative_text=[negative_prompt],
173
+ feature_utils=feature_utils,
174
+ net=net,
175
+ fm=fm,
176
+ rng=rng,
177
+ cfg_strength=cfg_strength)
178
+ audio = audios.float().cpu()[0]
179
+
180
+ video_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
181
+ make_video(video,
182
+ video_save_path,
183
+ audio,
184
+ sampling_rate=seq_cfg.sampling_rate,
185
+ duration_sec=seq_cfg.duration)
186
 
187
+ # 정보 둜그 생성
188
+ info_log = f"""βœ… VIDEO TO AUDIO COMPLETE!
 
 
 
 
189
  {'=' * 50}
190
+ 🎬 Video Info:
191
+ β€’ Duration: {duration:.2f} seconds
 
 
192
  {'=' * 50}
193
+ βš™οΈ Generation Settings:
194
+ β€’ Seed: {seed}
195
+ β€’ Steps: {num_steps}
196
+ β€’ CFG Strength: {cfg_strength}
197
  {'=' * 50}
198
+ πŸ“ Prompts:
199
+ β€’ Prompt: {prompt[:40]}{'...' if len(prompt) > 40 else ''}
200
+ β€’ Negative: {negative_prompt[:30]}{'...' if len(negative_prompt) > 30 else ''}
201
  {'=' * 50}
202
+ πŸ’Ύ Video with audio ready!"""
203
 
204
+ return video_save_path, info_log
205
+
206
+ @spaces.GPU
207
+ @torch.inference_mode()
208
+ def text_to_audio(prompt: str, negative_prompt: str, seed: int, num_steps: int, cfg_strength: float,
209
+ duration: float):
210
+ prompt = translate_prompt(prompt)
211
+ negative_prompt = translate_prompt(negative_prompt)
212
+
213
+ rng = torch.Generator(device=device)
214
+ rng.manual_seed(seed)
215
+ fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
216
+
217
+ clip_frames = sync_frames = None
218
+ seq_cfg.duration = duration
219
+ net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)
220
+
221
+ audios = generate(clip_frames,
222
+ sync_frames, [prompt],
223
+ negative_text=[negative_prompt],
224
+ feature_utils=feature_utils,
225
+ net=net,
226
+ fm=fm,
227
+ rng=rng,
228
+ cfg_strength=cfg_strength)
229
+ audio = audios.float().cpu()[0]
230
+
231
+ audio_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.flac').name
232
+ torchaudio.save(audio_save_path, audio, seq_cfg.sampling_rate)
233
 
234
+ # 정보 둜그 생성
235
+ info_log = f"""βœ… TEXT TO AUDIO COMPLETE!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  {'=' * 50}
237
+ 🎡 Audio Info:
238
+ β€’ Duration: {duration:.2f} seconds
239
+ β€’ Sample Rate: {seq_cfg.sampling_rate} Hz
 
240
  {'=' * 50}
241
+ βš™οΈ Generation Settings:
242
+ β€’ Seed: {seed}
243
+ β€’ Steps: {num_steps}
244
+ β€’ CFG Strength: {cfg_strength}
245
  {'=' * 50}
246
+ πŸ“ Prompts:
247
+ β€’ Prompt: {prompt[:40]}{'...' if len(prompt) > 40 else ''}
248
+ β€’ Negative: {negative_prompt[:30]}{'...' if len(negative_prompt) > 30 else ''}
249
  {'=' * 50}
250
+ πŸ’Ύ Audio ready to download!"""
251
+
252
+ return audio_save_path, info_log
 
 
 
253
 
254
 
255
  # ============================================
 
313
 
314
  /* ===== 메인 μ»¨ν…Œμ΄λ„ˆ ===== */
315
  #col-container {
316
+ max-width: 1200px;
317
  margin: 0 auto;
318
  }
319
 
 
342
  font-weight: 700 !important;
343
  }
344
 
345
+ /* ===== 🎨 νƒ­ μŠ€νƒ€μΌ ===== */
346
+ .tabs {
347
+ background: #FFFFFF !important;
 
 
 
 
 
 
 
 
348
  border: 3px solid #1F2937 !important;
349
  border-radius: 12px !important;
350
+ box-shadow: 6px 6px 0px #1F2937 !important;
351
+ padding: 10px !important;
 
 
352
  }
353
 
354
+ .tab-nav {
355
+ background: #FACC15 !important;
356
+ border-radius: 8px !important;
357
+ padding: 5px !important;
358
+ border: 2px solid #1F2937 !important;
359
  }
360
 
361
+ .tab-nav button {
362
+ font-family: 'Bangers', cursive !important;
363
+ font-size: 1.1rem !important;
364
+ letter-spacing: 1px !important;
365
+ color: #1F2937 !important;
366
+ background: transparent !important;
367
+ border: none !important;
368
+ padding: 10px 20px !important;
369
+ border-radius: 6px !important;
370
+ transition: all 0.2s ease !important;
371
+ }
372
+
373
+ .tab-nav button:hover {
374
+ background: #FEF3C7 !important;
375
+ }
376
+
377
+ .tab-nav button.selected {
378
+ background: #3B82F6 !important;
379
  color: #FFFFFF !important;
380
+ box-shadow: 3px 3px 0px #1F2937 !important;
 
 
381
  }
382
 
383
  /* ===== 🎨 μΉ΄λ“œ/νŒ¨λ„ - λ§Œν™” ν”„λ ˆμž„ μŠ€νƒ€μΌ ===== */
 
399
  box-shadow: 8px 8px 0px #1F2937 !important;
400
  }
401
 
402
+ /* ===== 🎨 μž…λ ₯ ν•„λ“œ (Textbox) ===== */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
  textarea,
404
  input[type="text"],
405
  input[type="number"] {
 
421
  outline: none !important;
422
  }
423
 
424
+ textarea::placeholder {
425
+ color: #9CA3AF !important;
426
+ font-weight: 400 !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
427
  }
428
 
429
+ /* ===== 🎨 Primary λ²„νŠΌ - μ½”λ―Ή 블루 ===== */
430
  .gr-button-primary,
431
  button.primary,
432
+ .gr-button.primary {
 
433
  background: #3B82F6 !important;
434
  border: 3px solid #1F2937 !important;
435
  border-radius: 8px !important;
 
446
 
447
  .gr-button-primary:hover,
448
  button.primary:hover,
449
+ .gr-button.primary:hover {
 
450
  background: #2563EB !important;
451
  transform: translate(-2px, -2px) !important;
452
  box-shadow: 7px 7px 0px #1F2937 !important;
 
454
 
455
  .gr-button-primary:active,
456
  button.primary:active,
457
+ .gr-button.primary:active {
 
458
  transform: translate(3px, 3px) !important;
459
  box-shadow: 2px 2px 0px #1F2937 !important;
460
  }
461
 
462
+ /* ===== 🎨 Secondary λ²„νŠΌ - μ½”λ―Ή λ ˆλ“œ ===== */
463
+ .gr-button-secondary,
464
+ button.secondary {
465
+ background: #EF4444 !important;
466
+ border: 3px solid #1F2937 !important;
467
+ border-radius: 8px !important;
468
+ color: #FFFFFF !important;
469
+ font-family: 'Bangers', cursive !important;
470
+ font-weight: 400 !important;
471
+ font-size: 1.1rem !important;
472
+ letter-spacing: 1px !important;
473
+ box-shadow: 4px 4px 0px #1F2937 !important;
474
+ transition: all 0.1s ease !important;
475
+ text-shadow: 1px 1px 0px #1F2937 !important;
476
+ }
477
+
478
+ .gr-button-secondary:hover,
479
+ button.secondary:hover {
480
+ background: #DC2626 !important;
481
+ transform: translate(-2px, -2px) !important;
482
+ box-shadow: 6px 6px 0px #1F2937 !important;
483
+ }
484
+
485
  /* ===== 🎨 둜그 좜λ ₯ μ˜μ—­ ===== */
486
  .info-log textarea {
487
  background: #1F2937 !important;
 
494
  box-shadow: 4px 4px 0px #10B981 !important;
495
  }
496
 
497
+ /* ===== 🎨 λΉ„λ””μ˜€/μ˜€λ””μ˜€ μ˜μ—­ ===== */
 
498
  .gr-video,
499
+ .gr-audio,
500
+ video,
501
+ audio {
502
  border: 4px solid #1F2937 !important;
503
  border-radius: 8px !important;
504
  box-shadow: 8px 8px 0px #1F2937 !important;
 
506
  background: #FFFFFF !important;
507
  }
508
 
509
+ /* ===== 🎨 가러리 μŠ€νƒ€μΌ ===== */
510
+ .gr-gallery {
511
+ background: #FFFFFF !important;
512
+ border: 3px solid #1F2937 !important;
513
+ border-radius: 8px !important;
514
+ box-shadow: 6px 6px 0px #1F2937 !important;
515
+ padding: 10px !important;
516
+ }
517
+
518
+ .gr-gallery .thumbnail-item {
519
+ border: 3px solid #1F2937 !important;
520
+ border-radius: 6px !important;
521
+ transition: all 0.2s ease !important;
522
+ overflow: hidden !important;
523
+ }
524
+
525
+ .gr-gallery .thumbnail-item:hover {
526
+ transform: scale(1.05) !important;
527
+ box-shadow: 4px 4px 0px #3B82F6 !important;
528
+ }
529
+
530
  /* ===== 🎨 μŠ¬λΌμ΄λ” μŠ€νƒ€μΌ ===== */
531
  input[type="range"] {
532
  accent-color: #3B82F6 !important;
 
536
  background: #FFFFFF !important;
537
  }
538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
539
  /* ===== 🎨 라벨 μŠ€νƒ€μΌ ===== */
540
  label,
541
  .gr-input-label,
 
546
  font-size: 1rem !important;
547
  }
548
 
549
+ span.gr-label {
550
+ color: #1F2937 !important;
551
+ }
552
+
553
+ /* ===== 🎨 정보 ν…μŠ€νŠΈ ===== */
554
+ .gr-info,
555
+ .info {
556
+ color: #6B7280 !important;
557
+ font-family: 'Comic Neue', cursive !important;
558
+ font-size: 0.9rem !important;
559
  }
560
 
561
+ /* ===== 🎨 Number Input μŠ€νƒ€μΌ ===== */
562
+ .gr-number input {
563
+ background: #FFFFFF !important;
564
+ border: 3px solid #1F2937 !important;
565
+ border-radius: 8px !important;
566
+ color: #1F2937 !important;
567
+ font-family: 'Comic Neue', cursive !important;
568
+ font-weight: 700 !important;
569
+ box-shadow: 3px 3px 0px #1F2937 !important;
570
+ }
571
+
572
+ /* ===== 🎨 μŠ€ν¬λ‘€λ°” - μ½”λ―Ή μŠ€νƒ€μΌ ===== */
573
  ::-webkit-scrollbar {
574
  width: 12px;
575
  height: 12px;
 
636
  box-shadow: 4px 4px 0px #1F2937 !important;
637
  }
638
 
639
+ .tab-nav button {
640
+ font-size: 0.9rem !important;
641
+ padding: 8px 12px !important;
642
  }
643
  }
644
 
 
651
  """
652
 
653
 
654
# Build the Gradio Blocks interface.
with gr.Blocks(fill_height=True, css=css, title="MMAudio Studio") as demo:

    # HOME badge linking back to the main site.
    # NOTE(review): the wrapping <div> was on a context line not visible in the
    # diff; reconstructed here for centering — confirm against the original markup.
    gr.HTML("""
    <div style="display: flex; justify-content: center;">
        <a href="https://www.humangen.ai" target="_blank" style="text-decoration: none;">
            <img src="https://img.shields.io/static/v1?label=🏠 HOME&message=HUMANGEN.AI&color=0000ff&labelColor=ffcc00&style=for-the-badge" alt="HOME">
        </a>
    </div>
    """)

    # Header title.
    gr.Markdown(
        """
        # 🎵 MMAUDIO STUDIO 🎬
        """,
        elem_classes="header-text"
    )

    gr.Markdown(
        """
        <p class="subtitle">🔊 Generate Audio from Text or Video • Korean Supported! 한글지원 🇰🇷</p>
        """,
    )

    with gr.Tabs():
        # Tab 1: Pixabay video search — find candidate input clips.
        with gr.TabItem("🔍 Video Search"):
            gr.Markdown(
                """
                <p style="text-align: center; font-family: 'Comic Neue', cursive; font-weight: 700; color: #1F2937; margin-bottom: 1rem;">
                📹 Search for videos from Pixabay to use as input!
                </p>
                """
            )

            with gr.Row():
                with gr.Column(scale=1):
                    # Label advertises Korean support only when the translator loaded.
                    search_query = gr.Textbox(
                        label="🔎 Search Query (한글지원)" if translator else "🔎 Search Query",
                        placeholder="Enter search keywords...",
                        lines=1
                    )
                    search_btn = gr.Button(
                        "🔍 SEARCH VIDEOS!",
                        variant="primary",
                        size="lg"
                    )

            search_gallery = gr.Gallery(
                label="📺 Search Results",
                columns=4,
                rows=5,
                height=500
            )

            search_btn.click(
                fn=search_videos,
                inputs=[search_query],
                outputs=[search_gallery]
            )

        # Tab 2: Video-to-Audio — generate audio conditioned on an uploaded video.
        with gr.TabItem("🎬 Video-to-Audio"):
            gr.Markdown(
                """
                <p style="text-align: center; font-family: 'Comic Neue', cursive; font-weight: 700; color: #1F2937; margin-bottom: 1rem;">
                🎥 Upload a video and generate matching audio!
                </p>
                """
            )

            with gr.Row(equal_height=False):
                with gr.Column(scale=1):
                    v2a_video = gr.Video(label="📹 Input Video")
                    v2a_prompt = gr.Textbox(
                        label="✏️ Prompt (한글지원)" if translator else "✏️ Prompt",
                        placeholder="Describe the audio you want...",
                        lines=2
                    )
                    v2a_negative = gr.Textbox(
                        label="🚫 Negative Prompt",
                        value="music",
                        lines=1
                    )

                    with gr.Row():
                        # precision=0 makes Gradio deliver ints: the seed feeds
                        # torch.Generator.manual_seed (rejects floats) and the
                        # step count must be integral.
                        v2a_seed = gr.Number(label="🎲 Seed", value=0, precision=0)
                        v2a_steps = gr.Number(label="🔄 Steps", value=25, precision=0)

                    with gr.Row():
                        v2a_cfg = gr.Number(label="🎯 Guidance Scale", value=4.5)
                        v2a_duration = gr.Number(label="⏱️ Duration (sec)", value=8)

                    v2a_btn = gr.Button(
                        "🎬 GENERATE AUDIO! 🔊",
                        variant="primary",
                        size="lg"
                    )

                    with gr.Accordion("📜 Generation Log", open=True):
                        v2a_log = gr.Textbox(
                            label="",
                            placeholder="Upload video and click generate...",
                            lines=12,
                            interactive=False,
                            elem_classes="info-log"
                        )

                with gr.Column(scale=1):
                    v2a_output = gr.Video(label="🎥 Generated Result", height=400)
                    gr.Markdown(
                        """
                        <p style="text-align: center; margin-top: 15px; font-weight: 700; color: #1F2937;">
                        💡 Right-click on the video to save!
                        </p>
                        """
                    )

            v2a_btn.click(
                fn=video_to_audio,
                inputs=[v2a_video, v2a_prompt, v2a_negative, v2a_seed, v2a_steps, v2a_cfg, v2a_duration],
                outputs=[v2a_output, v2a_log]
            )

        # Tab 3: Text-to-Audio — generate audio from a text description only.
        with gr.TabItem("🎵 Text-to-Audio"):
            gr.Markdown(
                """
                <p style="text-align: center; font-family: 'Comic Neue', cursive; font-weight: 700; color: #1F2937; margin-bottom: 1rem;">
                ✨ Generate audio from text description!
                </p>
                """
            )

            with gr.Row(equal_height=False):
                with gr.Column(scale=1):
                    t2a_prompt = gr.Textbox(
                        label="✏️ Prompt (한글지원)" if translator else "✏️ Prompt",
                        placeholder="Describe the audio you want to generate...",
                        lines=3
                    )
                    t2a_negative = gr.Textbox(
                        label="🚫 Negative Prompt",
                        placeholder="What to avoid...",
                        lines=1
                    )

                    with gr.Row():
                        # Same int requirement as the video-to-audio tab.
                        t2a_seed = gr.Number(label="🎲 Seed", value=0, precision=0)
                        t2a_steps = gr.Number(label="🔄 Steps", value=25, precision=0)

                    with gr.Row():
                        t2a_cfg = gr.Number(label="🎯 Guidance Scale", value=4.5)
                        t2a_duration = gr.Number(label="⏱️ Duration (sec)", value=8)

                    t2a_btn = gr.Button(
                        "🎵 GENERATE AUDIO! ✨",
                        variant="primary",
                        size="lg"
                    )

                    with gr.Accordion("📜 Generation Log", open=True):
                        t2a_log = gr.Textbox(
                            label="",
                            placeholder="Enter prompt and click generate...",
                            lines=12,
                            interactive=False,
                            elem_classes="info-log"
                        )

                with gr.Column(scale=1):
                    t2a_output = gr.Audio(label="🔊 Generated Audio")
                    gr.Markdown(
                        """
                        <p style="text-align: center; margin-top: 15px; font-weight: 700; color: #1F2937;">
                        💡 Click the download button to save!
                        </p>
                        """
                    )

            t2a_btn.click(
                fn=text_to_audio,
                inputs=[t2a_prompt, t2a_negative, t2a_seed, t2a_steps, t2a_cfg, t2a_duration],
                outputs=[t2a_output, t2a_log]
            )


# Script entry point.
if __name__ == "__main__":
    if translator is None:
        log.warning("Translation model failed to load. Korean translation will be disabled.")

    # allowed_paths lets Gradio serve files written to the output directory.
    demo.launch(allowed_paths=[output_dir])