SAM3-Demo

Running

App Files Community

Translsis committed on Dec 23, 2025

Commit

9399c1d

verified ·

1 Parent(s): cb253f6

Upload app.py

Browse files

Files changed (1) hide show

app.py +400 -935

app.py CHANGED Viewed

@@ -1,10 +1,12 @@
 import os
 import cv2
 import spaces
 import gradio as gr
 import numpy as np
 import torch
 import matplotlib
 from PIL import Image, ImageDraw
 from typing import Iterable
 from gradio.themes import Soft
@@ -19,24 +21,46 @@ from datetime import datetime
 import threading
 import queue
 import uuid
-import shutil
-import zipfile
 # ============ THEME SETUP ============
 colors.steel_blue = colors.Color(
     name="steel_blue",
-    c50="#EBF3F8", c100="#D3E5F0", c200="#A8CCE1", c300="#7DB3D2",
-    c400="#529AC3", c500="#4682B4", c600="#3E72A0", c700="#36638C",
-    c800="#2E5378", c900="#264364", c950="#1E3450"
 )
 class CustomBlueTheme(Soft):
-    def __init__(self, *, primary_hue=colors.gray, secondary_hue=colors.steel_blue,
-                 neutral_hue=colors.slate, text_size=sizes.text_lg,
-                 font=(fonts.GoogleFont("Outfit"), "Arial", "sans-serif"),
-                 font_mono=(fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace")):
-        super().__init__(primary_hue=primary_hue, secondary_hue=secondary_hue,
-                         neutral_hue=neutral_hue, text_size=text_size, font=font, font_mono=font_mono)
         super().set(
             background_fill_primary="*primary_50",
             background_fill_primary_dark="*primary_900",
@@ -62,255 +86,76 @@ class CustomBlueTheme(Soft):
 app_theme = CustomBlueTheme()
 # ============ GLOBAL SETUP ============
-device = "cpu"  # Force CPU usage
-print(f"🖥️ Using device: {device} (CPU mode for stability)")
 HISTORY_DIR = "processing_history"
-OUTPUTS_DIR = os.path.join(HISTORY_DIR, "outputs")
-DOWNLOADS_DIR = os.path.join(HISTORY_DIR, "downloads")
-os.makedirs(OUTPUTS_DIR, exist_ok=True)
-os.makedirs(DOWNLOADS_DIR, exist_ok=True)
 HISTORY_FILE = os.path.join(HISTORY_DIR, "history.json")
 processing_queue = queue.Queue()
 processing_results = {}
 # Load models
-print("⏳ Loading SAM3 Models...")
 try:
-    print("   Loading Image Model...")
     IMG_MODEL = Sam3Model.from_pretrained("DiffusionWave/sam3").to(device)
     IMG_PROCESSOR = Sam3Processor.from_pretrained("DiffusionWave/sam3")
-    print("   Loading Tracker Model...")
     TRK_MODEL = Sam3TrackerModel.from_pretrained("DiffusionWave/sam3").to(device)
     TRK_PROCESSOR = Sam3TrackerProcessor.from_pretrained("DiffusionWave/sam3")
-    print("   Loading Video Model...")
-    VID_MODEL = Sam3VideoModel.from_pretrained("DiffusionWave/sam3").to(device)  # No bfloat16 for CPU
     VID_PROCESSOR = Sam3VideoProcessor.from_pretrained("DiffusionWave/sam3")
-    print("✅ All models loaded successfully on CPU!")
 except Exception as e:
-    print(f"❌ Error loading models: {e}")
     IMG_MODEL = IMG_PROCESSOR = TRK_MODEL = TRK_PROCESSOR = VID_MODEL = VID_PROCESSOR = None
 # ============ HISTORY MANAGEMENT ============
 def load_history():
     if os.path.exists(HISTORY_FILE):
         try:
-            with open(HISTORY_FILE, 'r', encoding='utf-8') as f:
                 return json.load(f)
         except:
             return []
     return []
-def save_history(item):
-    history = load_history()
-    history.insert(0, item)
-    history = history[:200]
-    with open(HISTORY_FILE, 'w', encoding='utf-8') as f:
-        json.dump(history, f, indent=2, ensure_ascii=False)
-def get_history_stats():
-    history = load_history()
-    total = len(history)
-    completed = sum(1 for h in history if h['status'] == 'completed')
-    errors = sum(1 for h in history if h['status'] == 'error')
-    types = {}
-    for h in history:
-        t = h['type']
-        types[t] = types.get(t, 0) + 1
-    return {
-        'total': total,
-        'completed': completed,
-        'errors': errors,
-        'success_rate': f"{(completed/total*100):.1f}%" if total > 0 else "0%",
-        'types': types
-    }
-def create_download_package(item_id):
-    history = load_history()
-    item = next((h for h in history if h['id'] == item_id), None)
-    if not item or item['status'] != 'completed':
-        return None
-    zip_path = os.path.join(DOWNLOADS_DIR, f"{item_id}_results.zip")
-    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
-        metadata = {
-            'job_id': item_id,
-            'type': item['type'],
-            'prompt': item.get('prompt', 'N/A'),
-            'timestamp': item['timestamp'],
-            'duration': item.get('duration', 'N/A'),
-            'num_objects': item.get('num_objects', 0)
-        }
-        zipf.writestr('metadata.json', json.dumps(metadata, indent=2, ensure_ascii=False))
-        if item['type'] == 'image':
-            if item.get('output_path') and os.path.exists(item['output_path']):
-                zipf.write(item['output_path'], 'overlay.jpg')
-            if item.get('segmented_files'):
-                for i, f in enumerate(item['segmented_files'], 1):
-                    if os.path.exists(f):
-                        zipf.write(f, f'objects/object_{i}.png')
-        elif item['type'] == 'video':
-            if item.get('output_path') and os.path.exists(item['output_path']):
-                zipf.write(item['output_path'], 'overlay_video.mp4')
-            if item.get('mask_video_path') and os.path.exists(item['mask_video_path']):
-                zipf.write(item['mask_video_path'], 'masks_only.mp4')
-            if item.get('segmented_video_path') and os.path.exists(item['segmented_video_path']):
-                zipf.write(item['segmented_video_path'], 'segmented_video.mp4')
-        elif item['type'] == 'click':
-            if item.get('output_path') and os.path.exists(item['output_path']):
-                zipf.write(item['output_path'], 'result.jpg')
-    return zip_path
-def get_downloadable_jobs():
     history = load_history()
-    choices = []
-    for item in history:
-        if item['status'] == 'completed':
-            type_emoji = {'image': '📷', 'video': '🎥', 'click': '👆'}.get(item['type'], '📄')
-            label = f"{type_emoji} [{item['type'].upper()}] {item['prompt'][:35]}... | {item['timestamp']}"
-            choices.append((label, item['id']))
-    return choices if choices else [("No completed jobs available", None)]
-def format_history_table():
     history = load_history()
     if not history:
-        return "<p style='text-align:center; color:#666; padding:40px;'>📭 Chưa có lịch sử xử lý nào</p>"
-    html = """
-    <style>
-        .history-table { width: 100%; border-collapse: collapse; font-size: 14px; background: white; border-radius: 8px; overflow: hidden; box-shadow: 0 2px 8px rgba(0,0,0,0.1); }
-        .history-table th { background: linear-gradient(90deg, #4682B4, #529AC3); color: white; padding: 14px 12px; text-align: left; font-weight: 600; text-transform: uppercase; font-size: 12px; letter-spacing: 0.5px; }
-        .history-table td { padding: 12px; border-bottom: 1px solid #e8e8e8; vertical-align: middle; }
-        .history-table tr:hover { background-color: #f8f9fa; }
-        .history-table tr:last-child td { border-bottom: none; }
-        .status-badge { padding: 5px 12px; border-radius: 14px; font-size: 11px; font-weight: 700; text-transform: uppercase; letter-spacing: 0.5px; display: inline-block; }
-        .status-completed { background: linear-gradient(135deg, #d4edda, #c3e6cb); color: #155724; }
-        .status-error { background: linear-gradient(135deg, #f8d7da, #f5c6cb); color: #721c24; }
-        .type-badge { padding: 5px 10px; border-radius: 10px; font-size: 11px; font-weight: 600; background: linear-gradient(135deg, #e3f2fd, #bbdefb); color: #1565c0; display: inline-block; }
-        .prompt-text { max-width: 280px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; color: #333; font-weight: 500; }
-        .file-count { font-size: 11px; color: #666; margin-top: 4px; line-height: 1.4; }
-        .job-id { font-family: 'Courier New', monospace; font-size: 10px; color: #999; background: #f5f5f5; padding: 3px 6px; border-radius: 4px; }
-        .time-info { font-size: 12px; color: #666; }
-        .duration { font-size: 11px; color: #999; margin-top: 3px; }
-    </style>
-    <table class='history-table'>
-        <thead>
-            <tr>
-                <th style='width: 40px; text-align: center;'>#</th>
-                <th style='width: 110px;'>Job ID</th>
-                <th style='width: 90px;'>Type</th>
-                <th style='width: 110px;'>Status</th>
-                <th>Prompt</th>
-                <th style='width: 120px;'>Output Files</th>
-                <th style='width: 140px;'>Time</th>
-            </tr>
-        </thead>
-        <tbody>
-    """
-    for i, item in enumerate(history[:100], 1):
-        status_class = f"status-{item['status']}"
-        status_text = "✅ Completed" if item['status'] == 'completed' else "❌ Error"
-        type_icons = {'image': '📷', 'video': '🎥', 'click': '👆'}
-        type_icon = type_icons.get(item['type'], '📄')
-        prompt = item.get('prompt', 'N/A')
-        prompt_short = prompt[:45] + ('...' if len(prompt) > 45 else '')
-        file_info = []
         if item.get('output_path'):
-            file_info.append("✓ Overlay")
-        if item.get('segmented_files'):
-            file_info.append(f"✓ {len(item['segmented_files'])} Objects")
-        if item.get('mask_video_path'):
-            file_info.append("✓ Masks")
-        if item.get('segmented_video_path'):
-            file_info.append("✓ Segmented")
-        files_text = "<br>".join(file_info) if file_info else "No files"
-        html += f"""
-            <tr>
-                <td style='text-align: center; font-weight: 600; color: #999;'>{i}</td>
-                <td><span class='job-id'>{item['id'][:12]}</span></td>
-                <td><span class='type-badge'>{type_icon} {item['type'].upper()}</span></td>
-                <td><span class='status-badge {status_class}'>{status_text}</span></td>
-                <td class='prompt-text' title='{prompt}'>{prompt_short}</td>
-                <td><div class='file-count'>{files_text}</div></td>
-                <td>
-                    <div class='time-info'>{item['timestamp']}</div>
-                    <div class='duration'>⏱️ {item.get('duration', 'N/A')}</div>
-                </td>
-            </tr>
-        """
-    html += """
-        </tbody>
-    </table>
-    """
-    return html
-def get_history_gallery():
-    history = load_history()
-    gallery_items = []
-    for item in history[:30]:
-        if item['status'] == 'completed':
-            if item.get('output_path') and os.path.exists(item['output_path']):
-                caption = f"[{item['type'].upper()}] {item['prompt'][:35]}... | {item['timestamp']}"
-                gallery_items.append((item['output_path'], caption))
-    return gallery_items if gallery_items else []
-def search_history(keyword, filter_type, filter_status):
-    history = load_history()
-    filtered = history
-    if keyword:
-        filtered = [h for h in filtered if keyword.lower() in h.get('prompt', '').lower()]
-    if filter_type and filter_type != "all":
-        filtered = [h for h in filtered if h['type'] == filter_type]
-    if filter_status and filter_status != "all":
-        filtered = [h for h in filtered if h['status'] == filter_status]
-    return filtered
-def clear_all_history():
-    if os.path.exists(OUTPUTS_DIR):
-        shutil.rmtree(OUTPUTS_DIR)
-        os.makedirs(OUTPUTS_DIR)
-    if os.path.exists(DOWNLOADS_DIR):
-        shutil.rmtree(DOWNLOADS_DIR)
-        os.makedirs(DOWNLOADS_DIR)
-    with open(HISTORY_FILE, 'w', encoding='utf-8') as f:
-        json.dump([], f)
-    return "✅ Đã xóa toàn bộ lịch sử và files"
-def export_history_json():
-    history = load_history()
-    export_path = os.path.join(HISTORY_DIR, f"history_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
-    with open(export_path, 'w', encoding='utf-8') as f:
-        json.dump(history, f, indent=2, ensure_ascii=False)
-    return export_path
-# ============ PROCESSING UTILS ============
 def apply_mask_overlay(base_image, mask_data, opacity=0.5):
     if isinstance(base_image, np.ndarray):
         base_image = Image.fromarray(base_image)
     base_image = base_image.convert("RGBA")
@@ -322,10 +167,8 @@ def apply_mask_overlay(base_image, mask_data, opacity=0.5):
         mask_data = mask_data.cpu().numpy()
     mask_data = mask_data.astype(np.uint8)
-    if mask_data.ndim == 4:
-        mask_data = mask_data[0]
-    if mask_data.ndim == 3 and mask_data.shape[0] == 1:
-        mask_data = mask_data[0]
     num_masks = mask_data.shape[0] if mask_data.ndim == 3 else 1
     if mask_data.ndim == 2:
@@ -334,262 +177,44 @@ def apply_mask_overlay(base_image, mask_data, opacity=0.5):
     try:
         color_map = matplotlib.colormaps["rainbow"].resampled(max(num_masks, 1))
-    except:
         import matplotlib.cm as cm
         color_map = cm.get_cmap("rainbow").resampled(max(num_masks, 1))
     rgb_colors = [tuple(int(c * 255) for c in color_map(i)[:3]) for i in range(num_masks)]
     composite_layer = Image.new("RGBA", base_image.size, (0, 0, 0, 0))
-    for i, mask in enumerate(mask_data):
-        mask_img = Image.fromarray((mask * 255).astype(np.uint8))
-        if mask_img.size != base_image.size:
-            mask_img = mask_img.resize(base_image.size, resample=Image.NEAREST)
-        color_fill = Image.new("RGBA", base_image.size, rgb_colors[i] + (0,))
-        mask_alpha = mask_img.point(lambda v: int(v * opacity) if v > 0 else 0)
         color_fill.putalpha(mask_alpha)
         composite_layer = Image.alpha_composite(composite_layer, color_fill)
     return Image.alpha_composite(base_image, composite_layer).convert("RGB")
 def draw_points_on_image(image, points):
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
     draw_img = image.copy()
     draw = ImageDraw.Draw(draw_img)
-    for x, y in points:
         r = 8
         draw.ellipse((x-r, y-r, x+r, y+r), fill="red", outline="white", width=4)
-    return draw_img
-# ============ JOB PROCESSORS ============
-def process_image_job(job):
-    start = datetime.now()
-    img = job['image']
-    if isinstance(img, str):
-        img = Image.open(img)
-    img = img.convert("RGB")
-    inputs = IMG_PROCESSOR(images=img, text=job['prompt'], return_tensors="pt").to(device)
-    with torch.no_grad():
-        outputs = IMG_MODEL(**inputs)
-    results = IMG_PROCESSOR.post_process_instance_segmentation(
-        outputs,
-        threshold=job.get('conf_thresh', 0.5),
-        mask_threshold=0.5,
-        target_sizes=inputs.get("original_sizes").tolist()
-    )[0]
-    masks = results['masks'].cpu().numpy()
-    scores = results['scores'].cpu().numpy()
-    annotations = [(m, f"{job['prompt']} ({s:.2f})") for m, s in zip(masks, scores)]
-    out_path = os.path.join(OUTPUTS_DIR, f"{job['id']}_overlay.jpg")
-    apply_mask_overlay(img, masks).save(out_path)
-    seg_files = []
-    for i, mask in enumerate(masks):
-        mask_bool = mask.astype(bool)
-        seg = Image.new("RGBA", img.size, (0, 0, 0, 0))
-        arr = np.array(img.convert("RGBA"))
-        arr[~mask_bool] = [0, 0, 0, 0]
-        seg = Image.fromarray(arr)
-        # Fix: Convert mask to uint8 before creating Image
-        mask_uint8 = (mask * 255).astype(np.uint8)
-        bbox = Image.fromarray(mask_uint8).getbbox()
-        if bbox:
-            seg_path = os.path.join(OUTPUTS_DIR, f"{job['id']}_obj_{i+1}.png")
-            seg.crop(bbox).save(seg_path)
-            seg_files.append(seg_path)
-    return {
-        'image': (img, annotations),
-        'output_path': out_path,
-        'segmented_files': seg_files,
-        'num_objects': len(seg_files),
-        'duration': f"{(datetime.now() - start).total_seconds():.2f}s"
-    }
-def process_video_job(job):
-    """Process video on CPU - slower but no timeout"""
-    start = datetime.now()
-    cap = cv2.VideoCapture(job['video'])
-    fps = cap.get(cv2.CAP_PROP_FPS)
-    w, h = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    frames = []
-    limit = job.get('frame_limit', 60)
-    if limit == 0 or limit > 500:
-        limit = 500  # Higher limit for CPU since no GPU timeout
-    count = 0
-    while cap.isOpened():
-        ret, frame = cap.read()
-        if not ret or count >= limit:
-            break
-        frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-        count += 1
-    cap.release()
-    print(f"📹 Processing {len(frames)} frames on CPU (this will take longer)...")
-    # Process in chunks to manage memory
-    chunk_size = 30  # Smaller chunks for CPU
-    out_path = os.path.join(OUTPUTS_DIR, f"{job['id']}_overlay.mp4")
-    mask_path = os.path.join(OUTPUTS_DIR, f"{job['id']}_masks.mp4")
-    seg_path = os.path.join(OUTPUTS_DIR, f"{job['id']}_segmented.mp4")
-    writers = [
-        cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)),
-        cv2.VideoWriter(mask_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)),
-        cv2.VideoWriter(seg_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
-    ]
-    total = len(frames)
-    processed = 0
-    # Process frames in chunks
-    for chunk_start in range(0, total, chunk_size):
-        chunk_end = min(chunk_start + chunk_size, total)
-        chunk_frames = frames[chunk_start:chunk_end]
-        print(f"🔄 Processing chunk {chunk_start}-{chunk_end} ({len(chunk_frames)} frames)")
-        try:
-            # Initialize session for this chunk
-            session = VID_PROCESSOR.init_video_session(
-                video=chunk_frames,
-                inference_device=device
-            )
-            session = VID_PROCESSOR.add_text_prompt(inference_session=session, text=job['prompt'])
-            # Process chunk
-            for idx, out in enumerate(VID_MODEL.propagate_in_video_iterator(
-                inference_session=session,
-                max_frame_num_to_track=len(chunk_frames)
-            )):
-                try:
-                    proc = VID_PROCESSOR.postprocess_outputs(session, out)
-                    f_idx = out.frame_idx
-                    orig = Image.fromarray(chunk_frames[f_idx])
-                    if 'masks' in proc:
-                        masks = proc['masks']
-                        if masks.ndim == 4:
-                            masks = masks.squeeze(1)
-                        overlay = apply_mask_overlay(orig, masks)
-                        writers[0].write(cv2.cvtColor(np.array(overlay), cv2.COLOR_RGB2BGR))
-                        mask_np = masks.cpu().numpy() if isinstance(masks, torch.Tensor) else masks
-                        combined = np.zeros((h, w), dtype=np.uint8)
-                        for m in mask_np:
-                            if m.shape != (h, w):
-                                m = cv2.resize(m.astype(np.uint8), (w, h), interpolation=cv2.INTER_NEAREST)
-                            combined = np.maximum(combined, m)
-                        mask_frame = np.zeros((h, w, 3), dtype=np.uint8)
-                        mask_frame[combined > 0] = [255, 255, 255]
-                        writers[1].write(mask_frame)
-                        seg_arr = np.array(orig.convert("RGBA"))
-                        seg_arr[:, :, 3] = (combined * 255).astype(np.uint8)
-                        bgr = np.zeros((h, w, 3), dtype=np.uint8)
-                        bgr[:, :] = [0, 255, 0]
-                        for c in range(3):
-                            bgr[:, :, c] = np.where(combined > 0, seg_arr[:, :, 2-c], bgr[:, :, c])
-                        writers[2].write(bgr)
-                    else:
-                        orig_bgr = cv2.cvtColor(np.array(orig), cv2.COLOR_RGB2BGR)
-                        writers[0].write(orig_bgr)
-                        writers[1].write(np.zeros((h, w, 3), dtype=np.uint8))
-                        writers[2].write(orig_bgr)
-                    processed += 1
-                    progress = int((processed / total) * 100)
-                    processing_results[job['id']]['progress'] = progress
-                    if processed % 5 == 0:
-                        elapsed = (datetime.now() - start).total_seconds()
-                        avg_time = elapsed / processed
-                        remaining = (total - processed) * avg_time
-                        print(f"⏳ Progress: {progress}% ({processed}/{total}) | ETA: {remaining/60:.1f} min")
-                except Exception as e:
-                    print(f"⚠️ Error processing frame {f_idx}: {e}")
-                    orig_bgr = cv2.cvtColor(np.array(orig), cv2.COLOR_RGB2BGR)
-                    writers[0].write(orig_bgr)
-                    writers[1].write(np.zeros((h, w, 3), dtype=np.uint8))
-                    writers[2].write(orig_bgr)
-                    processed += 1
-            # Clear memory after each chunk
-            del session
-        except Exception as e:
-            print(f"❌ Error processing chunk: {e}")
-            for i in range(chunk_start, chunk_end):
-                if i < len(frames):
-                    orig_bgr = cv2.cvtColor(frames[i], cv2.COLOR_RGB2BGR)
-                    writers[0].write(orig_bgr)
-                    writers[1].write(np.zeros((h, w, 3), dtype=np.uint8))
-                    writers[2].write(orig_bgr)
-                    processed += 1
-    for w in writers:
-        w.release()
-    print(f"✅ Video completed: {processed} frames in {(datetime.now() - start).total_seconds():.2f}s")
-    return {
-        'output_path': out_path,
-        'mask_video_path': mask_path,
-        'segmented_video_path': seg_path,
-        'duration': f"{(datetime.now() - start).total_seconds():.2f}s"
-    }
-def process_click_job(job):
-    start = datetime.now()
-    img = job['image']
-    if isinstance(img, str):
-        img = Image.open(img)
-    inputs = TRK_PROCESSOR(
-        images=img,
-        input_points=[[job['points']]],
-        input_labels=[[job['labels']]],
-        return_tensors="pt"
-    ).to(device)
-    with torch.no_grad():
-        outputs = TRK_MODEL(**inputs, multimask_output=False)
-    masks = TRK_PROCESSOR.post_process_masks(
-        outputs.pred_masks.cpu(),
-        inputs["original_sizes"],
-        binarize=True
-    )[0]
-    result = apply_mask_overlay(img, masks[0])
-    result = draw_points_on_image(result, job['points'])
-    out_path = os.path.join(OUTPUTS_DIR, f"{job['id']}_result.jpg")
-    result.save(out_path)
-    return {
-        'image': result,
-        'output_path': out_path,
-        'duration': f"{(datetime.now() - start).total_seconds():.2f}s"
-    }
-# ============ BACKGROUND WORKER ============
 def background_worker():
     while True:
         try:
             job = processing_queue.get()
@@ -599,8 +224,6 @@ def background_worker():
             job_id = job['id']
             job_type = job['type']
-            print(f"🚀 Starting job {job_id[:8]} - Type: {job_type}")
             processing_results[job_id] = {'status': 'processing', 'progress': 0}
             try:
@@ -617,22 +240,17 @@ def background_worker():
                     'progress': 100
                 }
-                print(f"✅ Job {job_id[:8]} completed successfully")
                 save_history({
                     'id': job_id,
                     'type': job_type,
                     'prompt': job.get('prompt', 'N/A'),
                     'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                     'status': 'completed',
-                    **result
                 })
             except Exception as e:
-                print(f"❌ Job {job_id[:8]} failed: {str(e)}")
-                import traceback
-                traceback.print_exc()
                 processing_results[job_id] = {
                     'status': 'error',
                     'error': str(e),
@@ -647,513 +265,360 @@ def background_worker():
                     'error': str(e)
                 })
         except Exception as e:
-            print(f"⚠️ Worker error: {e}")
-            import traceback
-            traceback.print_exc()
-threading.Thread(target=background_worker, daemon=True).start()
-# ============ GRADIO UI ============
-custom_css = """
-#col-container { margin: 0 auto; max-width: 1400px; }
-#main-title h1 { font-size: 2.2em !important; font-weight: 700; background: linear-gradient(135deg, #4682B4, #764ba2); -webkit-background-clip: text; -webkit-text-fill-color: transparent; }
-.stat-card { padding: 24px; border-radius: 16px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; text-align: center; box-shadow: 0 4px 15px rgba(0,0,0,0.2); }
-.stat-number { font-size: 2.8em; font-weight: 800; margin: 12px 0; text-shadow: 2px 2px 4px rgba(0,0,0,0.2); }
-.stat-label { font-size: 1.1em; opacity: 0.95; font-weight: 500; }
 """
-with gr.Blocks(title="SAM3 Segmentation") as demo:
     with gr.Column(elem_id="col-container"):
         gr.Markdown("# **SAM3: Segment Anything Model 3** 🚀", elem_id="main-title")
-        gr.Markdown("### 💻 CPU Mode - Xử lý không giới hạn thời gian | Background processing | Download đầy đủ kết quả")
-        gr.Markdown("""
-        <div style='background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 15px; border-radius: 10px; color: white; margin-bottom: 20px;'>
-        <strong>🔥 Đặc điểm CPU Mode:</strong><br>
-        ✅ Không bị timeout - xử lý video dài thoải mái<br>
-        ⏱️ Chậm hơn nhưng ổn định - tốc độ ~2-3 phút/frame<br>
-        🔋 Chạy background - submit job và làm việc khác<br>
-        💾 Tự động lưu lịch sử và download được
-        </div>
-        """)
         with gr.Tabs():
-            # ===== IMAGE TAB =====
             with gr.Tab("📷 Image Segmentation"):
                 with gr.Row():
                     with gr.Column(scale=1):
-                        click_input = gr.Image(
-                            type="pil",
-                            label="📤 Upload Image & Click Objects",
-                            interactive=True,
-                            height=450
-                        )
-                        gr.Markdown("""
-                        **📝 Hướng dẫn:**
-                        1. Upload ảnh
-                        2. Click vào đối tượng bạn muốn phân đoạn
-                        3. Kết quả hiển thị ngay lập tức
-                        4. Click "Clear" để reset và bắt đầu lại
-                        """)
-                        click_clear = gr.Button("🔄 Clear Points & Reset", variant="primary")
-                        click_pts = gr.State([])
-                        click_lbl = gr.State([])
-                    with gr.Column(scale=1):
-                        img_input = gr.Image(label="📤 Upload Image", type="pil", height=350)
-                        img_prompt = gr.Textbox(
-                            label="✍️ Text Prompt",
-                            placeholder="e.g., cat, person, car, building...",
-                            lines=2
-                        )
-                        with gr.Accordion("⚙️ Advanced Settings", open=False):
-                            img_conf = gr.Slider(0.0, 1.0, 0.45, 0.05, label="Confidence Threshold")
-                        img_submit = gr.Button("🚀 Submit Job (Background)", variant="primary", size="lg")
-                        img_check = gr.Button("🔍 Check Status", variant="secondary")
-                        img_job_id = gr.Textbox(label="Job ID", visible=False)
                     with gr.Column(scale=1.5):
-                        img_result = gr.AnnotatedImage(label="🎨 Segmented Result (Overlay)", height=410)
-                        img_status = gr.Textbox(label="📊 Status", interactive=False)
-                        with gr.Accordion("📦 Extracted Objects", open=True):
-                            gr.Markdown("**Các đối tượng được tách ra (PNG với nền trong suốt):**")
-                            img_gallery = gr.Gallery(
-                                label="Segmented Objects",
-                                columns=3,
-                                height=300,
-                                object_fit="contain"
-                            )
-                def submit_img(img, prompt, conf):
-                    if not img or not prompt:
-                        return None, "❌ Vui lòng cung cấp ảnh và prompt", "", []
-                    jid = str(uuid.uuid4())
-                    processing_queue.put({
-                        'id': jid,
-                        'type': 'image',
-                        'image': img,
-                        'prompt': prompt,
-                        'conf_thresh': conf
-                    })
-                    return None, f"✅ Đã thêm vào hàng chờ (ID: {jid[:8]}). Đang xử lý...", jid, []
-                def check_img(jid):
-                    if not jid or jid not in processing_results:
-                        return None, "❌ Không tìm thấy công việc", []
-                    r = processing_results[jid]
-                    if r['status'] == 'processing':
-                        return None, f"⏳ Đang xử lý... {r['progress']}%", []
-                    elif r['status'] == 'completed':
-                        res = r['result']
-                        gal = [f for f in res.get('segmented_files', []) if os.path.exists(f)]
-                        status = f"✅ Hoàn thành! Đã tách được {len(gal)} đối tượng | Thời gian: {res.get('duration', 'N/A')}"
-                        return res['image'], status, gal
-                    else:
-                        return None, f"❌ Lỗi: {r.get('error', 'Unknown')}", []
-                img_submit.click(
-                    fn=submit_img,
-                    inputs=[img_input, img_prompt, img_conf],
-                    outputs=[img_result, img_status, img_job_id, img_gallery]
                 )
-                img_check.click(
-                    fn=check_img,
-                    inputs=[img_job_id],
-                    outputs=[img_result, img_status, img_gallery]
                 )
-            # ===== VIDEO TAB =====
             with gr.Tab("🎥 Video Segmentation"):
                 with gr.Row():
                     with gr.Column():
-                        vid_input = gr.Video(label="📤 Upload Video", format="mp4", height=320)
-                        vid_prompt = gr.Textbox(
-                            label="✍️ Text Prompt",
-                            placeholder="e.g., person running, red car, dog...",
-                            lines=2
-                        )
-                        with gr.Accordion("⚙️ Settings", open=True):
-                            vid_frames = gr.Slider(
-                                10, 500, 60, 10,
-                                label="Max Frames",
-                                info="CPU mode: Có thể xử lý nhiều frames hơn, nhưng sẽ chậm hơn"
-                            )
-                            gr.Markdown("""
-                            **💻 CPU Processing Mode:**
-                            - ✅ **Không bị timeout** - xử lý bao nhiêu cũng được
-                            - ⏱️ **Chậm hơn GPU** - khoảng 2-3 phút/frame
-                            - 🔋 **Ổn định** - không crash, chạy nền background
-                            **⏱️ Thời gian ước tính:**
-                            - 30 frames: ~60-90 phút
-                            - 60 frames: ~2-3 giờ
-                            - 100 frames: ~3-5 giờ
-                            **💡 Khuyến nghị:**
-                            - Submit job và làm việc khác
-                            - Nhấn "Check Status" để xem tiến độ
-                            - Video sẽ được lưu khi hoàn thành
-                            """)
-                        vid_submit = gr.Button("🚀 Submit Job (Background)", variant="primary", size="lg")
-                        vid_check = gr.Button("🔍 Check Status", variant="secondary")
-                        vid_job_id = gr.Textbox(label="Job ID", visible=False)
-                    with gr.Column():
-                        gr.Markdown("### 📹 Video Outputs (3 versions)")
-                        with gr.Tabs():
-                            with gr.Tab("1️⃣ Overlay"):
-                                vid_overlay = gr.Video(label="Original + Color Masks")
-                                gr.Markdown("*Video gốc với màu mask phủ lên*")
-                            with gr.Tab("2️⃣ Masks Only"):
-                                vid_masks = gr.Video(label="White Masks on Black")
-                                gr.Markdown("*Chỉ hiển thị mask màu trắng trên nền đen*")
-                            with gr.Tab("3️⃣ Segmented"):
-                                vid_segmented = gr.Video(label="Green Screen Background")
-                                gr.Markdown("*Đối tượng với nền xanh lá (green screen)*")
-                        vid_status = gr.Textbox(label="📊 Status", interactive=False)
-                def submit_vid(vid, prompt, frames):
-                    """Queue a video segmentation job.
-
-                    Returns three cleared video outputs, a status message and the
-                    new job id (empty string when the input is rejected).
-                    """
-                    if not (vid and prompt):
-                        return None, None, None, "❌ Vui lòng cung cấp video và prompt", ""
-                    jid = str(uuid.uuid4())
-                    job = {
-                        'id': jid,
-                        'type': 'video',
-                        'video': vid,
-                        'prompt': prompt,
-                        'frame_limit': frames
-                    }
-                    processing_queue.put(job)
-                    message = f"✅ Đã thêm vào hàng chờ (ID: {jid[:8]}). Đang xử lý..."
-                    return None, None, None, message, jid
-                def check_vid(jid):
-                    """Report the state of a queued video job.
-
-                    Returns the three output video paths (or ``None`` while they are
-                    not available) followed by a status message.
-                    """
-                    if not jid or jid not in processing_results:
-                        return None, None, None, "❌ Không tìm thấy công việc"
-                    entry = processing_results[jid]
-                    state = entry['status']
-                    if state == 'processing':
-                        return None, None, None, f"⏳ Đang xử lý... {entry['progress']}%"
-                    if state != 'completed':
-                        return None, None, None, f"❌ Lỗi: {entry.get('error', 'Unknown')}"
-                    res = entry['result']
-                    status = f"""✅ Hoàn thành! Thời gian: {res.get('duration', 'N/A')}
-📹 3 video đã được tạo:
-• Overlay - Ảnh gốc với mask màu
-• Masks Only - Chỉ mask (trắng/đen)
-• Segmented - Đối tượng với green screen"""
-                    overlay_path = res.get('output_path')
-                    mask_path = res.get('mask_video_path')
-                    segmented_path = res.get('segmented_video_path')
-                    return overlay_path, mask_path, segmented_path, status
-                vid_submit.click(
-                    fn=submit_vid,
-                    inputs=[vid_input, vid_prompt, vid_frames],
-                    outputs=[vid_overlay, vid_masks, vid_segmented, vid_status, vid_job_id]
-                )
-                vid_check.click(
-                    fn=check_vid,
-                    inputs=[vid_job_id],
-                    outputs=[vid_overlay, vid_masks, vid_segmented, vid_status]
-                )
-            # ===== CLICK TAB =====
-            with gr.Tab("👆 Click Segmentation"):
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        click_input = gr.Image(
-                            type="pil",
-                            label="📤 Upload Image & Click Objects",
-                            interactive=True,
-                            height=450
-                        )
-                        gr.Markdown("""
-                        **📝 Hướng dẫn:**
-                        1. Upload ảnh
-                        2. Click vào đối tượng bạn muốn phân đoạn
-                        3. Kết quả hiển thị ngay lập tức
-                        4. Click "Clear" để reset và bắt đầu lại
-                        """)
-                        click_clear = gr.Button("🔄 Clear Points & Reset", variant="primary")
-                        click_pts = gr.State([])
-                        click_lbl = gr.State([])
-                    with gr.Column(scale=1):
-                        click_output = gr.Image(
-                            type="pil",
-                            label="🎨 Result Preview",
-                            height=450,
-                            interactive=False
-                        )
-                        gr.Markdown("""
-                        **💡 Tips:**
-                        - Click vào trung tâm của đối tượng để có kết quả tốt nhất
-                        - Các điểm click được hiển thị bằng dấu chấm đỏ
-                        - Kết quả tự động cập nhật sau mỗi lần click
-                        """)
-                def on_click(img, evt: gr.SelectData, pts, lbl):
-                    """Add the clicked point as a positive prompt and re-run click segmentation.
-
-                    Returns the segmented image (or the unchanged input image when
-                    processing fails) together with the updated point and label lists.
-                    """
-                    pts = [] if pts is None else pts
-                    lbl = [] if lbl is None else lbl
-                    pts.append([evt.index[0], evt.index[1]])
-                    lbl.append(1)
-                    job = {
-                        'id': str(uuid.uuid4()),
-                        'type': 'click',
-                        'image': img,
-                        'points': pts,
-                        'labels': lbl
-                    }
-                    try:
-                        preview = process_click_job(job)['image']
-                    except Exception as e:
-                        print(f"Click error: {e}")
-                        return img, pts, lbl
-                    return preview, pts, lbl
-                click_input.select(
-                    fn=on_click,
-                    inputs=[click_input, click_pts, click_lbl],
-                    outputs=[click_output, click_pts, click_lbl]
                 )
-                click_clear.click(
-                    fn=lambda: (None, [], []),
-                    outputs=[click_output, click_pts, click_lbl]
                 )
-            # ===== DOWNLOAD TAB =====
-            with gr.Tab("📥 Download Results"):
-                gr.Markdown("""
-                # 📦 Download Center
-                ### Tải về kết quả đã xử lý dưới dạng ZIP
-                """)
                 with gr.Row():
                     with gr.Column(scale=1):
-                        gr.Markdown("### 🎯 Select Job to Download")
-                        download_dropdown = gr.Dropdown(
-                            label="Chọn công việc đã hoàn thành",
-                            choices=get_downloadable_jobs(),
-                            interactive=True,
-                            scale=1
-                        )
                         with gr.Row():
-                            download_refresh = gr.Button("🔄 Refresh List", variant="secondary", scale=1)
-                            download_btn = gr.Button("📥 Download ZIP", variant="primary", size="lg", scale=2)
-                        download_status = gr.Textbox(label="Status", interactive=False)
                     with gr.Column(scale=1):
-                        gr.Markdown("### 📄 Download File")
-                        download_file = gr.File(label="Your ZIP file will appear here")
-                        gr.Markdown("""
-                        **📦 Package Contents:**
-                        **Image Jobs:**
-                        - `overlay.jpg` - Ảnh với mask màu
-                        - `objects/object_*.png` - Từng đối tượng riêng lẻ (PNG transparent)
-                        - `metadata.json` - Thông tin chi tiết
-                        **Video Jobs:**
-                        - `overlay_video.mp4` - Video với mask màu
-                        - `masks_only.mp4` - Chỉ mask trắng/đen
-                        - `segmented_video.mp4` - Video với green screen
-                        - `metadata.json` - Thông tin chi tiết
-                        **Click Jobs:**
-                        - `result.jpg` - Ảnh kết quả
-                        - `metadata.json` - Thông tin chi tiết
-                        """)
-                def do_download(job_id):
-                    """Build the ZIP package for a job and return its path with a status message."""
-                    if not job_id:
-                        return None, "❌ Vui lòng chọn một job"
-                    package = create_download_package(job_id)
-                    if not package or not os.path.exists(package):
-                        return None, "❌ Không thể tạo package. Job có thể đã bị xóa."
-                    size_mb = os.path.getsize(package) / 1024 / 1024
-                    return package, f"✅ Sẵn sàng tải về! Kích thước: {size_mb:.2f} MB"
-                download_refresh.click(
-                    fn=lambda: gr.Dropdown(choices=get_downloadable_jobs()),
-                    outputs=[download_dropdown]
                 )
-                download_btn.click(
-                    fn=do_download,
-                    inputs=[download_dropdown],
-                    outputs=[download_file, download_status]
                 )
-            # ===== HISTORY & STATS TAB =====
-            with gr.Tab("📊 History & Statistics"):
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        gr.Markdown("### 📈 Statistics Dashboard")
-                        def update_stats():
-                            """Return the four stat-card Markdown strings built from the history statistics."""
-                            stats = get_history_stats()
-                            total_card = f"**{stats['total']}**\n\nTổng số jobs"
-                            completed_card = f"**{stats['completed']}**\n\nHoàn thành"
-                            errors_card = f"**{stats['errors']}**\n\nLỗi"
-                            success_card = f"**{stats['success_rate']}**\n\nTỷ lệ thành công"
-                            return total_card, completed_card, errors_card, success_card
-                        with gr.Row():
-                            stat_total = gr.Markdown("**0**\n\nTổng số jobs", elem_classes=["stat-card"])
-                            stat_completed = gr.Markdown("**0**\n\nHoàn thành", elem_classes=["stat-card"])
-                        with gr.Row():
-                            stat_errors = gr.Markdown("**0**\n\nLỗi", elem_classes=["stat-card"])
-                            stat_success = gr.Markdown("**0%**\n\nTỷ lệ thành công", elem_classes=["stat-card"])
-                        gr.Markdown("### 🎯 Quick Actions")
-                        with gr.Row():
-                            btn_refresh = gr.Button("🔄 Refresh All", variant="primary")
-                            btn_export = gr.Button("📥 Export JSON", variant="secondary")
-                        btn_clear_all = gr.Button("🗑️ Clear All History", variant="stop")
-                        export_file = gr.File(label="Exported File", visible=False)
-                        clear_status = gr.Textbox(label="Status", interactive=False)
                 with gr.Row():
                     with gr.Column():
-                        gr.Markdown("### 📜 Processing History")
-                        with gr.Row():
-                            search_input = gr.Textbox(
-                                placeholder="🔍 Tìm kiếm theo prompt...",
-                                label="Search",
-                                scale=2
-                            )
-                            filter_type = gr.Dropdown(
-                                choices=["all", "image", "video", "click"],
-                                value="all",
-                                label="Loại",
-                                scale=1
-                            )
-                            filter_status = gr.Dropdown(
-                                choices=["all", "completed", "error"],
-                                value="all",
-                                label="Trạng thái",
-                                scale=1
-                            )
-                        history_table = gr.HTML(value=format_history_table())
-                with gr.Row():
-                    with gr.Column():
-                        gr.Markdown("### 🖼️ Gallery - Recent Outputs")
-                        history_gallery = gr.Gallery(
-                            value=get_history_gallery(),
-                            label="Kết quả gần đây",
-                            columns=4,
-                            height=400,
-                            object_fit="contain"
-                        )
-                def refresh_all():
-                    """Recompute the stat cards, the history table and the gallery in one go."""
-                    stat_cards = update_stats()
-                    table_html = format_history_table()
-                    gallery_items = get_history_gallery()
-                    return (*stat_cards, table_html, gallery_items)
-                btn_refresh.click(
-                    fn=refresh_all,
-                    outputs=[stat_total, stat_completed, stat_errors, stat_success, history_table, history_gallery]
-                )
-                btn_export.click(
-                    fn=export_history_json,
-                    outputs=[export_file]
-                )
-                btn_clear_all.click(
-                    fn=clear_all_history,
-                    outputs=[clear_status]
-                ).then(
-                    fn=refresh_all,
-                    outputs=[stat_total, stat_completed, stat_errors, stat_success, history_table, history_gallery]
-                )
-                def filter_and_display(keyword, ftype, fstatus):
-                    """Return the history table HTML for the current search/filter inputs.
-
-                    When the search yields nothing, a centered "no results" message is
-                    returned instead of the table.
-                    """
-                    filtered = search_history(keyword, ftype, fstatus)
-                    if not filtered:
-                        return "<p style='text-align:center; color:#666; padding:40px;'>🔍 Không tìm thấy kết quả phù hợp</p>"
-                    # NOTE(review): `filtered` is only used for the emptiness check; the
-                    # table is rendered by a call that receives no filter arguments, so
-                    # it presumably shows the full history — confirm and pass the
-                    # filtered rows if format_history_table supports it.
-                    return format_history_table()
-                search_input.change(
-                    fn=filter_and_display,
-                    inputs=[search_input, filter_type, filter_status],
-                    outputs=[history_table]
-                )
-                filter_type.change(
-                    fn=filter_and_display,
-                    inputs=[search_input, filter_type, filter_status],
-                    outputs=[history_table]
-                )
-                filter_status.change(
-                    fn=filter_and_display,
-                    inputs=[search_input, filter_type, filter_status],
-                    outputs=[history_table]
                 )
-        # Footer
-        gr.Markdown("""
-        ---
-        **SAM3: Segment Anything Model 3** | Powered by DiffusionWave | Background Processing Enabled | No Timeout Limits
-        """)
 if __name__ == "__main__":
-    print("🚀 Starting SAM3 Application...")
-    print(f"📁 Output directory: {OUTPUTS_DIR}")
-    print(f"📥 Downloads directory: {DOWNLOADS_DIR}")
-    print(f"📊 History file: {HISTORY_FILE}")
     demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        max_threads=10,
-        show_error=True,
-        share=False,
         css=custom_css,
-        theme=app_theme
     )

 import os
 import cv2
+import tempfile
 import spaces
 import gradio as gr
 import numpy as np
 import torch
 import matplotlib
+import matplotlib.pyplot as plt
 from PIL import Image, ImageDraw
 from typing import Iterable
 from gradio.themes import Soft
 import threading
 import queue
 import uuid
 # ============ THEME SETUP ============
 colors.steel_blue = colors.Color(
     name="steel_blue",
+    c50="#EBF3F8",
+    c100="#D3E5F0",
+    c200="#A8CCE1",
+    c300="#7DB3D2",
+    c400="#529AC3",
+    c500="#4682B4",
+    c600="#3E72A0",
+    c700="#36638C",
+    c800="#2E5378",
+    c900="#264364",
+    c950="#1E3450",
 )
 class CustomBlueTheme(Soft):
+    def __init__(
+        self,
+        *,
+        primary_hue: colors.Color | str = colors.gray,
+        secondary_hue: colors.Color | str = colors.steel_blue,
+        neutral_hue: colors.Color | str = colors.slate,
+        text_size: sizes.Size | str = sizes.text_lg,
+        font: fonts.Font | str | Iterable[fonts.Font | str] = (
+            fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
+        ),
+        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
+            fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
+        ),
+    ):
+        super().__init__(
+            primary_hue=primary_hue,
+            secondary_hue=secondary_hue,
+            neutral_hue=neutral_hue,
+            text_size=text_size,
+            font=font,
+            font_mono=font_mono,
+        )
         super().set(
             background_fill_primary="*primary_50",
             background_fill_primary_dark="*primary_900",
 app_theme = CustomBlueTheme()
 # ============ GLOBAL SETUP ============
+# Prefer CUDA when torch reports it as available, otherwise fall back to CPU.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"🖥️ Using compute device: {device}")
+# History storage
 HISTORY_DIR = "processing_history"
+os.makedirs(HISTORY_DIR, exist_ok=True)
 HISTORY_FILE = os.path.join(HISTORY_DIR, "history.json")
+# Background processing queue
+# processing_results maps a job id to a dict describing that job's state.
 processing_queue = queue.Queue()
 processing_results = {}
 # Load models
+# Three model/processor pairs are loaded from the same "DiffusionWave/sam3"
+# checkpoint at import time; the video model is additionally cast to bfloat16.
+print("⏳ Loading SAM3 Models permanently into memory...")
 try:
+    print("   ... Loading Image Text Model")
     IMG_MODEL = Sam3Model.from_pretrained("DiffusionWave/sam3").to(device)
     IMG_PROCESSOR = Sam3Processor.from_pretrained("DiffusionWave/sam3")
+    print("   ... Loading Image Tracker Model")
     TRK_MODEL = Sam3TrackerModel.from_pretrained("DiffusionWave/sam3").to(device)
     TRK_PROCESSOR = Sam3TrackerProcessor.from_pretrained("DiffusionWave/sam3")
+    print("   ... Loading Video Model")
+    VID_MODEL = Sam3VideoModel.from_pretrained("DiffusionWave/sam3").to(device, dtype=torch.bfloat16)
     VID_PROCESSOR = Sam3VideoProcessor.from_pretrained("DiffusionWave/sam3")
+    print("✅ All Models loaded successfully!")
 except Exception as e:
+    # Any load failure is only printed; every model/processor global is then
+    # set to None so the module still imports.
+    print(f"❌ CRITICAL ERROR LOADING MODELS: {e}")
     IMG_MODEL = IMG_PROCESSOR = TRK_MODEL = TRK_PROCESSOR = VID_MODEL = VID_PROCESSOR = None
 # ============ HISTORY MANAGEMENT ============
 def load_history():
+    """Load the processing history from ``HISTORY_FILE``.
+
+    Returns:
+        The parsed JSON content of the history file, or an empty list when
+        the file is missing, unreadable or does not contain valid JSON.
+    """
+    try:
+        with open(HISTORY_FILE, 'r', encoding='utf-8') as f:
+            return json.load(f)
+    except (OSError, ValueError):
+        # OSError covers a missing/unreadable file; ValueError covers both
+        # json.JSONDecodeError and UnicodeDecodeError. History is best-effort,
+        # so a damaged file is treated as "no history" instead of failing.
+        return []
+def save_history(history_item, max_items=100):
+    """Prepend ``history_item`` to the stored history and write it back.
+
+    Args:
+        history_item: JSON-serializable entry describing one finished job.
+        max_items: Maximum number of entries kept in the file; older
+            entries beyond this count are dropped. Defaults to 100.
+    """
     history = load_history()
+    history.insert(0, history_item)  # Newest entry first
+    del history[max_items:]  # Trim to the most recent entries
+    # json.dump escapes non-ASCII by default, so the file content is plain
+    # ASCII; the explicit encoding just removes the platform dependency.
+    with open(HISTORY_FILE, 'w', encoding='utf-8') as f:
+        json.dump(history, f, indent=2)
+def get_history_display():
+    """Format the 50 most recent history entries as Markdown text.
+
+    Returns:
+        A Markdown string with one paragraph per entry (status, job type,
+        timestamp, prompt and, when present, the output file name), or a
+        placeholder message when there is no history.
+    """
     history = load_history()
     if not history:
+        return "Chưa có lịch sử xử lý nào"
+    parts = []
+    for item in history[:50]:
+        status_emoji = "✅" if item['status'] == 'completed' else "❌"
+        parts.append(f"{status_emoji} **{item['type'].upper()}** - {item['timestamp']}\n")
+        parts.append(f"   Prompt: {item['prompt']}\n")
         if item.get('output_path'):
+            parts.append(f"   File: `{os.path.basename(item['output_path'])}`\n")
+        parts.append("\n")
+    # Join once at the end instead of growing a string inside the loop.
+    return "".join(parts)
+# ============ UTILITY FUNCTIONS ============
 def apply_mask_overlay(base_image, mask_data, opacity=0.5):
+    """Draws segmentation masks on top of an image.
+
+    A NumPy ``base_image`` is converted to a PIL image first, and the image
+    is worked on in RGBA mode. Each mask is painted in its own colour taken
+    from matplotlib's "rainbow" colormap, with ``opacity`` scaling the alpha
+    of the painted pixels. Masks whose size differs from the image are
+    resized with nearest-neighbour sampling.
+
+    Returns:
+        The composited image converted back to RGB.
+    """
     if isinstance(base_image, np.ndarray):
         base_image = Image.fromarray(base_image)
     base_image = base_image.convert("RGBA")
         mask_data = mask_data.cpu().numpy()
     mask_data = mask_data.astype(np.uint8)
+    # Drop leading singleton dimensions before counting masks.
+    if mask_data.ndim == 4: mask_data = mask_data[0]
+    if mask_data.ndim == 3 and mask_data.shape[0] == 1: mask_data = mask_data[0]
     num_masks = mask_data.shape[0] if mask_data.ndim == 3 else 1
     if mask_data.ndim == 2:
     try:
         color_map = matplotlib.colormaps["rainbow"].resampled(max(num_masks, 1))
+    except AttributeError:
+        # Fallback lookup through matplotlib.cm when the attribute access above fails.
         import matplotlib.cm as cm
         color_map = cm.get_cmap("rainbow").resampled(max(num_masks, 1))
     rgb_colors = [tuple(int(c * 255) for c in color_map(i)[:3]) for i in range(num_masks)]
     composite_layer = Image.new("RGBA", base_image.size, (0, 0, 0, 0))
+    for i, single_mask in enumerate(mask_data):
+        mask_bitmap = Image.fromarray((single_mask * 255).astype(np.uint8))
+        if mask_bitmap.size != base_image.size:
+            mask_bitmap = mask_bitmap.resize(base_image.size, resample=Image.NEAREST)
+        fill_color = rgb_colors[i]
+        color_fill = Image.new("RGBA", base_image.size, fill_color + (0,))
+        # Mask pixels > 0 get alpha = value * opacity; everything else stays transparent.
+        mask_alpha = mask_bitmap.point(lambda v: int(v * opacity) if v > 0 else 0)
         color_fill.putalpha(mask_alpha)
         composite_layer = Image.alpha_composite(composite_layer, color_fill)
     return Image.alpha_composite(base_image, composite_layer).convert("RGB")
 def draw_points_on_image(image, points):
+    """Return a copy of ``image`` with a marker drawn at every click point.
+
+    A NumPy image is converted to a PIL image first. Each ``(x, y)`` point
+    is drawn as a red disc of radius 8 with a white outline; the input image
+    itself is not modified.
+    """
+    radius = 8
+    canvas = Image.fromarray(image) if isinstance(image, np.ndarray) else image
+    canvas = canvas.copy()
+    pen = ImageDraw.Draw(canvas)
+    for cx, cy in points:
+        bbox = (cx - radius, cy - radius, cx + radius, cy + radius)
+        pen.ellipse(bbox, fill="red", outline="white", width=4)
+    return canvas
+# ============ BACKGROUND PROCESSING WORKER ============
 def background_worker():
+    """Background thread that processes jobs from queue.
+
+    Loops forever: takes the next job dict from ``processing_queue``, marks
+    it as ``'processing'`` in ``processing_results`` and stores the outcome
+    there. A completed job is also appended to the history via
+    ``save_history``.
+    """
     while True:
         try:
+            # Queue.get() blocks until a job is available.
             job = processing_queue.get()
             job_id = job['id']
             job_type = job['type']
             processing_results[job_id] = {'status': 'processing', 'progress': 0}
             try:
                     'progress': 100
                 }
+                # Save to history
                 save_history({
                     'id': job_id,
                     'type': job_type,
                     'prompt': job.get('prompt', 'N/A'),
                     'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                     'status': 'completed',
+                    'output_path': result.get('output_path')
                 })
             except Exception as e:
+                # A failing job is recorded as an error so it can be reported
+                # through processing_results.
                 processing_results[job_id] = {
                     'status': 'error',
                     'error': str(e),
                     'error': str(e)
                 })
         except Exception as e:
+            # Outer guard: a failure here is only printed and the loop continues.
+            print(f"Worker error: {e}")
+# Start background worker
+# The thread is created as a daemon and started at import time.
+worker_thread = threading.Thread(target=background_worker, daemon=True)
+worker_thread.start()
+# ============ JOB PROCESSORS ============
+@spaces.GPU
+def process_image_job(job):
+    """Run text-prompted image segmentation for one job.
+
+    Reads ``image`` (PIL image or file path), ``prompt`` and the optional
+    ``conf_thresh`` (default 0.5) from ``job``, saves the mask overlay as
+    ``<id>_result.jpg`` in ``HISTORY_DIR`` and returns a dict with the
+    ``(image, annotations)`` pair under ``'image'`` and the saved file path
+    under ``'output_path'``.
+    """
+    image = job['image']
+    prompt = job['prompt']
+    threshold = job.get('conf_thresh', 0.5)
+    if isinstance(image, str):
+        image = Image.open(image)
+    rgb_image = image.convert("RGB")
+    encoded = IMG_PROCESSOR(images=rgb_image, text=prompt, return_tensors="pt").to(device)
+    with torch.no_grad():
+        model_output = IMG_MODEL(**encoded)
+    segmentation = IMG_PROCESSOR.post_process_instance_segmentation(
+        model_output,
+        threshold=threshold,
+        mask_threshold=0.5,
+        target_sizes=encoded.get("original_sizes").tolist()
+    )[0]
+    masks = segmentation['masks'].cpu().numpy()
+    scores = segmentation['scores'].cpu().numpy()
+    # One (mask, "prompt (score)") pair per detected instance.
+    annotations = [
+        (mask, f"{prompt} ({scores[i]:.2f})")
+        for i, mask in enumerate(masks)
+    ]
+    # Save output
+    result_path = os.path.join(HISTORY_DIR, f"{job['id']}_result.jpg")
+    apply_mask_overlay(rgb_image, masks).save(result_path)
+    return {'image': (rgb_image, annotations), 'output_path': result_path}
+@spaces.GPU
+def process_video_job(job):
+    """Run text-prompted video segmentation for one job.
+
+    Reads up to ``frame_limit`` frames (default 60; a value <= 0 means no
+    limit) from ``job['video']``, propagates the text prompt through them
+    and writes the mask-overlay video to ``<id>_result.mp4`` in
+    ``HISTORY_DIR``. Progress is published as a percentage in
+    ``processing_results[job['id']]['progress']``.
+
+    Returns:
+        A dict with the written file path under ``'output_path'``.
+
+    Raises:
+        ValueError: If no frame could be read from the input video.
+    """
+    source_vid = job['video']
+    text_query = job['prompt']
+    frame_limit = job.get('frame_limit', 60)
+    video_cap = cv2.VideoCapture(source_vid)
+    try:
+        # Some sources report 0 FPS, which would produce an unusable output
+        # file; fall back to a common default in that case.
+        vid_fps = video_cap.get(cv2.CAP_PROP_FPS) or 30.0
+        vid_w = int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        vid_h = int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        video_frames = []
+        while video_cap.isOpened():
+            if frame_limit > 0 and len(video_frames) >= frame_limit:
+                break
+            ret, frame = video_cap.read()
+            if not ret:
+                break
+            video_frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+    finally:
+        # Release the capture even when reading or colour conversion fails.
+        video_cap.release()
+    if not video_frames:
+        raise ValueError("Could not read any frames from the input video")
+    session = VID_PROCESSOR.init_video_session(video=video_frames, inference_device=device, dtype=torch.bfloat16)
+    session = VID_PROCESSOR.add_text_prompt(inference_session=session, text=text_query)
+    output_path = os.path.join(HISTORY_DIR, f"{job['id']}_result.mp4")
+    video_writer = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), vid_fps, (vid_w, vid_h))
+    total_frames = len(video_frames)
+    try:
+        for frame_idx, model_out in enumerate(VID_MODEL.propagate_in_video_iterator(inference_session=session, max_frame_num_to_track=total_frames)):
+            post_processed = VID_PROCESSOR.postprocess_outputs(session, model_out)
+            f_idx = model_out.frame_idx
+            original_pil = Image.fromarray(video_frames[f_idx])
+            if 'masks' in post_processed:
+                detected_masks = post_processed['masks']
+                if detected_masks.ndim == 4: detected_masks = detected_masks.squeeze(1)
+                final_frame = apply_mask_overlay(original_pil, detected_masks)
+            else:
+                final_frame = original_pil
+            video_writer.write(cv2.cvtColor(np.array(final_frame), cv2.COLOR_RGB2BGR))
+            # Update progress
+            progress = int((frame_idx + 1) / total_frames * 100)
+            processing_results[job['id']]['progress'] = progress
+    finally:
+        # Always finalize the output file, also when inference fails midway.
+        video_writer.release()
+    return {'output_path': output_path}
+@spaces.GPU
+def process_click_job(job):
+    """Run point-prompted segmentation for one click job.
+
+    Reads ``image`` (PIL image or file path), ``points`` and ``labels`` from
+    ``job``, overlays the first predicted mask and the click markers, saves
+    the result as ``<id>_result.jpg`` in ``HISTORY_DIR`` and returns a dict
+    with the rendered image under ``'image'`` and its path under
+    ``'output_path'``.
+    """
+    image = job['image']
+    click_points = job['points']
+    click_labels = job['labels']
+    if isinstance(image, str):
+        image = Image.open(image)
+    # Points and labels are passed to the processor wrapped in two extra list levels.
+    encoded = TRK_PROCESSOR(images=image, input_points=[[click_points]], input_labels=[[click_labels]], return_tensors="pt").to(device)
+    with torch.no_grad():
+        prediction = TRK_MODEL(**encoded, multimask_output=False)
+    masks = TRK_PROCESSOR.post_process_masks(prediction.pred_masks.cpu(), encoded["original_sizes"], binarize=True)[0]
+    annotated = draw_points_on_image(apply_mask_overlay(image, masks[0]), click_points)
+    result_path = os.path.join(HISTORY_DIR, f"{job['id']}_result.jpg")
+    annotated.save(result_path)
+    return {'image': annotated, 'output_path': result_path}
+# ============ UI HANDLERS ============
+def submit_image_job(source_img, text_query, conf_thresh):
+    """Queue an image segmentation job for the background worker.
+
+    Returns a cleared result, a status message and the new job id (an empty
+    string when the image or the prompt is missing).
+    """
+    missing_input = source_img is None or not text_query
+    if missing_input:
+        return None, "❌ Vui lòng cung cấp ảnh và prompt", ""
+    job_id = str(uuid.uuid4())
+    processing_queue.put({
+        'id': job_id,
+        'type': 'image',
+        'image': source_img,
+        'prompt': text_query,
+        'conf_thresh': conf_thresh
+    })
+    status = f"✅ Đã thêm vào hàng chờ (ID: {job_id[:8]}). Đang xử lý..."
+    return None, status, job_id
+def check_image_status(job_id):
+    """Look up an image job and return ``(result_or_None, status_message)``."""
+    if not job_id or job_id not in processing_results:
+        return None, "Không tìm thấy công việc"
+    entry = processing_results[job_id]
+    state = entry['status']
+    if state == 'processing':
+        return None, f"⏳ Đang xử lý... {entry['progress']}%"
+    if state == 'completed':
+        return entry['result']['image'], "✅ Hoàn thành!"
+    # Any other state is reported as an error.
+    return None, f"❌ Lỗi: {entry.get('error', 'Unknown')}"
+def submit_video_job(source_vid, text_query, frame_limit, time_limit):
+    """Queue a video segmentation job for the background worker.
+
+    Returns a cleared result, a status message and the new job id (an empty
+    string when the video or the prompt is missing).
+    """
+    if not (source_vid and text_query):
+        return None, "❌ Vui lòng cung cấp video và prompt", ""
+    job_id = str(uuid.uuid4())
+    processing_queue.put({
+        'id': job_id,
+        'type': 'video',
+        'video': source_vid,
+        'prompt': text_query,
+        'frame_limit': frame_limit,
+        'time_limit': time_limit
+    })
+    status = f"✅ Đã thêm vào hàng chờ (ID: {job_id[:8]}). Đang xử lý..."
+    return None, status, job_id
+def check_video_status(job_id):
+    """Look up a video job and return ``(output_path_or_None, status_message)``."""
+    if not job_id or job_id not in processing_results:
+        return None, "Không tìm thấy công việc"
+    entry = processing_results[job_id]
+    state = entry['status']
+    if state == 'processing':
+        return None, f"⏳ Đang xử lý... {entry['progress']}%"
+    if state == 'completed':
+        return entry['result']['output_path'], "✅ Hoàn thành!"
+    # Any other state is reported as an error.
+    return None, f"❌ Lỗi: {entry.get('error', 'Unknown')}"
+def image_click_handler(image, evt: gr.SelectData, points_state, labels_state):
+    """Record a click as a positive point prompt and segment immediately.
+
+    Returns the segmented image (or the unchanged input image when
+    processing raises) together with the updated point and label lists.
+    """
+    x, y = evt.index
+    points_state = [] if points_state is None else points_state
+    labels_state = [] if labels_state is None else labels_state
+    points_state.append([x, y])
+    labels_state.append(1)
+    # Processed synchronously here rather than through the background queue.
+    job = {
+        'id': str(uuid.uuid4()),
+        'type': 'click',
+        'image': image,
+        'points': points_state,
+        'labels': labels_state
+    }
+    try:
+        segmented = process_click_job(job)['image']
+    except Exception as e:
+        print(f"Click error: {e}")
+        return image, points_state, labels_state
+    return segmented, points_state, labels_state
+# ============ GRADIO INTERFACE ============
+custom_css="""
+#col-container { margin: 0 auto; max-width: 1200px; }
+#main-title h1 { font-size: 2.1em !important; }
+.history-box { max-height: 600px; overflow-y: auto; }
 """
+# Build the Gradio UI as a single Blocks app bound to `demo`. It contains one
+# centered column holding the title and five tabs: image segmentation, video
+# segmentation, click segmentation, processing history, and a batch-processing
+# placeholder. Components are declared inside nested layout contexts, so the
+# statement order below is the on-screen order.
+with gr.Blocks(css=custom_css, theme=app_theme) as demo:
     with gr.Column(elem_id="col-container"):
         gr.Markdown("# **SAM3: Segment Anything Model 3** 🚀", elem_id="main-title")
+        gr.Markdown("Xử lý ảnh/video với **background processing** - không cần chờ đợi!")
         with gr.Tabs():
+            # ===== IMAGE SEGMENTATION TAB =====
             with gr.Tab("📷 Image Segmentation"):
                 with gr.Row():
                     with gr.Column(scale=1):
+                        image_input = gr.Image(label="Upload Image", type="pil", height=350)
+                        txt_prompt_img = gr.Textbox(label="Text Prompt", placeholder="e.g., cat, face, car wheel")
+                        with gr.Accordion("Advanced Settings", open=False):
+                            conf_slider = gr.Slider(0.0, 1.0, value=0.45, step=0.05, label="Confidence Threshold")
+                        btn_submit_img = gr.Button("🚀 Submit Job (Background)", variant="primary")
+                        btn_check_img = gr.Button("🔍 Check Status", variant="secondary")
+                        # Hidden field that carries the job id from the submit
+                        # handler to the status-check handler.
+                        job_id_img = gr.Textbox(label="Job ID", visible=False)
+                    # NOTE(review): Gradio documents `scale` as an integer; a
+                    # float such as 1.5 may trigger a warning - confirm, and
+                    # consider 2 and 3 for the two columns to keep the ratio.
                     with gr.Column(scale=1.5):
+                        image_result = gr.AnnotatedImage(label="Segmented Result", height=410)
+                        status_img = gr.Textbox(label="Status", interactive=False)
+                # Submit: submit_image_job(image, prompt, confidence) fills the
+                # annotated result, the status text and the hidden job id.
+                btn_submit_img.click(
+                    fn=submit_image_job,
+                    inputs=[image_input, txt_prompt_img, conf_slider],
+                    outputs=[image_result, status_img, job_id_img]
                 )
+                # Check Status: check_image_status(job id) refreshes the result
+                # and the status text.
+                btn_check_img.click(
+                    fn=check_image_status,
+                    inputs=[job_id_img],
+                    outputs=[image_result, status_img]
                 )
+            # ===== VIDEO SEGMENTATION TAB =====
             with gr.Tab("🎥 Video Segmentation"):
                 with gr.Row():
                     with gr.Column():
+                        video_input = gr.Video(label="Upload Video", format="mp4", height=320)
+                        txt_prompt_vid = gr.Textbox(label="Text Prompt", placeholder="e.g., person running, red car")
+                        with gr.Row():
+                            frame_limiter = gr.Slider(10, 500, value=60, step=10, label="Max Frames")
+                            time_limiter = gr.Radio([60, 120, 180], value=60, label="Timeout (seconds)")
+                        btn_submit_vid = gr.Button("🚀 Submit Job (Background)", variant="primary")
+                        btn_check_vid = gr.Button("🔍 Check Status", variant="secondary")
+                        # Hidden field that carries the job id from the submit
+                        # handler to the status-check handler.
+                        job_id_vid = gr.Textbox(label="Job ID", visible=False)
+                    with gr.Column():
+                        video_result = gr.Video(label="Processed Video")
+                        status_vid = gr.Textbox(label="Status", interactive=False)
+                # Submit: submit_video_job(video, prompt, max frames, timeout)
+                # fills the processed video, the status text and the hidden
+                # job id.
+                btn_submit_vid.click(
+                    fn=submit_video_job,
+                    inputs=[video_input, txt_prompt_vid, frame_limiter, time_limiter],
+                    outputs=[video_result, status_vid, job_id_vid]
                 )
+                # Check Status: check_video_status(job id) refreshes the video
+                # and the status text.
+                btn_check_vid.click(
+                    fn=check_video_status,
+                    inputs=[job_id_vid],
+                    outputs=[video_result, status_vid]
                 )
+            # ===== CLICK SEGMENTATION TAB =====
+            with gr.Tab("👆 Click Segmentation"):
                 with gr.Row():
                     with gr.Column(scale=1):
+                        img_click_input = gr.Image(type="pil", label="Upload Image", interactive=True, height=450)
+                        gr.Markdown("**Hướng dẫn:** Click vào đối tượng bạn muốn phân đoạn")
                         with gr.Row():
+                            img_click_clear = gr.Button("🔄 Clear Points & Reset", variant="primary")
+                        # Per-session state, both starting as empty lists; they
+                        # are passed into and written back by the select handler.
+                        st_click_points = gr.State([])
+                        st_click_labels = gr.State([])
                     with gr.Column(scale=1):
+                        img_click_output = gr.Image(type="pil", label="Result Preview", height=450, interactive=False)
+                # Selecting (clicking) on the input image calls
+                # image_click_handler with the image and both state lists; its
+                # return values update the preview and the two states.
+                img_click_input.select(
+                    image_click_handler,
+                    inputs=[img_click_input, st_click_points, st_click_labels],
+                    outputs=[img_click_output, st_click_points, st_click_labels]
                 )
+                # Clear: blanks the preview and resets both state lists. The
+                # uploaded input image is not among the outputs, so it stays.
+                img_click_clear.click(
+                    lambda: (None, [], []),
+                    outputs=[img_click_output, st_click_points, st_click_labels]
                 )
+            # ===== HISTORY TAB =====
+            with gr.Tab("📜 Lịch Sử Xử Lý"):
                 with gr.Row():
                     with gr.Column():
+                        btn_refresh_history = gr.Button("🔄 Refresh History", variant="primary")
+                        # get_history_display() is called here, once, while the
+                        # UI is being built; later updates only happen through
+                        # the refresh button wired below.
+                        history_display = gr.Markdown(value=get_history_display(), elem_classes="history-box")
+                        with gr.Accordion("Hướng dẫn", open=False):
+                            gr.Markdown("""
+                            ### Lịch sử lưu:
+                            - ✅ **Hoàn thành**: File đã được xử lý thành công
+                            - ❌ **Lỗi**: Xử lý thất bại
+                            - Tất cả file output được lưu trong thư mục `processing_history/`
+                            - Hệ thống giữ lại 100 lịch sử gần nhất
+                            """)
+                # Refresh: re-run get_history_display() and replace the
+                # Markdown content with its return value.
+                btn_refresh_history.click(
+                    fn=get_history_display,
+                    outputs=[history_display]
                 )
+            # ===== BATCH PROCESSING TAB =====
+            # Placeholder only: two static Markdown blocks, no inputs and no
+            # event handlers.
+            with gr.Tab("⚙️ Batch Processing"):
+                gr.Markdown("### Xử lý hàng loạt (Coming Soon)")
+                gr.Markdown("""
+                Tính năng này sẽ cho phép bạn:
+                - Upload nhiều ảnh/video cùng lúc
+                - Tự động xử lý tuần tự
+                - Download tất cả kết quả dưới dạng ZIP
+                """)
 if __name__ == "__main__":
+    # Start the Gradio server only when this file is run as a script.
     demo.launch(
+        # NOTE(review): css and theme are also passed to gr.Blocks(...) where
+        # `demo` is created. Gradio releases differ on whether these belong on
+        # Blocks() or on launch() - confirm against the installed Gradio
+        # version and keep only the supported location.
         css=custom_css,
+        theme=app_theme,
+        ssr_mode=False,
+        mcp_server=True,
+        show_error=True
     )