""" WeDetect: Open-Vocabulary Object Detection Demo HuggingFace Spaces Application This app provides an interactive interface for WeDetect, a fast open-vocabulary object detection model that uses Chinese class names internally. Features: - Upload any image for object detection - Enter class names in English OR Chinese - Automatic English→Chinese translation with editable preview - Adjustable confidence threshold - Visual results with bounding boxes Compatible with: - Gradio 5.50.0+ / 6.x - huggingface_hub 1.x """ import os import sys import subprocess # ============================================================================ # INSTALL MMCV/MMDET/MMENGINE WITH CUDA EXTENSIONS # ============================================================================ # mmcv needs pre-built CUDA extensions. We must install from OpenMMLab's # wheel index with the correct CUDA and PyTorch version. # ============================================================================ def get_torch_cuda_version(): """Detect PyTorch and CUDA versions for wheel selection.""" import torch torch_version = torch.__version__.split('+')[0] # e.g., "2.1.0" torch_major_minor = '.'.join(torch_version.split('.')[:2]) # e.g., "2.1" if torch.cuda.is_available(): cuda_version = torch.version.cuda # e.g., "12.1" cuda_tag = 'cu' + cuda_version.replace('.', '')[:3] # e.g., "cu121" else: cuda_tag = 'cpu' return torch_major_minor, cuda_tag def install_mm_packages(): """Install mmcv, mmdet, mmengine with proper CUDA extensions.""" # First install mmengine (no CUDA extensions needed) try: import mmengine print(f"✅ mmengine already installed: {mmengine.__version__}") except ImportError: print("📦 Installing mmengine...") subprocess.run( [sys.executable, "-m", "pip", "install", "mmengine==0.10.7"], capture_output=True, text=True ) print("✅ mmengine installed") # Install mmcv with CUDA extensions from OpenMMLab wheel index try: import mmcv from mmcv.ops import roi_align # Test if extensions work print(f"✅ mmcv 
already installed with extensions: {mmcv.__version__}") except (ImportError, ModuleNotFoundError) as e: print(f"📦 Installing mmcv with CUDA extensions... (reason: {e})") # Get versions for wheel selection torch_version, cuda_tag = get_torch_cuda_version() print(f" Detected: PyTorch {torch_version}, CUDA tag: {cuda_tag}") # OpenMMLab wheel index URL wheel_index = f"https://download.openmmlab.com/mmcv/dist/{cuda_tag}/torch{torch_version}/index.html" print(f" Wheel index: {wheel_index}") # Uninstall any existing broken mmcv subprocess.run( [sys.executable, "-m", "pip", "uninstall", "mmcv", "-y"], capture_output=True, text=True ) # Install from OpenMMLab wheel index result = subprocess.run( [sys.executable, "-m", "pip", "install", "mmcv==2.1.0", "-f", wheel_index], capture_output=True, text=True ) if result.returncode != 0: print(f"⚠️ First attempt failed: {result.stderr}") # Try alternative CUDA versions for alt_cuda in ["cu121", "cu118", "cu117"]: if alt_cuda == cuda_tag: continue alt_wheel_index = f"https://download.openmmlab.com/mmcv/dist/{alt_cuda}/torch{torch_version}/index.html" print(f" Trying alternative: {alt_wheel_index}") result = subprocess.run( [sys.executable, "-m", "pip", "install", "mmcv==2.1.0", "-f", alt_wheel_index], capture_output=True, text=True ) if result.returncode == 0: break print("✅ mmcv installed") # Install mmdet try: import mmdet print(f"✅ mmdet already installed: {mmdet.__version__}") except ImportError: print("📦 Installing mmdet...") subprocess.run( [sys.executable, "-m", "pip", "install", "mmdet==3.3.0"], capture_output=True, text=True ) print("✅ mmdet installed") # Run installation before other imports print("🔧 Setting up MM packages with CUDA extensions...") install_mm_packages() # Verify installation print("🔍 Verifying mmcv extensions...") try: from mmcv.ops import roi_align print("✅ mmcv._ext loaded successfully!") except Exception as e: print(f"⚠️ Warning: mmcv extensions may not be fully loaded: {e}") # 
# ============================================================================
# STANDARD IMPORTS (after MM packages are installed)
# ============================================================================
import tempfile
import colorsys
from typing import List, Tuple, Optional

import gradio as gr
import spaces
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from huggingface_hub import hf_hub_download

# ============================================================================
# CONFIGURATION
# ============================================================================

DEFAULT_MODEL = "large"  # Options: "tiny", "base", "large"
REPO_ID = "fushh7/WeDetect"

# Checkpoint filename and config stem for each model size.
MODEL_INFO = {
    "tiny": {"file": "wedetect_tiny.pth", "config": "wedetect_tiny"},
    "base": {"file": "wedetect_base.pth", "config": "wedetect_base"},
    "large": {"file": "wedetect_large.pth", "config": "wedetect_large"},
}

# ============================================================================
# ENGLISH TO CHINESE DICTIONARY (~200 common objects)
# ============================================================================

ENGLISH_TO_CHINESE = {
    # People
    "person": "人", "man": "男人", "woman": "女人", "child": "儿童",
    "kid": "小孩", "baby": "婴儿", "boy": "男孩", "girl": "女孩",
    "people": "人", "human": "人",
    # Animals
    # NOTE: "mouse" lives under Electronics (鼠标). The animal sense (老鼠)
    # used to be listed here too, but a duplicate dict key is silently
    # overridden by the later entry, so 鼠标 was the effective mapping anyway.
    "dog": "狗", "cat": "猫", "bird": "鸟", "fish": "鱼", "horse": "马",
    "cow": "牛", "sheep": "羊", "pig": "猪", "chicken": "鸡", "duck": "鸭",
    "elephant": "大象", "bear": "熊", "zebra": "斑马", "giraffe": "长颈鹿",
    "lion": "狮子", "tiger": "老虎", "monkey": "猴子", "rabbit": "兔子",
    "snake": "蛇", "turtle": "乌龟", "frog": "青蛙", "butterfly": "蝴蝶",
    "bee": "蜜蜂", "spider": "蜘蛛", "ant": "蚂蚁",
    # Vehicles
    "car": "车", "truck": "卡车", "bus": "公交车", "train": "火车",
    "airplane": "飞机", "plane": "飞机", "boat": "船", "ship": "船",
    "bicycle": "自行车", "bike": "自行车", "motorcycle": "摩托车",
    "helicopter": "直升机", "taxi": "出租车", "ambulance": "救护车",
    "fire truck": "消防车", "police car": "警车", "van": "面包车",
    # Furniture
    "chair": "椅子", "table": "桌子", "desk": "书桌", "bed": "床",
    "couch": "沙发", "sofa": "沙发", "bench": "长凳", "cabinet": "柜子",
    "shelf": "架子", "drawer": "抽屉", "wardrobe": "衣柜", "mirror": "镜子",
    # Electronics
    "tv": "电视", "television": "电视", "computer": "电脑",
    "laptop": "笔记本电脑", "phone": "手机", "cell phone": "手机",
    "mobile phone": "手机", "tablet": "平板电脑", "keyboard": "键盘",
    "mouse": "鼠标", "monitor": "显示器", "screen": "屏幕",
    "camera": "相机", "speaker": "音箱", "headphones": "耳机",
    "microphone": "麦克风", "remote": "遥控器",
    # Kitchen items
    "refrigerator": "冰箱", "fridge": "冰箱", "oven": "烤箱",
    "microwave": "微波炉", "toaster": "烤面包机", "blender": "搅拌机",
    "kettle": "水壶", "pot": "锅", "pan": "平底锅", "bowl": "碗",
    "plate": "盘子", "cup": "杯子", "mug": "马克杯", "glass": "玻璃杯",
    "bottle": "瓶子", "fork": "叉子", "knife": "刀", "spoon": "勺子",
    "chopsticks": "筷子",
    # Food
    "apple": "苹果", "banana": "香蕉", "orange": "橙子", "grape": "葡萄",
    "strawberry": "草莓", "watermelon": "西瓜", "pizza": "披萨",
    "hamburger": "汉堡", "sandwich": "三明治", "hot dog": "热狗",
    "cake": "蛋糕", "bread": "面包", "rice": "米饭", "noodles": "面条",
    "egg": "鸡蛋", "meat": "肉", "vegetable": "蔬菜", "fruit": "水果",
    # Clothing
    "shirt": "衬衫", "pants": "裤子", "dress": "连衣裙", "skirt": "裙子",
    "jacket": "夹克", "coat": "外套", "sweater": "毛衣", "hat": "帽子",
    "cap": "帽子", "shoe": "鞋", "shoes": "鞋", "boot": "靴子",
    "sock": "袜子", "glove": "手套", "scarf": "围巾", "tie": "领带",
    "belt": "腰带", "bag": "包", "backpack": "背包", "purse": "钱包",
    "wallet": "钱包", "watch": "手表", "glasses": "眼镜",
    "sunglasses": "太阳镜",
    # Sports
    "ball": "球", "football": "足球", "soccer ball": "足球",
    "basketball": "篮球", "baseball": "棒球", "tennis ball": "网球",
    "golf ball": "高尔夫球", "volleyball": "排球",
    "tennis racket": "网球拍", "skateboard": "滑板", "surfboard": "冲浪板",
    "ski": "滑雪板", "snowboard": "单板滑雪", "frisbee": "飞盘",
    # Office/School
    "book": "书", "notebook": "笔记本", "pen": "笔", "pencil": "铅笔",
    "paper": "纸", "scissors": "剪刀", "ruler": "尺子", "eraser": "橡皮",
    "stapler": "订书机", "calculator": "计算器", "clock": "时钟",
    "calendar": "日历", "folder": "文件夹",
    # Outdoor
    "tree": "树", "flower": "花", "grass": "草", "plant": "植物",
    "leaf": "叶子", "rock": "石头", "mountain": "山", "river": "河",
    "lake": "湖", "ocean": "海洋", "beach": "海滩", "sky": "天空",
    "cloud": "云", "sun": "太阳", "moon": "月亮", "star": "星星",
    # Buildings/Structures
    "house": "房子", "building": "建筑", "door": "门", "window": "窗户",
    "wall": "墙", "roof": "屋顶", "floor": "地板", "stairs": "楼梯",
    "fence": "栅栏", "bridge": "桥", "road": "道路", "street": "街道",
    "traffic light": "红绿灯", "stop sign": "停止标志",
    # Household
    "lamp": "灯", "light": "灯", "fan": "风扇", "air conditioner": "空调",
    "pillow": "枕头", "blanket": "毯子", "towel": "毛巾", "soap": "肥皂",
    "toothbrush": "牙刷", "toilet": "马桶", "sink": "水槽",
    "bathtub": "浴缸", "shower": "淋浴", "curtain": "窗帘",
    "carpet": "地毯", "rug": "地毯",
    # Misc
    "umbrella": "雨伞", "key": "钥匙", "lock": "锁", "box": "盒子",
    "basket": "篮子", "vase": "花瓶", "candle": "蜡烛", "picture": "图片",
    "painting": "画", "photo": "照片", "frame": "框架", "toy": "玩具",
    "teddy bear": "泰迪熊", "doll": "娃娃", "robot": "机器人",
    "kite": "风筝", "balloon": "气球", "flag": "旗帜",
}

# ============================================================================
# TRANSLATION FUNCTIONS
# ============================================================================


def translate_to_chinese(text: str) -> str:
    """Translate a single English word/phrase to Chinese using the dictionary.

    Falls back to singularized forms (strip "s"/"es"); unknown input is
    returned unchanged, which also covers text that is already Chinese.
    """
    text_lower = text.lower().strip()

    # Direct lookup
    if text_lower in ENGLISH_TO_CHINESE:
        return ENGLISH_TO_CHINESE[text_lower]

    # Try without 's' (plurals)
    if text_lower.endswith('s') and text_lower[:-1] in ENGLISH_TO_CHINESE:
        return ENGLISH_TO_CHINESE[text_lower[:-1]]

    # Try without 'es' (plurals)
    if text_lower.endswith('es') and text_lower[:-2] in ENGLISH_TO_CHINESE:
        return ENGLISH_TO_CHINESE[text_lower[:-2]]

    # Return original if not found (might already be Chinese)
    return text


def translate_class_list(classes_text: str, input_mode: str) -> str:
    """
    Translate a comma-separated list of classes.

    Args:
        classes_text: Comma-separated class names
        input_mode: "English" or "Chinese (中文)"

    Returns:
        Comma-separated Chinese class names
    """
    if not classes_text.strip():
        return ""

    classes = [c.strip() for c in classes_text.split(',') if c.strip()]

    if input_mode == "English":
        translated = [translate_to_chinese(c) for c in classes]
        return ', '.join(translated)
    # Already Chinese, return as-is (normalized spacing)
    return ', '.join(classes)


# ============================================================================
# MODEL LOADING
# ============================================================================

# Global model cache: model size -> initialized detector
_model_cache = {}
_repo_path = None


def setup_repo():
    """Clone the WeDetect repository (for configs) if not already present.

    Also inserts the clone into ``sys.path`` so its modules are importable.

    Returns:
        Path to the local clone.

    Raises:
        subprocess.CalledProcessError: when ``git clone`` fails.
    """
    global _repo_path

    if _repo_path is not None and os.path.exists(_repo_path):
        return _repo_path

    repo_dir = "/tmp/WeDetect"
    if not os.path.exists(repo_dir):
        print("📥 Cloning WeDetect repository...")
        try:
            subprocess.run(
                ["git", "clone", "--depth", "1",
                 "https://github.com/WeChatCV/WeDetect.git", repo_dir],
                check=True, capture_output=True, text=True
            )
            print("✅ Repository cloned!")
        except subprocess.CalledProcessError as e:
            print(f"❌ Failed to clone repository: {e.stderr}")
            raise

    # Add to Python path for imports
    if repo_dir not in sys.path:
        sys.path.insert(0, repo_dir)

    _repo_path = repo_dir
    return repo_dir


def get_model(model_size: str = DEFAULT_MODEL):
    """Load and cache the WeDetect model for the given size.

    Args:
        model_size: One of "tiny", "base", "large".

    Returns:
        An initialized mmdet detector (cached across calls).

    Raises:
        FileNotFoundError: when the config file is missing from the clone.
    """
    global _model_cache

    if model_size in _model_cache:
        return _model_cache[model_size]

    import torch
    from mmengine.config import Config
    from mmdet.apis import init_detector

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"🚀 Loading WeDetect-{model_size.capitalize()} on {device}...")

    # Setup repository for config files
    repo_dir = setup_repo()

    # Config path from cloned repo
    config_path = os.path.join(repo_dir, "config", f"wedetect_{model_size}.py")
    if not os.path.exists(config_path):
        raise FileNotFoundError(f"Config not found: {config_path}")

    # Download checkpoint from HuggingFace
    checkpoint_file = MODEL_INFO[model_size]["file"]
    print(f"📥 Downloading checkpoint: {checkpoint_file}...")
    checkpoint_path = hf_hub_download(
        repo_id=REPO_ID,
        filename=checkpoint_file,
        cache_dir="./models"
    )

    # Initialize model
    print("🔧 Initializing model...")
    model = init_detector(config_path, checkpoint_path, device=device)

    _model_cache[model_size] = model
    print(f"✅ WeDetect-{model_size.capitalize()} loaded successfully!")
    return model


# ============================================================================
# VISUALIZATION
# ============================================================================


def generate_colors(n: int) -> List[Tuple[int, int, int]]:
    """Generate n visually distinct RGB colors by spacing hues evenly."""
    colors = []
    for i in range(max(n, 1)):
        hue = i / max(n, 1)
        rgb = colorsys.hsv_to_rgb(hue, 0.8, 0.9)
        colors.append(tuple(int(x * 255) for x in rgb))
    return colors


def draw_detections(
    image: Image.Image,
    boxes: np.ndarray,
    scores: np.ndarray,
    labels: np.ndarray,
    class_names: List[str],
    threshold: float
) -> Tuple[Image.Image, int]:
    """Draw bounding boxes and labels on a copy of the image.

    Args:
        image: Source PIL image (not modified).
        boxes: (N, 4) array of x1, y1, x2, y2 pixel coordinates.
        scores: (N,) confidence scores.
        labels: (N,) integer class indices into class_names.
        class_names: Display names for the label indices.
        threshold: Detections below this score are skipped.

    Returns:
        Tuple of (annotated image copy, number of detections drawn).
    """
    img_draw = image.copy()
    draw = ImageDraw.Draw(img_draw)

    # Try to load a Chinese-compatible font (class names may be CJK)
    font = None
    font_paths = [
        "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",
        "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        "simsun.ttc",
    ]
    for font_path in font_paths:
        try:
            font = ImageFont.truetype(font_path, 18)
            break
        except (IOError, OSError):
            continue
    if font is None:
        try:
            font = ImageFont.load_default(size=16)
        except TypeError:
            # Older Pillow versions don't support size argument
            font = ImageFont.load_default()

    colors = generate_colors(len(class_names))
    detection_count = 0

    for box, score, label_idx in zip(boxes, scores, labels):
        if score < threshold:
            continue

        detection_count += 1
        x1, y1, x2, y2 = map(int, box)
        color = colors[int(label_idx) % len(colors)]

        # Draw bounding box
        draw.rectangle([x1, y1, x2, y2], outline=color, width=3)

        # Prepare label text
        class_name = class_names[int(label_idx)] if int(label_idx) < len(class_names) else "?"
        label_text = f"{class_name}: {score:.2f}"

        # Get text bounding box
        bbox = draw.textbbox((x1, y1), label_text, font=font)
        text_w = bbox[2] - bbox[0]
        text_h = bbox[3] - bbox[1]

        # BUGFIX: clamp the label to the canvas — for boxes touching the top
        # edge the label used to be drawn at a negative y and was invisible.
        label_top = max(y1 - text_h - 6, 0)

        # Draw label background
        draw.rectangle(
            [x1, label_top, x1 + text_w + 6, label_top + text_h + 6],
            fill=color
        )

        # Draw label text
        draw.text((x1 + 3, label_top + 3), label_text, fill='white', font=font)

    return img_draw, detection_count


# ============================================================================
# MAIN DETECTION FUNCTION
# ============================================================================


@spaces.GPU
def detect_objects(
    image: Optional[Image.Image],
    chinese_classes: str,
    threshold: float,
    model_size: str
) -> Tuple[Optional[Image.Image], str]:
    """
    Run object detection on an image.

    Args:
        image: Input PIL Image
        chinese_classes: Comma-separated Chinese class names
        threshold: Confidence threshold
        model_size: Model size to use

    Returns:
        Tuple of (annotated image, status message)
    """
    if image is None:
        return None, "⚠️ Please upload an image"

    if not chinese_classes.strip():
        return image, "⚠️ Please enter class names to detect"

    # Parse class names
    class_names = [c.strip() for c in chinese_classes.split(',') if c.strip()]
    if not class_names:
        return image, "⚠️ No valid class names provided"

    try:
        import torch
        from mmdet.apis import inference_detector

        # Load model
        model = get_model(model_size)

        # Update model with class names (open-vocabulary prompt)
        model.dataset_meta['classes'] = class_names

        # Save image temporarily — inference_detector expects a file path
        with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as f:
            temp_path = f.name
            image.save(temp_path)

        try:
            # Run inference
            results = inference_detector(model, temp_path, texts=[class_names])

            # Extract predictions
            pred = results.pred_instances
            boxes = pred.bboxes.cpu().numpy()
            scores = pred.scores.cpu().numpy()
            labels = pred.labels.cpu().numpy()

            # Draw results
            result_image, count = draw_detections(
                image, boxes, scores, labels, class_names, threshold
            )

            status = f"✅ Found {count} object(s) | Classes: {', '.join(class_names)}"
            return result_image, status
        finally:
            # Cleanup the temp file even when inference raises
            if os.path.exists(temp_path):
                os.unlink(temp_path)

    except Exception as e:
        import traceback
        error_msg = f"❌ Error: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)
        return image, f"❌ Error: {str(e)}"


# ============================================================================
# GRADIO INTERFACE
# ============================================================================

# Custom CSS for styling
CUSTOM_CSS = """
.output-class { font-family: 'Noto Sans SC', sans-serif; }
.info-text { color: #666; font-size: 0.9em; }
"""


def create_demo():
    """Create the Gradio demo interface.

    Wires the translation preview (classes/input-mode change handlers) and
    the detection button to `detect_objects`.
    """
    # NOTE: In Gradio 5.50+/6.0, theme and css must be passed to launch(), not Blocks()
    with gr.Blocks() as demo:
        gr.Markdown("""
        # 🔍 WeDetect: Open-Vocabulary Object Detection

        Upload an image and specify what objects to detect. Enter class names in **English** or **Chinese**.

        > **Note:** WeDetect uses Chinese internally. English inputs are automatically translated.
        """)

        with gr.Row():
            # Left column: Inputs
            with gr.Column(scale=1):
                input_image = gr.Image(
                    label="📷 Upload Image",
                    type="pil",
                    height=350
                )

                input_mode = gr.Radio(
                    choices=["English", "Chinese (中文)"],
                    value="English",
                    label="🌐 Input Language",
                    info="Choose the language for entering class names"
                )

                classes_input = gr.Textbox(
                    label="🏷️ Classes to Detect",
                    placeholder="person, car, dog, cat",
                    value="person, car, dog",
                    info="Enter class names separated by commas",
                    lines=2
                )

                chinese_preview = gr.Textbox(
                    label="🀄 Chinese Classes (Editable)",
                    placeholder="人, 车, 狗, 猫",
                    value="人, 车, 狗",
                    info="Edit if translation needs correction",
                    lines=2
                )

                threshold_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.3,
                    step=0.05,
                    label="📊 Confidence Threshold"
                )

                model_dropdown = gr.Dropdown(
                    choices=["large", "base", "tiny"],
                    value=DEFAULT_MODEL,
                    label="🧠 Model Size",
                    info="Large=best quality, Tiny=fastest"
                )

                detect_btn = gr.Button(
                    "🔍 Detect Objects",
                    variant="primary",
                    size="lg"
                )

            # Right column: Output
            with gr.Column(scale=1):
                output_image = gr.Image(
                    label="🎯 Detection Results",
                    type="pil",
                    height=350
                )

                status_text = gr.Textbox(
                    label="📋 Status",
                    interactive=False,
                    lines=2
                )

        # Class name reference
        with gr.Accordion("📚 Common Class Names Reference", open=False):
            gr.Markdown("""
            | English | Chinese | | English | Chinese | | English | Chinese |
            |---------|---------|---|---------|---------|---|---------|---------|
            | person | 人 | | car | 车 | | dog | 狗 |
            | cat | 猫 | | bird | 鸟 | | horse | 马 |
            | bicycle | 自行车 | | motorcycle | 摩托车 | | bus | 公交车 |
            | truck | 卡车 | | chair | 椅子 | | table | 桌子 |
            | bed | 床 | | couch | 沙发 | | tv | 电视 |
            | laptop | 笔记本电脑 | | phone | 手机 | | book | 书 |
            | bottle | 瓶子 | | cup | 杯子 | | shoe | 鞋 |
            | bag | 包 | | umbrella | 雨伞 | | tree | 树 |

            **Example inputs:**
            - English: `person, car, dog, cat, bicycle`
            - Chinese: `人, 车, 狗, 猫, 自行车`
            """)

        # Event handlers
        def update_chinese_preview(classes_text: str, mode: str) -> str:
            return translate_class_list(classes_text, mode)

        # Auto-translate when input changes
        classes_input.change(
            fn=update_chinese_preview,
            inputs=[classes_input, input_mode],
            outputs=chinese_preview
        )
        input_mode.change(
            fn=update_chinese_preview,
            inputs=[classes_input, input_mode],
            outputs=chinese_preview
        )

        # Detection button click
        detect_btn.click(
            fn=detect_objects,
            inputs=[input_image, chinese_preview, threshold_slider, model_dropdown],
            outputs=[output_image, status_text]
        )

        gr.Markdown("""
        ---
        **Credits:** [WeDetect](https://github.com/WeChatCV/WeDetect) by WeChatCV |
        [Paper](https://arxiv.org/abs/2512.12309) |
        [Models](https://huggingface.co/fushh7/WeDetect)
        """)

    return demo


# ============================================================================
# MAIN
# ============================================================================

if __name__ == "__main__":
    demo = create_demo()
    # Pass theme and css to launch() for Gradio 5.50+/6.0 compatibility
    demo.launch(
    )