openfree committed on
Commit
1c7e0b2
·
verified Β·
1 Parent(s): c2e0b70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +428 -498
app.py CHANGED
@@ -1,530 +1,460 @@
 
 
 
 
 
 
 
 
1
  import os
2
- # Set environment variables for Spaces compatibility
3
- os.environ['OMP_NUM_THREADS'] = '1'
4
- os.environ['MKL_NUM_THREADS'] = '1'
5
- import cv2
6
- import yaml
7
- import torch
8
- import random
9
  import gradio as gr
10
- import numpy as np
11
- import kagglehub
12
  from PIL import Image
13
- from glob import glob
14
- import matplotlib
15
- matplotlib.use('Agg') # Use non-interactive backend
16
- import matplotlib.pyplot as plt
17
- from matplotlib import patches
18
- from torchvision import transforms as T
19
- from ultralytics import YOLO
20
- import shutil
21
  import tempfile
22
- from pathlib import Path
23
- import json
24
- from io import BytesIO
 
 
 
 
 
25
 
26
- # Try to import spaces for Hugging Face Spaces GPU support
 
 
 
 
 
 
 
 
 
 
 
27
  try:
28
- import spaces
29
- ON_SPACES = True
30
- except ImportError:
31
- ON_SPACES = False
32
- # Create a dummy decorator if not on Spaces
33
- class spaces:
34
- @staticmethod
35
- def GPU(duration=60):
36
- def decorator(func):
37
- return func
38
- return decorator
39
-
40
- # Set Kaggle API credentials from environment variable
41
- if os.getenv("KDATA_API"):
42
- kaggle_key = os.getenv("KDATA_API")
43
- # Parse the key if it's in JSON format
44
- if "{" in kaggle_key:
45
- key_data = json.loads(kaggle_key)
46
- os.environ["KAGGLE_USERNAME"] = key_data.get("username", "")
47
- os.environ["KAGGLE_KEY"] = key_data.get("key", "")
48
-
49
- # Global variables
50
- model = None
51
- dataset_path = None
52
- training_in_progress = False
53
-
54
- class Visualization:
55
- def __init__(self, root, data_types, n_ims, rows, cmap=None):
56
- self.n_ims, self.rows = n_ims, rows
57
- self.cmap, self.data_types = cmap, data_types
58
- self.colors = ["firebrick", "darkorange", "blueviolet"]
59
- self.root = root
60
-
61
- self.get_cls_names()
62
- self.get_bboxes()
63
-
64
- def get_cls_names(self):
65
- with open(f"{self.root}/data.yaml", 'r') as file:
66
- data = yaml.safe_load(file)
67
- class_names = data['names']
68
- self.class_dict = {index: name for index, name in enumerate(class_names)}
69
-
70
- def get_bboxes(self):
71
- self.vis_datas, self.analysis_datas, self.im_paths = {}, {}, {}
72
- for data_type in self.data_types:
73
- all_bboxes, all_analysis_datas = [], {}
74
- im_paths = glob(f"{self.root}/{data_type}/images/*")
75
-
76
- for idx, im_path in enumerate(im_paths):
77
- bboxes = []
78
- im_ext = os.path.splitext(im_path)[-1]
79
- lbl_path = im_path.replace(im_ext, ".txt")
80
- lbl_path = lbl_path.replace(f"{data_type}/images", f"{data_type}/labels")
81
- if not os.path.isfile(lbl_path):
82
- continue
83
- meta_data = open(lbl_path).readlines()
84
- for data in meta_data:
85
- parts = data.strip().split()[:5]
86
- cls_name = self.class_dict[int(parts[0])]
87
- bboxes.append([cls_name] + [float(x) for x in parts[1:]])
88
- if cls_name not in all_analysis_datas:
89
- all_analysis_datas[cls_name] = 1
90
- else:
91
- all_analysis_datas[cls_name] += 1
92
- all_bboxes.append(bboxes)
93
-
94
- self.vis_datas[data_type] = all_bboxes
95
- self.analysis_datas[data_type] = all_analysis_datas
96
- self.im_paths[data_type] = im_paths
97
-
98
- def plot_single(self, im_path, bboxes):
99
- or_im = np.array(Image.open(im_path).convert("RGB"))
100
- height, width, _ = or_im.shape
101
 
102
- for bbox in bboxes:
103
- class_id, x_center, y_center, w, h = bbox
 
104
 
105
- x_min = int((x_center - w / 2) * width)
106
- y_min = int((y_center - h / 2) * height)
107
- x_max = int((x_center + w / 2) * width)
108
- y_max = int((y_center + h / 2) * height)
109
-
110
- color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
111
- cv2.rectangle(img=or_im, pt1=(x_min, y_min), pt2=(x_max, y_max),
112
- color=color, thickness=3)
113
-
114
- # Add text overlay
115
- cv2.putText(or_im, f"Objects: {len(bboxes)}", (10, 30),
116
- cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
117
-
118
- # Convert BGR to RGB if needed
119
- if len(or_im.shape) == 3 and or_im.shape[2] == 3:
120
- or_im = cv2.cvtColor(or_im, cv2.COLOR_BGR2RGB)
121
-
122
- return Image.fromarray(or_im)
123
 
124
- def vis_samples(self, data_type, n_samples=4):
125
- if data_type not in self.vis_datas:
126
- return None
127
-
128
- indices = [random.randint(0, len(self.vis_datas[data_type]) - 1)
129
- for _ in range(min(n_samples, len(self.vis_datas[data_type])))]
130
-
131
- figs = []
132
- for idx in indices:
133
- im_path = self.im_paths[data_type][idx]
134
- bboxes = self.vis_datas[data_type][idx]
135
- fig = self.plot_single(im_path, bboxes)
136
- figs.append(fig)
137
-
138
- return figs
139
 
140
- def data_analysis(self, data_type):
141
- if data_type not in self.analysis_datas:
142
- return None
143
-
144
- plt.style.use('default')
145
- fig, ax = plt.subplots(figsize=(12, 6))
146
-
147
- cls_names = list(self.analysis_datas[data_type].keys())
148
- counts = list(self.analysis_datas[data_type].values())
149
-
150
- color_map = {"train": "firebrick", "valid": "darkorange", "test": "blueviolet"}
151
- color = color_map.get(data_type, "steelblue")
152
-
153
- indices = np.arange(len(counts))
154
- bars = ax.bar(indices, counts, 0.7, color=color)
155
-
156
- ax.set_xlabel("Class Names", fontsize=12)
157
- ax.set_xticks(indices)
158
- ax.set_xticklabels(cls_names, rotation=45, ha='right')
159
- ax.set_ylabel("Data Counts", fontsize=12)
160
- ax.set_title(f"{data_type.upper()} Dataset Class Distribution", fontsize=14)
161
-
162
- for i, (bar, v) in enumerate(zip(bars, counts)):
163
- ax.text(bar.get_x() + bar.get_width()/2, v + 1, str(v),
164
- ha='center', va='bottom', fontsize=10, color='navy')
165
-
166
- plt.tight_layout()
167
-
168
- # Save to BytesIO and convert to PIL Image
169
- buf = BytesIO()
170
- fig.savefig(buf, format='png', dpi=100, bbox_inches='tight')
171
- buf.seek(0)
172
- img = Image.open(buf)
173
- plt.close(fig)
174
-
175
- return img
176
 
177
- def download_dataset():
178
- """Download the dataset using kagglehub"""
179
- global dataset_path
 
 
 
 
180
  try:
181
- # Create a local directory to store the dataset
182
- local_dir = "./xray_dataset"
183
-
184
- # Download dataset
185
- dataset_path = kagglehub.dataset_download("orvile/x-ray-baggage-anomaly-detection")
186
-
187
- # If the dataset is downloaded to a temporary location, copy it to our local directory
188
- if dataset_path != local_dir and os.path.exists(dataset_path):
189
- if os.path.exists(local_dir):
190
- shutil.rmtree(local_dir)
191
- shutil.copytree(dataset_path, local_dir)
192
- dataset_path = local_dir
193
-
194
- return f"Dataset downloaded successfully to: {dataset_path}"
195
  except Exception as e:
196
- return f"Error downloading dataset: {str(e)}\n\nPlease ensure KDATA_API environment variable is set correctly."
 
 
197
 
198
- def visualize_data(data_type, num_samples):
199
- """Visualize sample images from the dataset"""
200
- if dataset_path is None:
201
- return [], "Please download the dataset first!"
202
-
203
- try:
204
- vis = Visualization(root=dataset_path, data_types=[data_type],
205
- n_ims=num_samples, rows=2, cmap="rgb")
206
- figs = vis.vis_samples(data_type, num_samples)
207
- if figs is None:
208
- return [], f"No data found for {data_type} dataset"
209
- return figs, f"Showing {len(figs)} samples from {data_type} dataset"
210
- except Exception as e:
211
- return [], f"Error visualizing data: {str(e)}"
212
 
213
- def analyze_class_distribution(data_type):
214
- """Analyze class distribution in the dataset"""
215
- if dataset_path is None:
216
- return None, "Please download the dataset first!"
217
-
218
- try:
219
- vis = Visualization(root=dataset_path, data_types=[data_type],
220
- n_ims=20, rows=5, cmap="rgb")
221
- fig = vis.data_analysis(data_type)
222
- if fig is None:
223
- return None, f"No data found for {data_type} dataset"
224
- return fig, f"Class distribution for {data_type} dataset"
225
- except Exception as e:
226
- return None, f"Error analyzing data: {str(e)}"
227
 
228
- @spaces.GPU(duration=300) # Request GPU for 5 minutes for training
229
- def train_model(epochs, batch_size, img_size, device_selection):
230
- """Train YOLOv11 model"""
231
- global model, training_in_progress
232
-
233
- if dataset_path is None:
234
- return [], "Please download the dataset first!"
235
-
236
- if training_in_progress:
237
- return [], "Training already in progress!"
238
-
239
- training_in_progress = True
240
-
241
- try:
242
- # Determine device - on Spaces, always use GPU if available
243
- if ON_SPACES and torch.cuda.is_available():
244
- device = 0
245
- elif device_selection == "Auto":
246
- device = 0 if torch.cuda.is_available() else "cpu"
247
- elif device_selection == "CPU":
248
- device = "cpu"
249
- else:
250
- device = 0 if torch.cuda.is_available() else "cpu"
251
-
252
- # Initialize model
253
- model = YOLO("yolo11n.pt")
254
-
255
- # Create project directory
256
- project_dir = "./xray_detection"
257
- os.makedirs(project_dir, exist_ok=True)
258
-
259
- # Train model with workers=0 to avoid multiprocessing issues on Spaces
260
- results = model.train(
261
- data=f"{dataset_path}/data.yaml",
262
- epochs=epochs,
263
- imgsz=img_size,
264
- batch=batch_size,
265
- device=device,
266
- project=project_dir,
267
- name="train",
268
- exist_ok=True,
269
- verbose=True,
270
- patience=5, # Reduce patience for faster training on Spaces
271
- save_period=5, # Save checkpoints every 5 epochs
272
- workers=0, # Important: Set to 0 to avoid multiprocessing issues
273
- single_cls=False,
274
- rect=False,
275
- cache=False, # Disable caching to avoid memory issues
276
- amp=True # Use automatic mixed precision for faster training
277
  )
278
-
279
- # Collect training result plots
280
- results_path = os.path.join(project_dir, "train")
281
- plots = []
282
-
283
- plot_files = ["results.png", "confusion_matrix.png", "val_batch0_pred.jpg",
284
- "train_batch0.jpg", "val_batch0_labels.jpg"]
285
-
286
- for plot_file in plot_files:
287
- plot_path = os.path.join(results_path, plot_file)
288
- if os.path.exists(plot_path):
289
- plots.append(Image.open(plot_path))
290
-
291
- # Save the model path
292
- model_path = os.path.join(results_path, "weights", "best.pt")
293
-
294
- training_in_progress = False
295
- return plots, f"Training completed! Model saved to {model_path}"
296
-
297
- except Exception as e:
298
- training_in_progress = False
299
- return [], f"Error during training: {str(e)}"
300
 
301
- @spaces.GPU(duration=60) # Request GPU for 1 minute for inference
302
- def run_inference(input_image, conf_threshold):
303
- """Run inference on a single image"""
304
- global model
305
-
306
- if model is None:
307
- # Try to load a default model
 
 
 
 
 
 
 
 
 
 
308
  try:
309
- model = YOLO("yolo11n.pt")
310
- except:
311
- return None, "Please train the model first or load a pre-trained model!"
312
-
313
- if input_image is None:
314
- return None, "Please upload an image!"
315
-
316
- try:
317
- # Save the input image temporarily
318
- temp_path = "temp_inference.jpg"
319
- input_image.save(temp_path)
320
-
321
- # Run inference with workers=0
322
- results = model(temp_path, conf=conf_threshold, verbose=False, device=0 if torch.cuda.is_available() else 'cpu')
323
-
324
- # Draw results
325
- annotated_image = results[0].plot()
326
-
327
- # Get detection info
328
- detections = []
329
- if results[0].boxes is not None:
330
- for box in results[0].boxes:
331
- cls = int(box.cls)
332
- conf = float(box.conf)
333
- cls_name = model.names[cls]
334
- detections.append(f"{cls_name}: {conf:.2f}")
335
-
336
- # Clean up
337
- if os.path.exists(temp_path):
338
- os.remove(temp_path)
339
-
340
- detection_text = "\n".join(detections) if detections else "No objects detected"
341
-
342
- return Image.fromarray(annotated_image), f"Detections:\n{detection_text}"
343
-
344
- except Exception as e:
345
- return None, f"Error during inference: {str(e)}"
346
 
347
- @spaces.GPU(duration=60) # Request GPU for batch inference
348
- def batch_inference(data_type, num_images):
349
- """Run inference on multiple images from test set"""
350
- global model
351
-
352
- if model is None:
 
 
 
 
353
  try:
354
- model = YOLO("yolo11n.pt")
355
- except:
356
- return [], "Please train the model first!"
357
-
358
- if dataset_path is None:
359
- return [], "Please download the dataset first!"
360
-
361
- try:
362
- image_dir = f"{dataset_path}/{data_type}/images"
363
- if not os.path.exists(image_dir):
364
- return [], f"Directory {image_dir} not found!"
365
-
366
- image_files = glob(f"{image_dir}/*")[:num_images]
367
-
368
- if not image_files:
369
- return [], f"No images found in {image_dir}"
370
-
371
- results_images = []
372
-
373
- for img_path in image_files:
374
- results = model(img_path, verbose=False)
375
- annotated = results[0].plot()
376
- results_images.append(Image.fromarray(annotated))
377
-
378
- return results_images, f"Processed {len(results_images)} images from {data_type} dataset"
379
-
380
- except Exception as e:
381
- return [], f"Error during batch inference: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
382
 
383
- def load_pretrained_model(model_path):
384
- """Load a pre-trained model"""
385
- global model
386
- try:
387
- if not os.path.exists(model_path):
388
- # Try default paths
389
- default_paths = [
390
- "./xray_detection/train/weights/best.pt",
391
- "./xray_detection/train/weights/last.pt",
392
- "yolo11n.pt"
393
- ]
394
- for path in default_paths:
395
- if os.path.exists(path):
396
- model_path = path
397
- break
398
-
399
- model = YOLO(model_path)
400
- return f"Model loaded successfully from {model_path}"
401
- except Exception as e:
402
- return f"Error loading model: {str(e)}"
403
 
404
- # Create Gradio interface
405
- with gr.Blocks(title="X-ray Baggage Anomaly Detection", theme=gr.themes.Soft()) as demo:
406
- gr.Markdown("""
407
- # 🎯 X-ray Baggage Anomaly Detection with YOLOv11
408
-
409
- This application allows you to:
410
- 1. Download and visualize the X-ray baggage dataset
411
- 2. Analyze class distributions
412
- 3. Train a YOLOv11 model for object detection
413
- 4. Run inference on new images
414
 
415
- **Note:** GPU will be automatically allocated when needed for training and inference.
416
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
 
418
- # Add instructions for Kaggle API setup
419
- with gr.Accordion("πŸ“ Setup Instructions", open=False):
420
- gr.Markdown("""
421
- ### Kaggle API Setup
422
- 1. Get your Kaggle API credentials from https://www.kaggle.com/settings
423
- 2. Set the KDATA_API environment variable in Hugging Face Spaces settings:
424
- ```
425
- KDATA_API={"username":"your_username","key":"your_api_key"}
426
- ```
427
- """)
 
 
 
428
 
429
- with gr.Tab("πŸ“Š Dataset"):
430
- with gr.Row():
431
- download_btn = gr.Button("Download Dataset", variant="primary", scale=1)
432
- download_status = gr.Textbox(label="Status", interactive=False, scale=3)
433
-
434
- download_btn.click(download_dataset, outputs=download_status)
435
-
436
- gr.Markdown("### Visualize Dataset Samples")
437
- with gr.Row():
438
- data_type_viz = gr.Dropdown(["train", "valid", "test"], value="train", label="Dataset Type")
439
- num_samples = gr.Slider(1, 8, 4, step=1, label="Number of Samples")
440
- viz_btn = gr.Button("Visualize Samples")
441
-
442
- viz_gallery = gr.Gallery(label="Sample Images", columns=2, height="auto")
443
- viz_status = gr.Textbox(label="Status", interactive=False)
444
-
445
- viz_btn.click(visualize_data, inputs=[data_type_viz, num_samples],
446
- outputs=[viz_gallery, viz_status])
447
-
448
- gr.Markdown("### Analyze Class Distribution")
449
- with gr.Row():
450
- data_type_analysis = gr.Dropdown(["train", "valid", "test"], value="train", label="Dataset Type")
451
- analyze_btn = gr.Button("Analyze Distribution")
452
-
453
- distribution_plot = gr.Image(label="Class Distribution", type="pil")
454
- analysis_status = gr.Textbox(label="Status", interactive=False)
455
-
456
- analyze_btn.click(analyze_class_distribution, inputs=data_type_analysis,
457
- outputs=[distribution_plot, analysis_status])
458
 
459
- with gr.Tab("πŸš€ Training"):
460
- gr.Markdown("### Train YOLOv11 Model")
461
- gr.Markdown("""
462
- **Note:** Training will automatically use GPU if available. This may take several minutes.
463
-
464
- **Tips for Hugging Face Spaces:**
465
- - Use smaller batch sizes (4-8) to avoid GPU memory issues
466
- - Start with fewer epochs (5-10) for testing
467
- - Image size 480 provides good balance between quality and speed
468
- """)
469
-
470
- with gr.Row():
471
- epochs_input = gr.Slider(1, 50, 10, step=1, label="Epochs")
472
- batch_size_input = gr.Slider(4, 32, 8, step=4, label="Batch Size (lower for limited GPU)")
473
- img_size_input = gr.Slider(320, 640, 480, step=32, label="Image Size")
474
- device_input = gr.Radio(["Auto", "GPU", "CPU"], value="Auto", label="Device")
475
-
476
- train_btn = gr.Button("Start Training", variant="primary")
477
-
478
- training_gallery = gr.Gallery(label="Training Results", columns=3, height="auto")
479
- training_status = gr.Textbox(label="Training Status", interactive=False)
480
-
481
- train_btn.click(train_model,
482
- inputs=[epochs_input, batch_size_input, img_size_input, device_input],
483
- outputs=[training_gallery, training_status])
484
-
485
- gr.Markdown("### Load Pre-trained Model")
486
- with gr.Row():
487
- model_path_input = gr.Textbox(label="Model Path", value="./xray_detection/train/weights/best.pt")
488
- load_model_btn = gr.Button("Load Model")
489
- load_status = gr.Textbox(label="Status", interactive=False)
490
-
491
- load_model_btn.click(load_pretrained_model, inputs=model_path_input, outputs=load_status)
492
 
493
- with gr.Tab("πŸ” Inference"):
494
- gr.Markdown("### Single Image Inference")
495
-
496
- with gr.Row():
497
- with gr.Column():
498
- input_image = gr.Image(type="pil", label="Upload Image")
499
- conf_threshold = gr.Slider(0.1, 0.9, 0.5, step=0.05, label="Confidence Threshold")
500
- inference_btn = gr.Button("Run Detection", variant="primary")
 
 
 
 
 
501
 
502
- with gr.Column():
503
- output_image = gr.Image(type="pil", label="Detection Result")
504
- detection_info = gr.Textbox(label="Detection Info", lines=5)
505
-
506
- inference_btn.click(run_inference,
507
- inputs=[input_image, conf_threshold],
508
- outputs=[output_image, detection_info])
509
-
510
- gr.Markdown("### Batch Inference")
511
-
512
- with gr.Row():
513
- batch_data_type = gr.Dropdown(["test", "valid"], value="test", label="Dataset Type")
514
- batch_num_images = gr.Slider(1, 10, 5, step=1, label="Number of Images")
515
- batch_btn = gr.Button("Run Batch Inference")
516
-
517
- batch_gallery = gr.Gallery(label="Batch Results", columns=3, height="auto")
518
- batch_status = gr.Textbox(label="Status", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
519
 
520
- batch_btn.click(batch_inference,
521
- inputs=[batch_data_type, batch_num_images],
522
- outputs=[batch_gallery, batch_status])
523
 
524
- # Launch the app
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
525
  if __name__ == "__main__":
526
- # Check if running on Hugging Face Spaces
527
- if ON_SPACES:
528
- demo.launch(ssr_mode=False)
529
- else:
530
- demo.launch(share=True, ssr_mode=False)
 
1
+ # UVIS - Gradio App with Upload, URL & Video Support + HF Token Authentication
2
+ """
3
+ This script launches the UVIS (Unified Visual Intelligence System) as a Gradio Web App.
4
+ Supports image, video, and URL-based media inputs for detection, segmentation, and depth estimation.
5
+ Outputs include scene blueprint, structured JSON, and downloadable results.
6
+ Now includes HuggingFace token authentication for private model access.
7
+ """
8
+
9
  import os
10
+ import time
11
+ import logging
12
+ import traceback
13
+
 
 
 
14
  import gradio as gr
 
 
15
  from PIL import Image
16
+ import cv2
17
+ import timeout_decorator
18
+ import spaces
 
 
 
 
 
19
  import tempfile
20
+ import shutil
21
+
22
+ from registry import get_model
23
+ from core.describe_scene import describe_scene
24
+ from core.process import process_image, process_video
25
+ from core.input_handler import resolve_input, validate_video, validate_image
26
+ from utils.helpers import format_error, generate_session_id
27
+ from huggingface_hub import hf_hub_download, login
28
 
29
+ # HuggingFace Token Authentication
30
+ HF_TOKEN = os.getenv("HF_TOKEN")
31
+ if HF_TOKEN:
32
+ try:
33
+ login(token=HF_TOKEN)
34
+ print("βœ… Successfully authenticated with HuggingFace using HF_TOKEN")
35
+ except Exception as e:
36
+ print(f"⚠️ Failed to authenticate with HuggingFace: {e}")
37
+ else:
38
+ print("⚠️ HF_TOKEN not found in environment variables. Some models may not be accessible.")
39
+
40
+ # Clear HF cache if needed
41
  try:
42
+ cache_paths = [
43
+ os.path.expanduser("~/.cache/huggingface"),
44
+ "/home/user/.cache/huggingface"
45
+ ]
46
+ for path in cache_paths:
47
+ if os.path.exists(path):
48
+ shutil.rmtree(path, ignore_errors=True)
49
+ print("πŸ’₯ Nuked HF model cache from runtime.")
50
+ except Exception as e:
51
+ print("🚫 Failed to nuke cache:", e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
+ # Setup logging
54
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
55
+ logger = logging.getLogger(__name__)
56
 
57
+ # Model mappings
58
+ DETECTION_MODEL_MAP = {
59
+ "YOLOv8-Nano": "yolov8n",
60
+ "YOLOv8-Small": "yolov8s",
61
+ "YOLOv8-Large": "yolov8l",
62
+ "YOLOv11-Beta": "yolov11b"
63
+ }
 
 
 
 
 
 
 
 
 
 
 
64
 
65
+ SEGMENTATION_MODEL_MAP = {
66
+ "SegFormer-B0": "segformer_b0",
67
+ "SegFormer-B5": "segformer_b5",
68
+ "DeepLabV3-ResNet50": "deeplabv3_resnet50"
69
+ }
 
 
 
 
 
 
 
 
 
 
70
 
71
+ DEPTH_MODEL_MAP = {
72
+ "MiDaS v21 Small 256": "midas_v21_small_256",
73
+ "MiDaS v21 384": "midas_v21_384",
74
+ "DPT Hybrid 384": "dpt_hybrid_384",
75
+ "DPT Swin2 Large 384": "dpt_swin2_large_384",
76
+ "DPT Beit Large 512": "dpt_beit_large_512"
77
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
+ # Modified get_model wrapper to include HF token
80
+ def get_model_with_auth(model_type, model_name, device="cpu"):
81
+ """
82
+ Wrapper for get_model that includes HF token authentication.
83
+ """
84
+ # Pass HF_TOKEN to the registry get_model function if it exists
85
+ # This assumes the registry.get_model can accept a token parameter
86
  try:
87
+ if hasattr(get_model, '__code__') and 'token' in get_model.__code__.co_varnames:
88
+ return get_model(model_type, model_name, device=device, token=HF_TOKEN)
89
+ else:
90
+ # If get_model doesn't support token, use standard call
91
+ return get_model(model_type, model_name, device=device)
 
 
 
 
 
 
 
 
 
92
  except Exception as e:
93
+ logger.error(f"Failed to load model {model_type}/{model_name}: {e}")
94
+ # Fallback: try without token parameter
95
+ return get_model(model_type, model_name, device=device)
96
 
97
+ @spaces.GPU
98
+ def handle(mode, media_upload, url,
99
+ run_det, det_model, det_confidence,
100
+ run_seg, seg_model,
101
+ run_depth, depth_model,
102
+ blend):
103
+ """
104
+ Master handler for resolving input and processing.
105
+ Returns: (img_out, vid_out, json_out, zip_out)
106
+ """
107
+ session_id = generate_session_id()
108
+ logger.info(f"Session ID: {session_id} | Handler activated with mode: {mode}")
109
+ start_time = time.time()
 
110
 
111
+ # Check HF authentication status
112
+ if not HF_TOKEN:
113
+ logger.warning("Processing without HF authentication. Some models may not be available.")
 
 
 
 
 
 
 
 
 
 
 
114
 
115
+ media = resolve_input(mode, media_upload, url)
116
+ if not media:
117
+ return (
118
+ gr.update(visible=False),
119
+ gr.update(visible=False),
120
+ format_error("No valid input provided. Please check your upload or URL."),
121
+ None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
+ first_input = media[0]
125
+
126
+ # πŸ”§ Resolve dropdown label to model keys
127
+ resolved_det_model = DETECTION_MODEL_MAP.get(det_model, det_model)
128
+ resolved_seg_model = SEGMENTATION_MODEL_MAP.get(seg_model, seg_model)
129
+ resolved_depth_model = DEPTH_MODEL_MAP.get(depth_model, depth_model)
130
+
131
+ # --- VIDEO PATH ---
132
+ if isinstance(first_input, str) and first_input.lower().endswith((".mp4", ".mov", ".avi")):
133
+ valid, err = validate_video(first_input)
134
+ if not valid:
135
+ return (
136
+ gr.update(visible=False),
137
+ gr.update(visible=False),
138
+ format_error(err),
139
+ None
140
+ )
141
  try:
142
+ # Pass HF_TOKEN to process_video if needed
143
+ _, msg, output_video_path = process_video(
144
+ video_path=first_input,
145
+ run_det=run_det,
146
+ det_model=resolved_det_model,
147
+ det_confidence=det_confidence,
148
+ run_seg=run_seg,
149
+ seg_model=resolved_seg_model,
150
+ run_depth=run_depth,
151
+ depth_model=resolved_depth_model,
152
+ blend=blend,
153
+ hf_token=HF_TOKEN # Pass token if process_video supports it
154
+ )
155
+ return (
156
+ gr.update(visible=False), # hide image
157
+ gr.update(value=output_video_path, visible=True), # show video
158
+ msg,
159
+ output_video_path # for download
160
+ )
161
+ except Exception as e:
162
+ logger.error(f"Video processing failed: {e}")
163
+ # If it's an authentication error, provide specific message
164
+ if "401" in str(e) or "unauthorized" in str(e).lower():
165
+ error_msg = "Authentication failed. Please check HF_TOKEN environment variable."
166
+ else:
167
+ error_msg = str(e)
168
+ return (
169
+ gr.update(visible=False),
170
+ gr.update(visible=False),
171
+ format_error(error_msg),
172
+ None
173
+ )
 
 
 
 
 
174
 
175
+ # --- IMAGE PATH ---
176
+ elif isinstance(first_input, Image.Image):
177
+ valid, err = validate_image(first_input)
178
+ if not valid:
179
+ return (
180
+ gr.update(visible=False),
181
+ gr.update(visible=False),
182
+ format_error(err),
183
+ None
184
+ )
185
  try:
186
+ # Pass HF_TOKEN to process_image if needed
187
+ result_img, msg, output_zip = process_image(
188
+ image=first_input,
189
+ run_det=run_det,
190
+ det_model=resolved_det_model,
191
+ det_confidence=det_confidence,
192
+ run_seg=run_seg,
193
+ seg_model=resolved_seg_model,
194
+ run_depth=run_depth,
195
+ depth_model=resolved_depth_model,
196
+ blend=blend,
197
+ hf_token=HF_TOKEN # Pass token if process_image supports it
198
+ )
199
+ return (
200
+ gr.update(value=result_img, visible=True), # show image
201
+ gr.update(visible=False), # hide video
202
+ msg,
203
+ output_zip
204
+ )
205
+ except timeout_decorator.timeout_decorator.TimeoutError:
206
+ logger.error("Image processing timed out.")
207
+ return (
208
+ gr.update(visible=False),
209
+ gr.update(visible=False),
210
+ format_error("Processing timed out. Try a smaller image or simpler model."),
211
+ None
212
+ )
213
+ except Exception as e:
214
+ traceback.print_exc()
215
+ logger.error(f"Image processing failed: {e}")
216
+ # If it's an authentication error, provide specific message
217
+ if "401" in str(e) or "unauthorized" in str(e).lower():
218
+ error_msg = "Authentication failed. Please check HF_TOKEN environment variable."
219
+ else:
220
+ error_msg = str(e)
221
+ return (
222
+ gr.update(visible=False),
223
+ gr.update(visible=False),
224
+ format_error(error_msg),
225
+ None
226
+ )
227
 
228
+ logger.warning("Unsupported media type resolved.")
229
+ return (
230
+ gr.update(visible=False),
231
+ gr.update(visible=False),
232
+ format_error("Unsupported input type."),
233
+ None
234
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
235
 
236
+ def show_preview_from_upload(files):
237
+ if not files:
238
+ return gr.update(visible=False), gr.update(visible=False)
 
 
 
 
 
 
 
239
 
240
+ file = files[0]
241
+ filename = file.name.lower()
242
+
243
+ if filename.endswith((".png", ".jpg", ".jpeg", ".webp")):
244
+ img = Image.open(file).convert("RGB")
245
+ return gr.update(value=img, visible=True), gr.update(visible=False)
246
+
247
+ elif filename.endswith((".mp4", ".mov", ".avi")):
248
+ # Copy uploaded video to a known temp location
249
+ temp_dir = tempfile.mkdtemp()
250
+ ext = os.path.splitext(filename)[-1]
251
+ safe_path = os.path.join(temp_dir, f"uploaded_video{ext}")
252
+ with open(safe_path, "wb") as f:
253
+ f.write(file.read())
254
+
255
+ return gr.update(visible=False), gr.update(value=safe_path, visible=True)
256
+
257
+ return gr.update(visible=False), gr.update(visible=False)
258
+
259
+ def show_preview_from_url(url_input):
260
+ if not url_input:
261
+ return gr.update(visible=False), gr.update(visible=False)
262
+ path = url_input.strip().lower()
263
+ if path.endswith((".png", ".jpg", ".jpeg", ".webp")):
264
+ return gr.update(value=url_input, visible=True), gr.update(visible=False)
265
+ elif path.endswith((".mp4", ".mov", ".avi")):
266
+ return gr.update(visible=False), gr.update(value=url_input, visible=True)
267
+ return gr.update(visible=False), gr.update(visible=False)
268
+
269
+ def clear_model_cache():
270
+ """
271
+ Deletes all model weight folders so they are redownloaded fresh.
272
+ """
273
+ folders = [
274
+ "models/detection/weights",
275
+ "models/segmentation/weights",
276
+ "models/depth/weights"
277
+ ]
278
+ for folder in folders:
279
+ shutil.rmtree(folder, ignore_errors=True)
280
+ logger.info(f"πŸ—‘οΈ Cleared: {folder}")
281
 
282
+ # Also clear HF cache if token is available
283
+ if HF_TOKEN:
284
+ try:
285
+ cache_paths = [
286
+ os.path.expanduser("~/.cache/huggingface"),
287
+ "/home/user/.cache/huggingface"
288
+ ]
289
+ for path in cache_paths:
290
+ if os.path.exists(path):
291
+ shutil.rmtree(path, ignore_errors=True)
292
+ return "βœ… Model cache and HF cache cleared. Models will be reloaded on next run."
293
+ except Exception as e:
294
+ return f"⚠️ Model cache cleared, but failed to clear HF cache: {e}"
295
 
296
+ return "βœ… Model cache cleared. Models will be reloaded on next run."
297
+
298
+ def check_auth_status():
299
+ """
300
+ Check and display current authentication status.
301
+ """
302
+ if HF_TOKEN:
303
+ return f"βœ… Authenticated with HuggingFace (Token: {HF_TOKEN[:8]}...)"
304
+ else:
305
+ return "❌ Not authenticated. Set HF_TOKEN environment variable for private model access."
306
+
307
# Gradio Interface
#
# Builds the UVIS demo UI. Relies on names defined earlier in this file:
# the model maps (DETECTION_MODEL_MAP, SEGMENTATION_MODEL_MAP,
# DEPTH_MODEL_MAP), the HF_TOKEN constant, and the callbacks
# clear_model_cache, check_auth_status, show_preview_from_upload,
# show_preview_from_url, and handle.
with gr.Blocks(title="UVIS - Unified Visual Intelligence System") as demo:
    gr.Markdown("## Unified Visual Intelligence System (UVIS)")

    # Authentication Status banner (read-only).
    with gr.Row():
        auth_status = gr.Textbox(
            label="HF Authentication Status",
            value=check_auth_status(),
            interactive=False
        )

    with gr.Row():
        # Left panel: input selection and per-task settings.
        with gr.Column(scale=2):
            # Input Mode Toggle
            mode = gr.Radio(["Upload", "URL"], value="Upload", label="Input Mode")

            # File upload: accepts multiple images or one video (user chooses wisely)
            media_upload = gr.File(
                label="Upload Images (1–5) or 1 Video",
                file_types=["image", ".mp4", ".mov", ".avi"],
                file_count="multiple",
                visible=True
            )

            # URL input (only visible in URL mode).
            url = gr.Textbox(label="URL (Image/Video)", visible=False)

            # Toggle visibility of the two input widgets when the mode changes.
            #
            # BUGFIX: the original returned FOUR updates (two of them for
            # preview components that are not in the wired `outputs` list and
            # are not even defined at this point in the script), which makes
            # Gradio raise a return-value mismatch on every mode change.
            # Exactly one update per wired output is returned instead.
            def toggle_inputs(selected_mode):
                return [
                    gr.update(visible=(selected_mode == "Upload")),  # media_upload
                    gr.update(visible=(selected_mode == "URL")),     # url
                ]

            mode.change(toggle_inputs, inputs=mode, outputs=[media_upload, url])

            # Shared helper: map a checkbox state to a panel-visibility update.
            def toggle_visibility(checked):
                return gr.update(visible=checked)

            run_det = gr.Checkbox(label="Object Detection")
            run_seg = gr.Checkbox(label="Semantic Segmentation")
            run_depth = gr.Checkbox(label="Depth Estimation")

            with gr.Row():
                with gr.Column(visible=False) as OD_Settings:
                    with gr.Accordion("Object Detection Settings", open=True):
                        det_model = gr.Dropdown(
                            choices=list(DETECTION_MODEL_MAP.keys()),
                            label="Detection Model",
                            value="YOLOv8-Nano"
                        )
                        det_confidence = gr.Slider(0.1, 1.0, 0.5, label="Detection Confidence Threshold")
                        nms_thresh = gr.Slider(0.1, 1.0, 0.45, label="NMS Threshold")
                        max_det = gr.Slider(1, 100, 20, step=1, label="Max Detections")
                        iou_thresh = gr.Slider(0.1, 1.0, 0.5, label="IoU Threshold")
                        class_filter = gr.CheckboxGroup(["Person", "Car", "Dog"], label="Class Filter")

                with gr.Column(visible=False) as SS_Settings:
                    with gr.Accordion("Semantic Segmentation Settings", open=True):
                        seg_model = gr.Dropdown(
                            choices=list(SEGMENTATION_MODEL_MAP.keys()),
                            label="Segmentation Model",
                            value="DeepLabV3-ResNet50"
                        )
                        resize_strategy = gr.Dropdown(["Crop", "Pad", "Scale"], label="Resize Strategy", value="Scale")
                        overlay_alpha = gr.Slider(0.0, 1.0, 0.5, label="Overlay Opacity")
                        seg_classes = gr.CheckboxGroup(["Road", "Sky", "Building"], label="Target Classes")
                        enable_crf = gr.Checkbox(label="Postprocessing (CRF)")

                with gr.Column(visible=False) as DE_Settings:
                    with gr.Accordion("Depth Estimation Settings", open=True):
                        depth_model = gr.Dropdown(
                            choices=list(DEPTH_MODEL_MAP.keys()),
                            label="Depth Model",
                            value="MiDaS v21 Small 256"
                        )
                        output_type = gr.Dropdown(["Raw", "Disparity", "Scaled"], label="Output Type", value="Scaled")
                        colormap = gr.Dropdown(["Jet", "Viridis", "Plasma"], label="Colormap", value="Jet")
                        blend = gr.Slider(0.0, 1.0, 0.5, label="Overlay Blend")
                        normalize = gr.Checkbox(label="Normalize Depth", value=True)
                        max_depth = gr.Slider(0.1, 10.0, 5.0, label="Max Depth (meters)")

            # Attach Visibility Logic: each checkbox shows/hides its panel.
            run_det.change(fn=toggle_visibility, inputs=[run_det], outputs=[OD_Settings])
            run_seg.change(fn=toggle_visibility, inputs=[run_seg], outputs=[SS_Settings])
            run_depth.change(fn=toggle_visibility, inputs=[run_depth], outputs=[DE_Settings])

            # BUGFIX: the original created a second, orphaned "Overlay Blend"
            # slider at this point, silently rebinding `blend` and leaving the
            # slider inside the Depth Estimation panel disconnected from
            # `handle`. The duplicate is removed; `blend` now refers to the
            # depth-panel slider defined above.

            # Run Button
            run = gr.Button("Run Analysis", variant="primary")

        # Right panel: outputs.
        with gr.Column(scale=1):
            # Only one is shown at a time β€” image or video
            img_out = gr.Image(label="Preview / Processed Output", visible=False)
            vid_out = gr.Video(label="Preview / Processed Video", visible=False, streaming=True, autoplay=True)
            json_out = gr.JSON(label="Scene JSON")
            zip_out = gr.File(label="Download Results")

            with gr.Row():
                clear_button = gr.Button("🧹 Clear Model Cache")
                refresh_auth_button = gr.Button("πŸ”„ Refresh Auth Status")

            status_box = gr.Textbox(label="Status", interactive=False)

            clear_button.click(fn=clear_model_cache, inputs=[], outputs=[status_box])
            refresh_auth_button.click(fn=check_auth_status, inputs=[], outputs=[auth_status])

    # Live preview as soon as media is supplied (wired after the output
    # components exist, so the references resolve).
    media_upload.change(show_preview_from_upload, inputs=media_upload, outputs=[img_out, vid_out])
    url.submit(show_preview_from_url, inputs=url, outputs=[img_out, vid_out])

    # Button Click Event: run the selected tasks and publish all outputs.
    run.click(
        fn=handle,
        inputs=[
            mode, media_upload, url,
            run_det, det_model, det_confidence,
            run_seg, seg_model,
            run_depth, depth_model,
            blend
        ],
        outputs=[
            img_out,   # will be visible only if it's an image
            vid_out,   # will be visible only if it's a video
            json_out,
            zip_out
        ]
    )

    # Footer Section
    gr.Markdown("---")
    gr.Markdown(
        f"""
        <div style='text-align: center; font-size: 14px;'>
        Built by <b>Durga Deepak Valluri</b><br>
        <a href="https://github.com/DurgaDeepakValluri" target="_blank">GitHub</a> |
        <a href="https://deecoded.io" target="_blank">Website</a> |
        <a href="https://www.linkedin.com/in/durga-deepak-valluri" target="_blank">LinkedIn</a><br>
        <span style='font-size: 12px; color: #666;'>
        {'πŸ” HF Authentication Active' if HF_TOKEN else 'πŸ”“ No HF Authentication'}
        </span>
        </div>
        """,
    )

# Launch the Gradio App (script entry point).
if __name__ == "__main__":
    demo.launch()