import gradio as gr
from transformers import AutoImageProcessor, AutoModelForObjectDetection
import torch
from PIL import Image, ImageDraw, ImageFont
import numpy as np

# Identifier handed to both `from_pretrained` calls below, so the image
# processor and the detection model always come from the same checkpoint.
MODEL_ID = "Meenu047/RGTB_Aerial_view_detection"

# Loaded once at import time and reused by every `predict` call.
print("Loading model...")
processor, model = (
    AutoImageProcessor.from_pretrained(MODEL_ID),
    AutoModelForObjectDetection.from_pretrained(MODEL_ID),
)
print("Model loaded successfully!")

# Preferred label font; `_load_label_font` falls back to PIL's built-in font
# when this file is not present on the host.
_LABEL_FONT_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"

# Outline/fill colours, cycled through by detection index.
_BOX_COLORS = ('red', 'blue', 'green', 'yellow', 'purple', 'orange', 'pink', 'cyan')


def _load_label_font(size=20):
    """Return the preferred TrueType font at *size*, or PIL's default font."""
    try:
        return ImageFont.truetype(_LABEL_FONT_PATH, size)
    except OSError:
        # Pillow raises OSError when the font file is missing or unreadable.
        return ImageFont.load_default()


def _format_detections(detections, threshold):
    """Build the Markdown summary for a list of detection dicts."""
    if not detections:
        return f"No objects detected with confidence > {threshold:.0%}"

    parts = [f"**Detected {len(detections)} object(s):**\n\n"]
    for i, det in enumerate(detections, 1):
        parts.append(
            f"**{i}. {det['Label']}**\n"
            f"   - Confidence: {det['Confidence']}\n"
            f"   - Location: {det['Box']}\n\n"
        )
    return "".join(parts)


def predict(image, threshold=0.5):
    """
    Run object detection on the input image.

    Args:
        image: PIL image to analyse, or ``None`` when nothing was uploaded.
        threshold: Score threshold forwarded to
            ``processor.post_process_object_detection``. Defaults to 0.5.

    Returns:
        A ``(annotated_image, results_text)`` tuple. ``annotated_image`` is an
        RGB copy of ``image`` with one outlined, labelled box per detection
        (the input image itself is left untouched), or ``None`` when ``image``
        is ``None``. ``results_text`` is a Markdown summary of the detections.
    """
    if image is None:
        return None, "Please upload an image"

    # Work on an RGB copy: the caller's image is not mutated, and non-RGB
    # inputs (RGBA, palette, greyscale) are normalised before preprocessing
    # and before drawing with named colours.
    annotated = image.convert("RGB")

    # Prepare image and run inference without tracking gradients.
    inputs = processor(images=annotated, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # PIL reports (width, height); reverse it to the (height, width) order
    # passed as target size so boxes come back in image pixel coordinates.
    target_sizes = torch.tensor([annotated.size[::-1]])
    results = processor.post_process_object_detection(
        outputs,
        target_sizes=target_sizes,
        threshold=threshold,
    )[0]

    draw = ImageDraw.Draw(annotated)
    font = _load_label_font()
    id2label = model.config.id2label

    detections = []
    for idx, (score, label, box) in enumerate(
        zip(results["scores"], results["labels"], results["boxes"])
    ):
        x0, y0, x1, y1 = (round(v, 2) for v in box.tolist())
        confidence = round(score.item(), 3)
        class_id = label.item()
        # Fall back to a generic name rather than crashing on an unmapped id.
        label_name = id2label.get(class_id, f"class_{class_id}")

        # Draw rectangle
        color = _BOX_COLORS[idx % len(_BOX_COLORS)]
        draw.rectangle((x0, y0, x1, y1), outline=color, width=3)

        # Draw label on a filled background anchored at the box's top-left.
        text = f"{label_name}: {confidence:.2f}"
        text_bbox = draw.textbbox((x0, y0), text, font=font)
        draw.rectangle(text_bbox, fill=color)
        draw.text((x0, y0), text, fill='white', font=font)

        detections.append({
            "Label": label_name,
            "Confidence": f"{confidence * 100:.1f}%",
            "Box": f"({int(x0)}, {int(y0)}) - ({int(x1)}, {int(y1)})",
        })

    return annotated, _format_detections(detections, threshold)

# Example inputs offered under the uploader. Add example images here if you
# have any; while the list is empty no examples widget is built.
EXAMPLE_IMAGES = []

# Create Gradio interface: uploader and button on the left, annotated image
# and Markdown summary on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🚁 RGTB Aerial View Detection
        Upload an aerial image to detect objects using the trained model.
        """
    )

    with gr.Row():
        with gr.Column():
            # type="pil" so `predict` receives a PIL image (or None).
            input_image = gr.Image(
                type="pil",
                label="Upload Aerial Image",
                height=400
            )
            predict_btn = gr.Button("🔍 Run Detection", variant="primary", size="lg")

        with gr.Column():
            output_image = gr.Image(
                type="pil",
                label="Detection Results",
                height=400
            )
            output_text = gr.Markdown(label="Detected Objects")

    # Skip the examples widget entirely when there is nothing to show, rather
    # than constructing one from an empty list.
    if EXAMPLE_IMAGES:
        gr.Examples(
            examples=EXAMPLE_IMAGES,
            inputs=input_image,
        )

    # `predict` returns (annotated image, summary text), matching the two
    # output components in order.
    predict_btn.click(
        fn=predict,
        inputs=input_image,
        outputs=[output_image, output_text]
    )

    gr.Markdown(
        """
        ### How to use:
        1. Upload or drag & drop an aerial image
        2. Click "Run Detection" button
        3. View the detected objects with bounding boxes and confidence scores
        
        **Model:** `Meenu047/RGTB_Aerial_view_detection`
        """
    )

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()