# app.py
"""Gradio demo: detect and describe drones in an image with a fine-tuned
Llama vision adapter served through unsloth."""

from unsloth import FastVisionModel  # For LLMs, use FastLanguageModel
import torch
import gradio as gr

# System prompt prepended to every user query.
system_message = (
    "You are Ana-bot. Your role is to detect drones in the image. "
    "Describe the drone (speed, color, etc.) and return its bounding box.\n\n"
)

# Load model and tokenizer (4-bit quantized to fit consumer GPUs).
model, tokenizer = FastVisionModel.from_pretrained(
    "tetttssts/llama_adapter",
    load_in_4bit=True,
    use_gradient_checkpointing="unsloth",
)
# FIX: unsloth requires switching to inference mode before generate();
# the model is otherwise left in training mode (gradient checkpointing on),
# which breaks/slows generation.
FastVisionModel.for_inference(model)


# Inference function
def infer(image, query):
    """Run the vision model on *image* with *query*; return the decoded text.

    Args:
        image: PIL image supplied by the Gradio ``Image`` component.
        query: Free-form user question, appended to the system prompt.

    Returns:
        The model's decoded response, or an ``"Error during inference: ..."``
        string if anything raises (UI boundary — never crash the app).
    """
    try:
        # FIX: the chat template only needs an image *placeholder*; the
        # actual pixels go to the processor call below. Embedding the PIL
        # object in the template risks serialization errors.
        messages = [{
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": system_message + query},
            ],
        }]
        input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
        # FIX: multimodal processors take the image as the first positional
        # argument (keyword name is `images`); the original `image=` keyword
        # is not an accepted parameter.
        # NOTE(review): assumes a CUDA device is available — confirm for CPU
        # deployments.
        inputs = tokenizer(
            image,
            input_text,
            add_special_tokens=False,
            return_tensors="pt",
        ).to("cuda")
        output = model.generate(
            **inputs,
            max_new_tokens=128,
            use_cache=True,
            temperature=1.5,
            min_p=0.1,
        )
        decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
        return decoded_output
    except Exception as e:  # UI boundary: surface the error as text
        return f"Error during inference: {str(e)}"


# Gradio interface
interface = gr.Interface(
    fn=infer,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Enter your query"),
    ],
    outputs=gr.Textbox(label="Model Response"),
    title="Drone Detector - Ana-bot",
    description="Upload an image and ask the bot to describe drones and detect them.",
)

# Run
if __name__ == "__main__":
    interface.launch()