|
|
|
|
|
from unsloth import FastVisionModel
|
|
|
import torch
|
|
|
import gradio as gr
|
|
|
|
|
|
|
|
|
# System prompt prepended to every user query inside infer().
# The trailing "\n\n" separates this instruction block from the user's text.
system_message = (
    "You are Ana-bot. Your role is to detect drones in the image. "
    "Describe the drone (speed, color, etc.) and return its bounding box.\n\n"
)
|
|
|
|
|
|
# Load the fine-tuned vision model + processor in 4-bit to fit on one GPU.
# NOTE(review): "tetttssts/llama_adapter" looks like a placeholder repo id — confirm.
model, tokenizer = FastVisionModel.from_pretrained(
    "tetttssts/llama_adapter",
    load_in_4bit=True,
    use_gradient_checkpointing="unsloth",
)

# Unsloth models come up in training mode; switch to inference mode so that
# model.generate() in infer() uses the fast (and correct) generation paths.
FastVisionModel.for_inference(model)
|
|
|
|
|
|
|
|
|
def infer(image, query):
    """Run the drone-detection model on an uploaded image.

    Args:
        image: PIL image from the Gradio upload widget (None if the user
            submitted without uploading — handled explicitly below).
        query: Free-text question; appended to the fixed system prompt.

    Returns:
        str: the model's decoded response, or a human-readable error message.
    """
    # Gradio passes None when no image was uploaded; fail gracefully instead
    # of letting the processor raise deep inside the try block.
    if image is None:
        return "Error during inference: no image was provided."

    try:
        # Single user turn combining the image and the prompt text.
        messages = [{
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": system_message + query}
            ]
        }]

        input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

        # HF vision processors accept (images, text) — there is no `image=`
        # keyword (singular), which the original passed; use the positional
        # form shown in the Unsloth vision examples.
        inputs = tokenizer(
            image,
            input_text,
            add_special_tokens=False,
            return_tensors="pt"
        ).to("cuda")

        output = model.generate(
            **inputs,
            max_new_tokens=128,
            use_cache=True,
            temperature=1.5,
            min_p=0.1
        )

        # Decode only the newly generated tokens so the chat-template prompt
        # is not echoed back into the UI.
        prompt_length = inputs["input_ids"].shape[1]
        return tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing
        # the Gradio worker.
        return f"Error during inference: {str(e)}"
|
|
|
|
|
|
|
|
|
# Gradio UI: an image upload plus a free-text query box, wired to infer();
# the model's decoded text is shown in a single output textbox.
interface = gr.Interface(
    fn=infer,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Enter your query")
    ],
    outputs=gr.Textbox(label="Model Response"),
    title="Drone Detector - Ana-bot",
    description="Upload an image and ask the bot to describe drones and detect them."
)
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
interface.launch()
|
|
|
|