# llama_adapter / app.py
# Uploaded by tetttssts via the Hugging Face web UI (revision 7c0d136, verified).
# app.py
from unsloth import FastVisionModel # For LLMs, use FastLanguageModel
import torch
import gradio as gr
# Load model and tokenizer.
# Fixed instruction prepended to every user query in infer() below.
system_message = (
"You are Ana-bot. Your role is to detect drones in the image. "
"Describe the drone (speed, color, etc.) and return its bounding box.\n\n"
)
# Downloads and loads the fine-tuned adapter from the Hub.
# 4-bit quantization keeps GPU memory low; "unsloth" checkpointing is the
# unsloth-recommended setting (relevant for training; harmless for inference).
# NOTE(review): unsloth docs call FastVisionModel.for_inference(model) before
# generating — infer() is expected to handle that; confirm.
model, tokenizer = FastVisionModel.from_pretrained(
"tetttssts/llama_adapter",
load_in_4bit=True,
use_gradient_checkpointing="unsloth"
)
# Inference function
def infer(image, query):
    """Run the drone-detection model on *image* with *query*, return the text.

    Parameters
    ----------
    image : PIL.Image.Image
        Image from the Gradio UI (``gr.Image(type="pil")``).
    query : str
        Free-form user question; the module-level ``system_message`` is
        prepended before it reaches the model.

    Returns
    -------
    str
        Decoded model output, or an ``"Error during inference: ..."`` string
        if anything raised (kept broad so the Gradio worker never crashes).
    """
    try:
        # Unsloth requires enabling inference mode before generate();
        # the call is idempotent, so doing it per-request is safe.
        FastVisionModel.for_inference(model)
        messages = [{
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": system_message + query},
            ],
        }]
        input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
        # BUG FIX: HF vision processors accept the image positionally (or via
        # ``images=``); the original ``image=image`` keyword is not a valid
        # processor argument and broke tokenization.
        inputs = tokenizer(
            image,
            input_text,
            add_special_tokens=False,
            return_tensors="pt",
        ).to(model.device)  # follow the model's device instead of hard-coding "cuda"
        output = model.generate(
            **inputs,
            max_new_tokens=128,
            use_cache=True,
            temperature=1.5,
            min_p=0.1,
        )
        return tokenizer.decode(output[0], skip_special_tokens=True)
    except Exception as e:
        # Broad on purpose: surface the failure in the UI textbox rather than
        # letting the request 500.
        return f"Error during inference: {str(e)}"
# Gradio interface: one image + one text query in, one text answer out.
image_input = gr.Image(type="pil", label="Upload Image")
query_input = gr.Textbox(label="Enter your query")
response_output = gr.Textbox(label="Model Response")

interface = gr.Interface(
    fn=infer,
    inputs=[image_input, query_input],
    outputs=response_output,
    title="Drone Detector - Ana-bot",
    description="Upload an image and ask the bot to describe drones and detect them.",
)

# Launch the web app only when executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()