# app.py
"""Gradio demo: detect and describe drones in an image with a fine-tuned
Llama vision adapter served through unsloth."""

from unsloth import FastVisionModel  # For LLMs, use FastLanguageModel
import torch
import gradio as gr

# System prompt prepended to every user query.
system_message = (
    "You are Ana-bot. Your role is to detect drones in the image. "
    "Describe the drone (speed, color, etc.) and return its bounding box.\n\n"
)

# Load model and tokenizer (4-bit quantized to fit consumer GPUs).
model, tokenizer = FastVisionModel.from_pretrained(
    "tetttssts/llama_adapter",
    load_in_4bit=True,
    use_gradient_checkpointing="unsloth",
)
# FIX: unsloth requires switching to inference mode before generate();
# the model is otherwise left in training mode (gradient checkpointing on),
# which breaks/slows generation.
FastVisionModel.for_inference(model)


# Inference function
def infer(image, query):
    """Run the vision model on *image* with *query*; return the decoded text.

    Args:
        image: PIL image supplied by the Gradio ``Image`` component.
        query: Free-form user question, appended to the system prompt.

    Returns:
        The model's decoded response, or an ``"Error during inference: ..."``
        string if anything raises (UI boundary — never crash the app).
    """
    try:
        # FIX: the chat template only needs an image *placeholder*; the
        # actual pixels go to the processor call below. Embedding the PIL
        # object in the template risks serialization errors.
        messages = [{
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": system_message + query},
            ],
        }]
        input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
        # FIX: multimodal processors take the image as the first positional
        # argument (keyword name is `images`); the original `image=` keyword
        # is not an accepted parameter.
        # NOTE(review): assumes a CUDA device is available — confirm for CPU
        # deployments.
        inputs = tokenizer(
            image,
            input_text,
            add_special_tokens=False,
            return_tensors="pt",
        ).to("cuda")
        output = model.generate(
            **inputs,
            max_new_tokens=128,
            use_cache=True,
            temperature=1.5,
            min_p=0.1,
        )
        decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
        return decoded_output
    except Exception as e:  # UI boundary: surface the error as text
        return f"Error during inference: {str(e)}"


# Gradio interface
interface = gr.Interface(
    fn=infer,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Enter your query"),
    ],
    outputs=gr.Textbox(label="Model Response"),
    title="Drone Detector - Ana-bot",
    description="Upload an image and ask the bot to describe drones and detect them.",
)

# Run
if __name__ == "__main__":
    interface.launch()