TangYiJay commited on
Commit
7d26df7
·
verified ·
1 Parent(s): 3041479

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -0
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoProcessor, AutoModelForVision2Seq
3
+ import torch
4
+ from PIL import Image
5
+
6
+ # Load model and processor
7
+ MODEL_ID = "liuhaotian/llava-v1.6-vicuna-7b"
8
+ processor = AutoProcessor.from_pretrained(MODEL_ID)
9
+ model = AutoModelForVision2Seq.from_pretrained(MODEL_ID, torch_dtype=torch.float16, low_cpu_mem_usage=True)
10
+
11
+ device = "cuda" if torch.cuda.is_available() else "cpu"
12
+ model.to(device)
13
+
14
+ base_image = None
15
+
16
+ def set_base(image):
17
+ global base_image
18
+ base_image = image
19
+ return "✅ Base image has been set."
20
+
21
+ def detect_object(image, prompt):
22
+ if base_image is None:
23
+ return "⚠️ Please upload a base image first."
24
+
25
+ query = (
26
+ f"Ignore the base image and focus only on new or added objects. "
27
+ f"Base image and detection image are given. {prompt or 'Describe and identify the materials of new objects.'}"
28
+ )
29
+
30
+ inputs = processor(
31
+ text=query,
32
+ images=[base_image, image],
33
+ return_tensors="pt"
34
+ ).to(device, torch.float16)
35
+
36
+ output = model.generate(**inputs, max_new_tokens=256)
37
+ result = processor.decode(output[0], skip_special_tokens=True)
38
+ return result
39
+
40
+ # Build Gradio UI
41
+ with gr.Blocks(title="LLaVA Object & Material Detector") as demo:
42
+ gr.Markdown("## 🧠 LLaVA-1.6 (Vicuna-7B) Object & Material Detection\nUpload a base image first, then upload another image to detect new objects while ignoring the base.")
43
+
44
+ with gr.Row():
45
+ with gr.Column():
46
+ base_img = gr.Image(label="Base Image", type="pil")
47
+ set_base_btn = gr.Button("Set as Base Image")
48
+ set_base_status = gr.Textbox(label="Status")
49
+
50
+ with gr.Column():
51
+ target_img = gr.Image(label="Detection Image", type="pil")
52
+ user_prompt = gr.Textbox(label="Custom Instruction", placeholder="e.g. Detect added objects and describe their material.")
53
+ run_btn = gr.Button("Run Detection")
54
+ output_box = gr.Textbox(label="Model Output")
55
+
56
+ set_base_btn.click(set_base, inputs=base_img, outputs=set_base_status)
57
+ run_btn.click(detect_object, inputs=[target_img, user_prompt], outputs=output_box)
58
+
59
+ demo.launch()