reverseforward committed on
Commit 742955b · 1 Parent(s): 511cbdb
Files changed (2)
  1. app.py +71 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,71 @@
+ import torch
+ from transformers import AutoProcessor, AutoModelForVision2Seq
+ import gradio as gr
+
+ # ----------------------------
+ # CONFIG
+ # ----------------------------
+ MODEL_NAME = "reverseforward/qwenmeasurement"  # change this to your repo name
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+ DTYPE = torch.float16  # use float16 on A10G
+
+ # ----------------------------
+ # LOAD MODEL
+ # ----------------------------
+ print("Loading model...")
+ model = AutoModelForVision2Seq.from_pretrained(
+     MODEL_NAME,
+     torch_dtype=DTYPE,
+     device_map="auto",
+ )
+ processor = AutoProcessor.from_pretrained(MODEL_NAME)
+ print("Model loaded successfully.")
+
+ # ----------------------------
+ # INFERENCE FUNCTION
+ # ----------------------------
+ def chat_with_image(image, text):
+     if image is None or text.strip() == "":
+         return "Please provide both an image and text input."
+
+     # Prepare inputs for Qwen3-VL
+     inputs = processor(text=[text], images=[image], return_tensors="pt").to(DEVICE, DTYPE)
+
+     # Generate output
+     with torch.inference_mode():
+         generated_ids = model.generate(
+             **inputs,
+             max_new_tokens=256,
+             temperature=0.7,
+         )
+
+     output = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+     return output.strip()
+
+
+ # ----------------------------
+ # GRADIO UI
+ # ----------------------------
+ title = "🧠 Qwen3-VL-8B Fine-tuned (Image + Text)"
+ description = """
+ Upload an image and enter a text prompt.
+ The model will reason visually and respond.
+ """
+
+ demo = gr.Interface(
+     fn=chat_with_image,
+     inputs=[
+         gr.Image(type="pil", label="Upload Image"),
+         gr.Textbox(label="Enter Instruction or Question"),
+     ],
+     outputs=gr.Textbox(label="Model Output"),
+     title=title,
+     description=description,
+     examples=[
+         ["examples/cat.jpg", "Describe this image."],
+         ["examples/room.jpg", "How many chairs are visible?"],
+     ],
+ )
+
+ if __name__ == "__main__":
+     demo.launch()
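Note: depending on how the checkpoint was fine-tuned, Qwen VL models often expect prompts built with the processor's chat template rather than raw text. A minimal sketch of an alternative input-preparation step inside chat_with_image, assuming a Qwen2-VL-style template (untested against this particular repo):

    # Hedged sketch: build a chat-formatted prompt instead of passing raw text.
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": text},
        ]}
    ]
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(DEVICE, DTYPE)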
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ torch>=2.1.0
+ transformers>=4.44.0
+ accelerate
+ gradio>=4.0.0
+ safetensors
+ pillow
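Once the Space is running, the same Interface can also be queried programmatically. A short usage sketch with gradio_client; the Space id below is a placeholder, substitute the actual Space name:

    from gradio_client import Client, handle_file

    client = Client("reverseforward/qwenmeasurement")  # placeholder Space id
    result = client.predict(
        handle_file("examples/cat.jpg"),   # image input
        "Describe this image.",            # text prompt
        api_name="/predict",
    )
    print(result)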