import torch
from transformers import AutoProcessor, AutoModelForVision2Seq
from huggingface_hub import login
import gradio as gr
import os
import gc

# ----------------------------
# AUTHENTICATION
# ----------------------------
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)
else:
    print("No HF_TOKEN found. Please log in manually.")
    login()
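# Note: the token needs read access to the model repo; on a Hugging Face
# Space it is typically provided as a repository secret named HF_TOKEN.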

# ----------------------------
# CONFIG
# ----------------------------
MODEL_NAME = "reverseforward/inferencemodel"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# float16 is slow and poorly supported on CPU; fall back to float32 there
DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32

# Clear cache before loading
gc.collect()
if DEVICE == "cuda":
    torch.cuda.empty_cache()

# ----------------------------
# LOAD MODEL (with error handling)
# ----------------------------
print(f"Loading model on {DEVICE}...")
try:
    model = AutoModelForVision2Seq.from_pretrained(
        MODEL_NAME,
        torch_dtype=DTYPE,
        device_map="auto",
        token=HF_TOKEN,
        low_cpu_mem_usage=True,  # Reduce memory usage
    )
    processor = AutoProcessor.from_pretrained(
        MODEL_NAME,
        token=HF_TOKEN,
    )
    print("✓ Model loaded successfully.")
except Exception as e:
    print(f"✗ Error loading model: {e}")
    raise

# ----------------------------
# INFERENCE FUNCTION
# ----------------------------
def chat_with_image(image, text):
    try:
        if image is None or not text or not text.strip():
            return "Please provide both an image and text input."

        # Clear memory before inference
        gc.collect()
        if DEVICE == "cuda":
            torch.cuda.empty_cache()

        # Prepare inputs. Chat-style VLM processors (Qwen-VL included) expect
        # image placeholder tokens in the prompt; apply_chat_template inserts them.
        messages = [{
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": text},
            ],
        }]
        prompt = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        # BatchFeature.to() casts only floating-point tensors, so pixel values
        # follow the model dtype while input_ids stay integer.
        inputs = processor(
            text=[prompt],
            images=[image],
            return_tensors="pt"
        ).to(DEVICE, DTYPE)

        # Generate output
        with torch.inference_mode():
            generated_ids = model.generate(
                **inputs,
                max_new_tokens=256,
                temperature=0.7,
                do_sample=True,
            )

        # Decode only the newly generated tokens, skipping the echoed prompt
        new_tokens = generated_ids[:, inputs["input_ids"].shape[1]:]
        output = processor.batch_decode(
            new_tokens,
            skip_special_tokens=True
        )[0]

        # Clean up
        del inputs, generated_ids, new_tokens
        gc.collect()
        
        return output.strip()
    
    except Exception as e:
        return f"Error during inference: {str(e)}"

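# Quick smoke test outside the UI (a sketch; "example.jpg" is a hypothetical
# local file, substitute any image on disk):
#
#   from PIL import Image
#   print(chat_with_image(Image.open("example.jpg"), "Describe this image."))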

# ----------------------------
# GRADIO UI
# ----------------------------
title = "🧠 Qwen3-VL-8B Fine-tuned (Image + Text)"
description = """
Upload an image and enter a text prompt.  
The model will reason visually and respond.
"""

demo = gr.Interface(
    fn=chat_with_image,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Enter Instruction or Question", lines=3),
    ],
    outputs=gr.Textbox(label="Model Output", lines=5),
    title=title,
    description=description,
    flagging_mode="never",  # disable flagging to reduce overhead (named allow_flagging before Gradio 5)
)

if __name__ == "__main__":
    demo.launch(show_error=True)
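
# To run locally (a minimal sketch; the package list covers only what this
# script imports, pin versions as needed; accelerate is required by
# device_map="auto"):
#
#   pip install torch transformers accelerate gradio huggingface_hub
#   export HF_TOKEN=hf_xxxxxxxx   # optional; otherwise you are prompted to log in
#   python app.py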