import torch
from transformers import AutoProcessor, AutoModelForVision2Seq
from huggingface_hub import login
import gradio as gr
import os
import gc
# ----------------------------
# AUTHENTICATION
# ----------------------------
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)
else:
    print("No HF_TOKEN found. Please log in manually.")
    login()
# ----------------------------
# CONFIG
# ----------------------------
MODEL_NAME = "reverseforward/inferencemodel"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float16
# Clear cache before loading
gc.collect()
if DEVICE == "cuda":
torch.cuda.empty_cache()
# ----------------------------
# LOAD MODEL (with error handling)
# ----------------------------
print(f"Loading model on {DEVICE}...")
try:
    model = AutoModelForVision2Seq.from_pretrained(
        MODEL_NAME,
        torch_dtype=DTYPE,
        device_map="auto",
        token=HF_TOKEN,
        low_cpu_mem_usage=True,  # Reduce memory usage
    )
    processor = AutoProcessor.from_pretrained(
        MODEL_NAME,
        token=HF_TOKEN,
    )
    print("✓ Model loaded successfully.")
except Exception as e:
    print(f"✗ Error loading model: {e}")
    raise
# ----------------------------
# INFERENCE FUNCTION
# ----------------------------
def chat_with_image(image, text):
    try:
        if image is None or text.strip() == "":
            return "Please provide both an image and text input."
        # Clear memory before inference
        gc.collect()
        if DEVICE == "cuda":
            torch.cuda.empty_cache()
        # Prepare inputs
        inputs = processor(
            text=[text],
            images=[image],
            return_tensors="pt"
        ).to(DEVICE, DTYPE)
        # Generate output
        with torch.inference_mode():
            generated_ids = model.generate(
                **inputs,
                max_new_tokens=256,
                temperature=0.7,
                do_sample=True,
            )
        output = processor.batch_decode(
            generated_ids,
            skip_special_tokens=True
        )[0]
        # Clean up
        del inputs, generated_ids
        gc.collect()
        return output.strip()
    except Exception as e:
        return f"Error during inference: {str(e)}"
# ----------------------------
# GRADIO UI
# ----------------------------
title = "🧠 Qwen3-VL-8B Fine-tuned (Image + Text)"
description = """
Upload an image and enter a text prompt.
The model will reason visually and respond.
"""
demo = gr.Interface(
    fn=chat_with_image,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Enter Instruction or Question", lines=3),
    ],
    outputs=gr.Textbox(label="Model Output", lines=5),
    title=title,
    description=description,
    allow_flagging="never",  # Disable flagging to reduce overhead
)
if __name__ == "__main__":
    demo.launch(show_error=True)