reveseforward committed on
Commit 29b207e · 1 Parent(s): 6f7684a
Files changed (1)
  1. app.py +66 -37
app.py CHANGED
@@ -3,62 +3,93 @@ from transformers import AutoProcessor, AutoModelForVision2Seq
 from huggingface_hub import login
 import gradio as gr
 import os
+import gc
 
 # ----------------------------
 # AUTHENTICATION
 # ----------------------------
-# Option 1: Use HF token from environment variable (recommended for Spaces)
 HF_TOKEN = os.getenv("HF_TOKEN")
 if HF_TOKEN:
     login(token=HF_TOKEN)
 else:
-    # Option 2: Interactive login (for local testing)
     print("No HF_TOKEN found. Please log in manually.")
     login()
 
 # ----------------------------
 # CONFIG
 # ----------------------------
-MODEL_NAME = "reverseforward/inferencemodel"  # change this to your repo name
+MODEL_NAME = "reverseforward/inferencemodel"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-DTYPE = torch.float16  # use float16 on A10G
+DTYPE = torch.float16
+
+# Clear cache before loading
+gc.collect()
+if DEVICE == "cuda":
+    torch.cuda.empty_cache()
 
 # ----------------------------
-# LOAD MODEL
+# LOAD MODEL (with error handling)
 # ----------------------------
-print("Loading model...")
-model = AutoModelForVision2Seq.from_pretrained(
-    MODEL_NAME,
-    torch_dtype=DTYPE,
-    device_map="auto",
-    token=HF_TOKEN,  # Pass token explicitly
-)
-processor = AutoProcessor.from_pretrained(
-    MODEL_NAME,
-    token=HF_TOKEN,
-)
-print("Model loaded successfully.")
+print(f"Loading model on {DEVICE}...")
+try:
+    model = AutoModelForVision2Seq.from_pretrained(
+        MODEL_NAME,
+        torch_dtype=DTYPE,
+        device_map="auto",
+        token=HF_TOKEN,
+        low_cpu_mem_usage=True,  # Reduce memory usage
+    )
+    processor = AutoProcessor.from_pretrained(
+        MODEL_NAME,
+        token=HF_TOKEN,
+    )
+    print("✓ Model loaded successfully.")
+except Exception as e:
+    print(f"✗ Error loading model: {e}")
+    raise
 
 # ----------------------------
 # INFERENCE FUNCTION
 # ----------------------------
 def chat_with_image(image, text):
-    if image is None or text.strip() == "":
-        return "Please provide both an image and text input."
+    try:
+        if image is None or text.strip() == "":
+            return "Please provide both an image and text input."
 
-    # Prepare inputs for Qwen3-VL
-    inputs = processor(text=[text], images=[image], return_tensors="pt").to(DEVICE, DTYPE)
+        # Clear memory before inference
+        gc.collect()
+        if DEVICE == "cuda":
+            torch.cuda.empty_cache()
 
-    # Generate output
-    with torch.inference_mode():
-        generated_ids = model.generate(
-            **inputs,
-            max_new_tokens=256,
-            temperature=0.7,
-        )
+        # Prepare inputs
+        inputs = processor(
+            text=[text],
+            images=[image],
+            return_tensors="pt"
+        ).to(DEVICE, DTYPE)
 
-    output = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    return output.strip()
+        # Generate output
+        with torch.inference_mode():
+            generated_ids = model.generate(
+                **inputs,
+                max_new_tokens=256,
+                temperature=0.7,
+                do_sample=True,
+            )
+
+        output = processor.batch_decode(
+            generated_ids,
+            skip_special_tokens=True
+        )[0]
+
+        # Clean up
+        del inputs, generated_ids
+        gc.collect()
+
+        return output.strip()
+
+    except Exception as e:
+        return f"Error during inference: {str(e)}"
 
 
 # ----------------------------
@@ -74,16 +105,14 @@ demo = gr.Interface(
     fn=chat_with_image,
     inputs=[
         gr.Image(type="pil", label="Upload Image"),
-        gr.Textbox(label="Enter Instruction or Question"),
+        gr.Textbox(label="Enter Instruction or Question", lines=3),
     ],
-    outputs=gr.Textbox(label="Model Output"),
+    outputs=gr.Textbox(label="Model Output", lines=5),
     title=title,
     description=description,
-    examples=[
-        ["examples/cat.jpg", "Describe this image."],
-        ["examples/room.jpg", "How many chairs are visible?"],
-    ],
+
+    allow_flagging="never",  # Disable flagging to reduce overhead
 )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(show_error=True)
 
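A quick way to exercise the new try/except paths outside the Space is a local smoke test. The sketch below is illustrative only and not part of this commit; it assumes HF_TOKEN is exported, that app.py is importable from the working directory (importing it triggers authentication and model loading), and that "sample.jpg" is a placeholder image path.

# Hypothetical smoke test for the updated chat_with_image() -- not part of
# this commit. Importing app runs authentication and loads the model, so an
# HF_TOKEN environment variable should already be set.
from PIL import Image

import app

# "sample.jpg" is a placeholder; substitute any local test image.
img = Image.open("sample.jpg")
print(app.chat_with_image(img, "Describe this image."))

# Empty input should return the validation message without running generate().
print(app.chat_with_image(None, ""))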