Joe6636564 committed on
Commit
0d19a07
·
verified ·
1 Parent(s): 7c7e1f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -41
app.py CHANGED
@@ -40,18 +40,21 @@ model = AutoModelForCausalLM.from_pretrained(
40
  low_cpu_mem_usage=True # Optimize for CPU memory
41
  )
42
 
43
- # Vision model setup
44
  print("Loading vision models...")
45
  models = {}
46
  processors = {}
47
 
48
  try:
 
49
  models["microsoft/Phi-3.5-vision-instruct"] = AutoModelForCausalLM.from_pretrained(
50
  "microsoft/Phi-3.5-vision-instruct",
51
  trust_remote_code=True,
52
  torch_dtype=torch.float32, # Use float32 for CPU
53
  device_map="cpu",
54
- low_cpu_mem_usage=True # Optimize for CPU memory
 
 
55
  ).eval()
56
 
57
  processors["microsoft/Phi-3.5-vision-instruct"] = AutoProcessor.from_pretrained(
@@ -61,6 +64,23 @@ try:
61
  print("Vision model loaded successfully on CPU")
62
  except Exception as e:
63
  print(f"Error loading vision model: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  # Chatbot function
66
  def stream_chat(
@@ -137,39 +157,43 @@ def stream_vision(image, text_input=None, model_id="microsoft/Phi-3.5-vision-ins
137
  # Process the inputs with the processor
138
  inputs = processor(prompt, images, return_tensors="pt").to(device)
139
 
140
- # Generation parameters
141
  generation_args = {
142
- "max_new_tokens": 500, # Reduced for CPU
143
  "temperature": 0.0,
144
  "do_sample": False,
145
  }
146
 
147
  # Generate the response
148
- generate_ids = model_vision.generate(
149
- **inputs,
150
- eos_token_id=processor.tokenizer.eos_token_id,
151
- **generation_args
152
- )
153
-
154
- # Remove input tokens from the generated response
155
- generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
156
-
157
- # Decode the generated output
158
- response = processor.batch_decode(
159
- generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
160
- )[0]
161
-
162
- return response
 
 
 
163
 
164
  # Flask API Routes
165
  @flask_app.route('/health', methods=['GET'])
166
  def health_check():
 
167
  return jsonify({
168
  "status": "healthy",
169
  "device": device,
170
  "models_loaded": {
171
  "chatbot": MODEL_ID1 in globals() and 'model' in globals(),
172
- "vision": len(models) > 0
173
  }
174
  })
175
 
@@ -249,10 +273,12 @@ def api_vision():
249
 
250
  @flask_app.route('/api/models', methods=['GET'])
251
  def get_models():
 
252
  return jsonify({
253
  "chat_model": MODEL_ID1,
254
- "vision_models": list(models.keys()),
255
- "device": device
 
256
  })
257
 
258
  def run_flask():
@@ -262,12 +288,18 @@ def run_gradio():
262
  # CSS for the interface
263
  CSS = """.duplicate-button { margin: auto !important; color: white !important; background: black !important; border-radius: 100vh !important;}h3 { text-align: center;}"""
264
  PLACEHOLDER = """<center><p>Hi! I'm your assistant. Feel free to ask your questions</p></center>"""
265
- TITLE = "<h1><center>Phi-3.5 Chatbot & Phi-3.5 Vision (CPU Version)</center></h1>"
266
- EXPLANATION = """<div style="text-align: center; margin-top: 20px;">
 
 
 
 
 
267
  <p><strong>CPU-Only Version</strong> - This instance is running on CPU. Responses may be slower than GPU-accelerated versions.</p>
 
268
  <p>This app supports both the microsoft/Phi-3.5-mini-instruct model for chat bot and the microsoft/Phi-3.5-vision-instruct model for multimodal model.</p>
269
- <p>Phi-3.5-vision is a lightweight, state-of-the-art open multimodal model built upon datasets which include - synthetic data and filtered publicly available websites - with a focus on very high-quality, reasoning dense data both on text and vision. The model belongs to the Phi-3 model family, and the multimodal version comes with 128K context length (in tokens) it can support.</p>
270
- <p>Phi-3.5-mini is a lightweight, state-of-the-art open model built upon datasets used for Phi-3 - synthetic data and filtered publicly available websites - with a focus on very high-quality, reasoning dense data. The model belongs to the Phi-3 model family and supports 128K token context length.</p>
271
  </div>"""
272
  footer = """<div style="text-align: center; margin-top: 20px;">
273
  <a href="https://www.linkedin.com/in/pejman-ebrahimi-4a60151a7/" target="_blank">LinkedIn</a> |
@@ -278,13 +310,13 @@ def run_gradio():
278
  </div>"""
279
 
280
  # Gradio app with two tabs
281
- with gr.Blocks(css=CSS, theme="small_and_pretty") as demo:
282
  gr.HTML(TITLE)
283
  gr.HTML(EXPLANATION)
284
  gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
285
 
286
  with gr.Tab("Chatbot"):
287
- chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
288
  gr.ChatInterface(
289
  fn=stream_chat,
290
  chatbot=chatbot,
@@ -346,19 +378,33 @@ def run_gradio():
346
  cache_examples=False,
347
  )
348
 
349
- with gr.Tab("Vision"):
350
- with gr.Row():
351
- input_img = gr.Image(label="Input Picture")
352
- with gr.Row():
353
- model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="microsoft/Phi-3.5-vision-instruct")
354
- with gr.Row():
355
- text_input = gr.Textbox(label="Question", value="What's in this image?")
356
- with gr.Row():
357
- submit_btn = gr.Button(value="Submit")
358
- with gr.Row():
359
- output_text = gr.Textbox(label="Output Text")
360
-
361
- submit_btn.click(stream_vision, [input_img, text_input, model_selector], [output_text])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
 
363
  gr.HTML(footer)
364
 
@@ -366,6 +412,13 @@ def run_gradio():
366
  demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
367
 
368
  if __name__ == "__main__":
 
 
 
 
 
 
 
369
  # Start Flask server in a separate thread
370
  flask_thread = threading.Thread(target=run_flask, daemon=True)
371
  flask_thread.start()
 
40
  low_cpu_mem_usage=True # Optimize for CPU memory
41
  )
42
 
43
+ # Vision model setup - FIXED for CPU
44
  print("Loading vision models...")
45
  models = {}
46
  processors = {}
47
 
48
  try:
49
+ # Load vision model without flash_attention_2 for CPU
50
  models["microsoft/Phi-3.5-vision-instruct"] = AutoModelForCausalLM.from_pretrained(
51
  "microsoft/Phi-3.5-vision-instruct",
52
  trust_remote_code=True,
53
  torch_dtype=torch.float32, # Use float32 for CPU
54
  device_map="cpu",
55
+ low_cpu_mem_usage=True, # Optimize for CPU memory
56
+ # Remove flash_attention_2 for CPU compatibility
57
+ _attn_implementation=None
58
  ).eval()
59
 
60
  processors["microsoft/Phi-3.5-vision-instruct"] = AutoProcessor.from_pretrained(
 
64
  print("Vision model loaded successfully on CPU")
65
  except Exception as e:
66
  print(f"Error loading vision model: {e}")
67
+ # Try alternative loading method
68
+ try:
69
+ print("Trying alternative loading method for vision model...")
70
+ models["microsoft/Phi-3.5-vision-instruct"] = AutoModelForCausalLM.from_pretrained(
71
+ "microsoft/Phi-3.5-vision-instruct",
72
+ trust_remote_code=True,
73
+ torch_dtype=torch.float32,
74
+ device_map="cpu"
75
+ ).eval()
76
+
77
+ processors["microsoft/Phi-3.5-vision-instruct"] = AutoProcessor.from_pretrained(
78
+ "microsoft/Phi-3.5-vision-instruct",
79
+ trust_remote_code=True
80
+ )
81
+ print("Vision model loaded successfully with alternative method")
82
+ except Exception as e2:
83
+ print(f"Failed to load vision model with alternative method: {e2}")
84
 
85
  # Chatbot function
86
  def stream_chat(
 
157
  # Process the inputs with the processor
158
  inputs = processor(prompt, images, return_tensors="pt").to(device)
159
 
160
+ # Generation parameters - reduced for CPU
161
  generation_args = {
162
+ "max_new_tokens": 300, # Further reduced for CPU
163
  "temperature": 0.0,
164
  "do_sample": False,
165
  }
166
 
167
  # Generate the response
168
+ try:
169
+ generate_ids = model_vision.generate(
170
+ **inputs,
171
+ eos_token_id=processor.tokenizer.eos_token_id,
172
+ **generation_args
173
+ )
174
+
175
+ # Remove input tokens from the generated response
176
+ generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
177
+
178
+ # Decode the generated output
179
+ response = processor.batch_decode(
180
+ generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
181
+ )[0]
182
+
183
+ return response
184
+ except Exception as e:
185
+ return f"Error generating vision response: {str(e)}"
186
 
187
  # Flask API Routes
188
  @flask_app.route('/health', methods=['GET'])
189
  def health_check():
190
+ vision_loaded = len(models) > 0 and "microsoft/Phi-3.5-vision-instruct" in models
191
  return jsonify({
192
  "status": "healthy",
193
  "device": device,
194
  "models_loaded": {
195
  "chatbot": MODEL_ID1 in globals() and 'model' in globals(),
196
+ "vision": vision_loaded
197
  }
198
  })
199
 
 
273
 
274
  @flask_app.route('/api/models', methods=['GET'])
275
  def get_models():
276
+ vision_loaded = len(models) > 0 and "microsoft/Phi-3.5-vision-instruct" in models
277
  return jsonify({
278
  "chat_model": MODEL_ID1,
279
+ "vision_models": list(models.keys()) if vision_loaded else [],
280
+ "device": device,
281
+ "vision_available": vision_loaded
282
  })
283
 
284
  def run_flask():
 
288
  # CSS for the interface
289
  CSS = """.duplicate-button { margin: auto !important; color: white !important; background: black !important; border-radius: 100vh !important;}h3 { text-align: center;}"""
290
  PLACEHOLDER = """<center><p>Hi! I'm your assistant. Feel free to ask your questions</p></center>"""
291
+
292
+ # Check if vision model is available
293
+ vision_available = len(models) > 0 and "microsoft/Phi-3.5-vision-instruct" in models
294
+ vision_status = "Available" if vision_available else "Not Available"
295
+
296
+ TITLE = f"<h1><center>Phi-3.5 Chatbot & Phi-3.5 Vision (CPU Version)</center></h1>"
297
+ EXPLANATION = f"""<div style="text-align: center; margin-top: 20px;">
298
  <p><strong>CPU-Only Version</strong> - This instance is running on CPU. Responses may be slower than GPU-accelerated versions.</p>
299
+ <p><strong>Vision Model Status:</strong> {vision_status}</p>
300
  <p>This app supports both the microsoft/Phi-3.5-mini-instruct model for chat bot and the microsoft/Phi-3.5-vision-instruct model for multimodal model.</p>
301
+ <p>Phi-3.5-vision is a lightweight, state-of-the-art open multimodal model built upon datasets which include - synthetic data and filtered publicly available websites - with a focus on very high-quality, reasoning dense data both on text and vision.</p>
302
+ <p>Phi-3.5-mini is a lightweight, state-of-the-art open model built upon datasets used for Phi-3 - synthetic data and filtered publicly available websites - with a focus on very high-quality, reasoning dense data.</p>
303
  </div>"""
304
  footer = """<div style="text-align: center; margin-top: 20px;">
305
  <a href="https://www.linkedin.com/in/pejman-ebrahimi-4a60151a7/" target="_blank">LinkedIn</a> |
 
310
  </div>"""
311
 
312
  # Gradio app with two tabs
313
+ with gr.Blocks(css=CSS, theme=gr.themes.Default()) as demo: # Changed to default theme
314
  gr.HTML(TITLE)
315
  gr.HTML(EXPLANATION)
316
  gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
317
 
318
  with gr.Tab("Chatbot"):
319
+ chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER, type="messages") # Fixed deprecated type
320
  gr.ChatInterface(
321
  fn=stream_chat,
322
  chatbot=chatbot,
 
378
  cache_examples=False,
379
  )
380
 
381
+ # Only show vision tab if model is available
382
+ if vision_available:
383
+ with gr.Tab("Vision"):
384
+ with gr.Row():
385
+ input_img = gr.Image(label="Input Picture")
386
+ with gr.Row():
387
+ model_selector = gr.Dropdown(
388
+ choices=list(models.keys()),
389
+ label="Model",
390
+ value="microsoft/Phi-3.5-vision-instruct",
391
+ allow_custom_value=False # Fixed warning
392
+ )
393
+ with gr.Row():
394
+ text_input = gr.Textbox(label="Question", value="What's in this image?")
395
+ with gr.Row():
396
+ submit_btn = gr.Button(value="Submit")
397
+ with gr.Row():
398
+ output_text = gr.Textbox(label="Output Text")
399
+
400
+ submit_btn.click(stream_vision, [input_img, text_input, model_selector], [output_text])
401
+ else:
402
+ with gr.Tab("Vision"):
403
+ gr.HTML("""<div style="text-align: center; padding: 40px;">
404
+ <h3>Vision Model Not Available</h3>
405
+ <p>The vision model failed to load. This is likely due to memory constraints on CPU.</p>
406
+ <p>Try using the chat model instead, or run this on a system with more RAM.</p>
407
+ </div>""")
408
 
409
  gr.HTML(footer)
410
 
 
412
  demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
413
 
414
  if __name__ == "__main__":
415
+ print("=" * 50)
416
+ print("Application Starting Up...")
417
+ print(f"Device: {device}")
418
+ print(f"Chat model loaded: {MODEL_ID1}")
419
+ print(f"Vision model loaded: {len(models) > 0}")
420
+ print("=" * 50)
421
+
422
  # Start Flask server in a separate thread
423
  flask_thread = threading.Thread(target=run_flask, daemon=True)
424
  flask_thread.start()