Joe6636564 committed on
Commit
1b4c043
·
verified ·
1 Parent(s): ecdc3e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -72
app.py CHANGED
@@ -1,103 +1,107 @@
1
  import os
2
  import torch
3
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, AutoProcessor
4
- import gradio as gr
5
  from threading import Thread
6
  from PIL import Image
7
  import numpy as np
 
8
  from fastapi import FastAPI, UploadFile, File, Form
9
  from gradio.routes import mount_gradio_app
 
10
 
11
- # Disable CUDA
 
 
 
 
 
 
 
12
  os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
13
  torch.cuda.is_available = lambda: False
14
-
15
  device = "cpu"
16
- print("Using CPU only")
17
 
18
- # Load Chat Model
19
- MODEL_ID1 = "microsoft/Phi-3.5-mini-instruct"
20
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID1)
 
 
21
  model = AutoModelForCausalLM.from_pretrained(
22
- MODEL_ID1,
23
  torch_dtype=torch.float32,
24
  device_map="cpu",
25
  low_cpu_mem_usage=True
26
- )
 
 
27
 
28
- # Load Vision Model
29
  models = {}
30
  processors = {}
31
 
32
  try:
33
- models["microsoft/Phi-3.5-vision-instruct"] = AutoModelForCausalLM.from_pretrained(
34
- "microsoft/Phi-3.5-vision-instruct",
 
35
  trust_remote_code=True,
36
  torch_dtype=torch.float32,
37
  device_map="cpu",
38
- low_cpu_mem_usage=True
 
39
  ).eval()
40
 
41
- processors["microsoft/Phi-3.5-vision-instruct"] = AutoProcessor.from_pretrained(
42
- "microsoft/Phi-3.5-vision-instruct",
43
  trust_remote_code=True
44
  )
 
45
  print("Vision model loaded ✅")
46
  except Exception as e:
47
  print("Vision model failed to load:", e)
48
 
 
49
 
50
- # -------------- CHAT FUNCTION --------------
51
-
52
- def stream_chat(message, history, system_prompt, temperature, max_new_tokens, top_p, top_k, penalty):
53
  conversation = [{"role": "system", "content": system_prompt}]
54
  for user, assistant in history:
55
  conversation.append({"role": "user", "content": user})
56
  conversation.append({"role": "assistant", "content": assistant})
57
-
58
  conversation.append({"role": "user", "content": message})
59
 
60
- input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
61
-
62
- streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
63
- kwargs = dict(
64
- input_ids=input_ids,
65
- max_new_tokens=max_new_tokens,
66
- do_sample=temperature > 0,
67
- temperature=temperature,
68
- top_p=top_p,
69
- top_k=top_k,
70
- repetition_penalty=penalty,
71
- eos_token_id=[128001, 128008, 128009],
72
- streamer=streamer
73
  )
74
 
75
- thread = Thread(target=model.generate, kwargs=kwargs)
76
- thread.start()
77
-
78
- output = ""
79
- for token in streamer:
80
- output += token
81
- yield output
82
-
83
 
84
- # -------------- VISION FUNCTION --------------
85
 
86
- def stream_vision(image, text_input, model_id):
87
  if model_id not in models:
88
- return "Vision model not loaded."
89
 
90
  model_vision = models[model_id]
91
  processor = processors[model_id]
92
 
93
- images = [Image.fromarray(image).convert("RGB")]
94
  placeholder = "<|image_1|>\n"
95
  prompt = placeholder + (text_input or "")
96
 
97
  messages = [{"role": "user", "content": prompt}]
98
  template = processor.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
99
-
100
- inputs = processor(template, images, return_tensors="pt").to(device)
101
 
102
  output = model_vision.generate(
103
  **inputs,
@@ -109,52 +113,49 @@ def stream_vision(image, text_input, model_id):
109
  text = processor.batch_decode(output, skip_special_tokens=True)[0]
110
  return text
111
 
112
-
113
- # -------------- FASTAPI BACKEND --------------
114
 
115
  api = FastAPI()
116
 
117
  @api.get("/health")
118
  def health():
119
- return {"status": "ok", "device": device, "chat_model": MODEL_ID1, "vision_loaded": len(models)>0}
 
 
 
 
 
120
 
121
  @api.post("/api/chat")
122
- async def api_chat(message: str = Form(...), system_prompt: str = Form("You are a helpful assistant")):
123
- conversation = [
124
- {"role": "system", "content": system_prompt},
125
- {"role": "user", "content": message}
126
- ]
127
  input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
128
- out = model.generate(input_ids, max_new_tokens=512, do_sample=False)
129
- reply = tokenizer.decode(out[0][input_ids.shape[1]:], skip_special_tokens=True)
130
  return {"response": reply}
131
 
132
  @api.post("/api/vision")
133
- async def api_vision(image: UploadFile = File(...), text_input: str = Form(""), model_id: str = Form("microsoft/Phi-3.5-vision-instruct")):
134
  img = Image.open(image.file).convert("RGB")
135
- result = stream_vision(np.array(img), text_input, model_id)
136
- return {"response": result}
137
-
138
 
139
- # -------------- GRADIO UI --------------
140
 
141
- def build_gradio_ui():
142
- CSS = """.duplicate-button { margin: auto !important; color: white !important; background: black !important;}"""
143
- with gr.Blocks(css=CSS) as demo:
144
  with gr.Tab("Chat"):
145
- chat = gr.Chatbot(height=600)
146
- gr.ChatInterface(fn=stream_chat, chatbot=chat)
147
 
148
  with gr.Tab("Vision"):
149
  img = gr.Image()
150
  txt = gr.Textbox("What's in this image?")
151
- model_sel = gr.Dropdown(list(models.keys()), value="microsoft/Phi-3.5-vision-instruct")
152
  out = gr.Textbox()
153
- gr.Button("Analyze").click(stream_vision, [img, txt, model_sel], out)
154
 
155
  return demo
156
 
157
-
158
- gradio_app = build_gradio_ui()
159
-
160
  app = mount_gradio_app(api, gradio_app, path="/")
 
1
  import os
2
  import torch
 
 
3
  from threading import Thread
4
  from PIL import Image
5
  import numpy as np
6
+
7
  from fastapi import FastAPI, UploadFile, File, Form
8
  from gradio.routes import mount_gradio_app
9
+ import gradio as gr
10
 
11
+ from transformers import (
12
+ AutoModelForCausalLM,
13
+ AutoTokenizer,
14
+ TextIteratorStreamer,
15
+ AutoProcessor,
16
+ )
17
+
18
# Force CPU: hide all CUDA devices and stub out the availability check so
# downstream libraries never try to initialize a GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# NOTE: monkey-patching torch.cuda.is_available is a blunt instrument —
# any code that checks availability will now always see False.
torch.cuda.is_available = lambda: False
device = "cpu"
print("Running on CPU ")

# ---------------- LOAD MAIN CHAT MODEL ----------------

MODEL_ID = "microsoft/Phi-3.5-mini-instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,   # full precision on CPU (no fp16 kernels)
    device_map="cpu",
    low_cpu_mem_usage=True
).eval()  # inference-only: disable dropout etc.

# ---------------- LOAD VISION MODEL (FlashAttention disabled) ----------------

# Registries keyed by model id; the UI dropdown and the API look models up here.
models = {}
processors = {}

# Vision model load is best-effort: if it fails (e.g. OOM, download error),
# the chat features still work and run_vision reports "not loaded".
try:
    VISION_ID = "microsoft/Phi-3.5-vision-instruct"
    # trust_remote_code is required for Phi-3.5-vision's custom modeling code.
    models[VISION_ID] = AutoModelForCausalLM.from_pretrained(
        VISION_ID,
        trust_remote_code=True,
        torch_dtype=torch.float32,
        device_map="cpu",
        low_cpu_mem_usage=True,
        attn_implementation="eager"  # <<< KEY FIX ✅  (FlashAttention unavailable on CPU)
    ).eval()

    processors[VISION_ID] = AutoProcessor.from_pretrained(
        VISION_ID,
        trust_remote_code=True
    )

    print("Vision model loaded ✅")
except Exception as e:
    # Deliberate best-effort: log and continue with chat-only functionality.
    print("Vision model failed to load:", e)
60
 
61
+ # ---------------- CHAT FUNCTION (for UI) ----------------
62
 
63
def chat_simple(message, history):
    """Produce one assistant reply for the Gradio ChatInterface.

    Args:
        message: The latest user message (str).
        history: Prior turns as (user, assistant) pairs.

    Returns:
        The decoded reply text (prompt tokens stripped).
    """
    convo = [{"role": "system", "content": "You are a helpful assistant."}]
    for user_turn, bot_turn in history:
        convo += [
            {"role": "user", "content": user_turn},
            {"role": "assistant", "content": bot_turn},
        ]
    convo.append({"role": "user", "content": message})

    prompt_ids = tokenizer.apply_chat_template(
        convo,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(device)

    generated = model.generate(
        prompt_ids,
        max_new_tokens=256,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )

    # Decode only the newly generated tokens, not the echoed prompt.
    new_tokens = generated[0][prompt_ids.shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
 
 
 
 
 
 
88
 
89
+ # ---------------- VISION FUNCTION ----------------
90
 
91
+ def run_vision(image, text_input, model_id):
92
  if model_id not in models:
93
+ return "⚠️ Vision model not loaded."
94
 
95
  model_vision = models[model_id]
96
  processor = processors[model_id]
97
 
98
+ img = Image.fromarray(image).convert("RGB")
99
  placeholder = "<|image_1|>\n"
100
  prompt = placeholder + (text_input or "")
101
 
102
  messages = [{"role": "user", "content": prompt}]
103
  template = processor.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
104
+ inputs = processor(template, [img], return_tensors="pt")
 
105
 
106
  output = model_vision.generate(
107
  **inputs,
 
113
  text = processor.batch_decode(output, skip_special_tokens=True)[0]
114
  return text
115
 
116
+ # ---------------- FASTAPI BACKEND API ----------------
 
117
 
118
  api = FastAPI()
119
 
120
@api.get("/health")
def health():
    """Liveness probe: report device, chat model id, and vision availability."""
    report = {
        "status": "ok",
        "device": device,
        "chat_model": MODEL_ID,
    }
    # Truthiness of the registry dict is equivalent to len(models) > 0.
    report["vision_loaded"] = bool(models)
    return report
128
 
129
@api.post("/api/chat")
async def api_chat(message: str = Form(...)):
    """One-shot REST chat endpoint (no history, greedy decoding).

    Args:
        message: User message, sent as form data.

    Returns:
        {"response": <decoded reply>}.
    """
    # NOTE(review): model.generate is a blocking CPU-bound call inside an
    # async handler, so it stalls the event loop for the whole generation;
    # consider run_in_executor / a sync def handler — confirm load profile.
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": message},
    ]

    # Consistency fix: move the prompt tensor with .to(device) like
    # chat_simple does (a no-op on CPU, but keeps placement uniform if
    # `device` ever changes).
    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(device)

    output = model.generate(input_ids, max_new_tokens=256)
    # Strip the echoed prompt; decode only the generated continuation.
    reply = tokenizer.decode(output[0][input_ids.shape[1]:], skip_special_tokens=True)
    return {"response": reply}
138
 
139
  @api.post("/api/vision")
140
+ async def api_vision(image: UploadFile = File(...), text_input: str = Form("Describe this"), model_id: str = Form("microsoft/Phi-3.5-vision-instruct")):
141
  img = Image.open(image.file).convert("RGB")
142
+ return {"response": run_vision(np.array(img), text_input, model_id)}
 
 
143
 
144
+ # ---------------- GRADIO UI ----------------
145
 
146
def create_ui():
    """Assemble the two-tab Gradio interface (Chat + Vision).

    Returns:
        The gr.Blocks app, ready to be mounted on FastAPI.
    """
    vision_choices = list(models.keys())
    default_choice = vision_choices[0] if vision_choices else None

    with gr.Blocks() as demo:
        with gr.Tab("Chat"):
            gr.ChatInterface(fn=chat_simple)

        with gr.Tab("Vision"):
            image_in = gr.Image()
            question = gr.Textbox("What's in this image?")
            # Dropdown is empty (value=None) when the vision model failed to load.
            picker = gr.Dropdown(choices=vision_choices, value=default_choice)
            answer = gr.Textbox()
            gr.Button("Analyze").click(run_vision, [image_in, question, picker], answer)

    return demo
159
 
160
# Build the Gradio UI once at import time and mount it at "/" on the
# FastAPI instance; `app` is the ASGI entry point (e.g. `uvicorn app:app`).
gradio_app = create_ui()

app = mount_gradio_app(api, gradio_app, path="/")