arasuezofis committed on
Commit
3a1ba6d
·
verified ·
1 Parent(s): 1fcca49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -6
app.py CHANGED
@@ -163,24 +163,30 @@ def build_messages(history: List[Tuple[str, str]], user_text: str, images: List[
163
  def generate_reply(images: List[Image.Image], user_text: str, chat_history: List[Tuple[str, str]]):
164
  """
165
  Stream a model reply grounded on provided images + user question + compact chat history.
166
- Key fix: use tokenizer.apply_chat_template and a streamer built with the same tokenizer.
167
  """
168
  messages = build_messages(chat_history, user_text, images)
169
 
170
- # Text inputs via tokenizer chat template
171
- text_inputs = tokenizer.apply_chat_template(
172
  messages,
173
  add_generation_prompt=True,
174
- tokenize=True,
 
 
 
 
 
175
  return_tensors="pt"
176
  ).to(DEVICE)
177
 
178
- # Vision tensors via processor
179
  vision_inputs = processor(images=images, return_tensors="pt").to(DEVICE)
180
 
181
- # Merge dicts (input_ids, attention_mask, pixel_values)
182
  model_inputs = {**text_inputs, **vision_inputs}
183
 
 
184
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
185
  gen_kwargs = dict(
186
  **model_inputs,
@@ -200,6 +206,7 @@ def generate_reply(images: List[Image.Image], user_text: str, chat_history: List
200
  yield partial
201
 
202
 
 
203
  # -----------------------------
204
  # Gradio UI Orchestration
205
  # -----------------------------
 
163
  def generate_reply(images: List[Image.Image], user_text: str, chat_history: List[Tuple[str, str]]):
164
  """
165
  Stream a model reply grounded on provided images + user question + compact chat history.
166
+ Key fix: build text with chat template (string), then tokenize to get a dict.
167
  """
168
  messages = build_messages(chat_history, user_text, images)
169
 
170
+ # 1) Get the chat prompt as TEXT (not tokens)
171
+ prompt_text = tokenizer.apply_chat_template(
172
  messages,
173
  add_generation_prompt=True,
174
+ tokenize=False, # <-- IMPORTANT: return string
175
+ )
176
+
177
+ # 2) Tokenize to get a dict (input_ids, attention_mask)
178
+ text_inputs = tokenizer(
179
+ prompt_text,
180
  return_tensors="pt"
181
  ).to(DEVICE)
182
 
183
+ # 3) Vision tensors (dict with pixel_values)
184
  vision_inputs = processor(images=images, return_tensors="pt").to(DEVICE)
185
 
186
+ # 4) Merge dicts safely
187
  model_inputs = {**text_inputs, **vision_inputs}
188
 
189
+ # 5) Stream with the same tokenizer
190
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
191
  gen_kwargs = dict(
192
  **model_inputs,
 
206
  yield partial
207
 
208
 
209
+
210
  # -----------------------------
211
  # Gradio UI Orchestration
212
  # -----------------------------