prithivMLmods committed on
Commit
edbf0ea
·
verified ·
1 Parent(s): f75e630

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -6
app.py CHANGED
@@ -190,12 +190,21 @@ def generate_image(model_name: str, text: str, image: Image.Image,
190
 
191
  images = [image.convert("RGB")]
192
 
193
- messages = [
194
- {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": text}]}
195
- ]
196
- prompt = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
197
-
198
- inputs = processor(text=prompt, images=images, return_tensors="pt").to(device)
 
 
 
 
 
 
 
 
 
199
 
200
  streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
201
  generation_kwargs = {
 
190
 
191
  images = [image.convert("RGB")]
192
 
193
+ # --- FIX: Handle different prompt formats required by models ---
194
+ if model_name == "PaddleOCR":
195
+ # PaddleOCR's template expects a simple string content for the text part.
196
+ # The image is passed to the processor separately.
197
+ messages = [{"role": "user", "content": text}]
198
+ prompt = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
199
+ inputs = processor(text=prompt, images=images, return_tensors="pt").to(device)
200
+ else:
201
+ # Nanonets and Dots.OCR support the modern list format for multimodal content.
202
+ messages = [
203
+ {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": text}]}
204
+ ]
205
+ prompt = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
206
+ inputs = processor(text=prompt, images=images, return_tensors="pt").to(device)
207
+ # --- END FIX ---
208
 
209
  streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
210
  generation_kwargs = {