Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -190,12 +190,21 @@ def generate_image(model_name: str, text: str, image: Image.Image,
|
|
| 190 |
|
| 191 |
images = [image.convert("RGB")]
|
| 192 |
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
|
| 200 |
streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
|
| 201 |
generation_kwargs = {
|
|
|
|
| 190 |
|
| 191 |
images = [image.convert("RGB")]
|
| 192 |
|
| 193 |
+
# --- FIX: Handle different prompt formats required by models ---
|
| 194 |
+
if model_name == "PaddleOCR":
|
| 195 |
+
# PaddleOCR's chat template expects the message content to be a plain string.
|
| 196 |
+
# The image is passed to the processor separately.
|
| 197 |
+
messages = [{"role": "user", "content": text}]
|
| 198 |
+
prompt = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
|
| 199 |
+
inputs = processor(text=prompt, images=images, return_tensors="pt").to(device)
|
| 200 |
+
else:
|
| 201 |
+
# Nanonets and Dots.OCR accept message content as a list of typed parts (an image placeholder plus the text).
|
| 202 |
+
messages = [
|
| 203 |
+
{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": text}]}
|
| 204 |
+
]
|
| 205 |
+
prompt = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
|
| 206 |
+
inputs = processor(text=prompt, images=images, return_tensors="pt").to(device)
|
| 207 |
+
# --- END FIX ---
|
| 208 |
|
| 209 |
streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
|
| 210 |
generation_kwargs = {
|