Spaces:
Runtime error
Runtime error
ivelin
committed on
Commit
·
e8e6698
1
Parent(s):
7b3f48a
fix: bugs
Browse files
Signed-off-by: ivelin <ivelin.eth@gmail.com>
app.py
CHANGED
|
@@ -4,7 +4,6 @@ from PIL import Image, ImageDraw
|
|
| 4 |
import math
|
| 5 |
import torch
|
| 6 |
import html
|
| 7 |
-
import json
|
| 8 |
from transformers import DonutProcessor, VisionEncoderDecoderModel
|
| 9 |
|
| 10 |
pretrained_repo_name = "ivelin/donut-refexp-draft"
|
|
@@ -56,7 +55,6 @@ def process_refexp(image: Image, prompt: str):
|
|
| 56 |
print(
|
| 57 |
fr"predicted decoder sequence before token2json: {html.escape(sequence)}")
|
| 58 |
bbox = processor.token2json(sequence)
|
| 59 |
-
bbox = json.loads(bbox)
|
| 60 |
print(f"predicted bounding box: {bbox}")
|
| 61 |
|
| 62 |
print(f"image object: {image}")
|
|
@@ -65,10 +63,15 @@ def process_refexp(image: Image, prompt: str):
|
|
| 65 |
print(f"image width, height: {width, height}")
|
| 66 |
print(f"processed prompt: {prompt}")
|
| 67 |
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
print(
|
| 74 |
f"to image pixel values: xmin, ymin, xmax, ymax: {xmin, ymin, xmax, ymax}")
|
|
|
|
| 4 |
import math
|
| 5 |
import torch
|
| 6 |
import html
|
|
|
|
| 7 |
from transformers import DonutProcessor, VisionEncoderDecoderModel
|
| 8 |
|
| 9 |
pretrained_repo_name = "ivelin/donut-refexp-draft"
|
|
|
|
| 55 |
print(
|
| 56 |
fr"predicted decoder sequence before token2json: {html.escape(sequence)}")
|
| 57 |
bbox = processor.token2json(sequence)
|
|
|
|
| 58 |
print(f"predicted bounding box: {bbox}")
|
| 59 |
|
| 60 |
print(f"image object: {image}")
|
|
|
|
| 63 |
print(f"image width, height: {width, height}")
|
| 64 |
print(f"processed prompt: {prompt}")
|
| 65 |
|
| 66 |
+
# safeguard in case text prediction is missing some bounding box coordinates
|
| 67 |
+
xmin = math.floor(width*float(bbox["xmin"])
|
| 68 |
+
) if bbox.get("xmin") is not None else 0
|
| 69 |
+
ymin = math.floor(
|
| 70 |
+
height*float(bbox["ymin"])) if bbox.get("ymin") is not None else 0
|
| 71 |
+
xmax = math.floor(width*float(bbox["xmax"])
|
| 72 |
+
) if bbox.get("xmax") is not None else 1
|
| 73 |
+
ymax = math.floor(
|
| 74 |
+
height*float(bbox["ymax"])) if bbox.get("ymax") is not None else 1
|
| 75 |
|
| 76 |
print(
|
| 77 |
f"to image pixel values: xmin, ymin, xmax, ymax: {xmin, ymin, xmax, ymax}")
|