Spaces:
Sleeping
Sleeping
Update ocr_utils.py
Browse files- ocr_utils.py +29 -50
ocr_utils.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
import io, re
|
| 2 |
-
from typing import List, Dict, Tuple, Any
|
| 3 |
from PIL import Image, ImageDraw
|
| 4 |
|
| 5 |
try:
|
|
@@ -9,16 +8,10 @@ except Exception:
|
|
| 9 |
pytesseract = None
|
| 10 |
TessOutput = None
|
| 11 |
|
| 12 |
-
# regex for price detection
|
| 13 |
-
PRICE_RE = re.compile(r"(\$\s*\d{1,4}(?:\.\d{2})?|\b\d{1,4}\.\d{2}\b)")
|
| 14 |
-
|
| 15 |
def ocr_image(image_bytes: bytes):
|
| 16 |
"""
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
- tokens: list of {'text','conf','box'}
|
| 20 |
-
- size: (width, height)
|
| 21 |
-
Always returns 3 values, even if OCR fails.
|
| 22 |
"""
|
| 23 |
try:
|
| 24 |
img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
|
|
@@ -34,60 +27,46 @@ def ocr_image(image_bytes: bytes):
|
|
| 34 |
return "", [], img.size
|
| 35 |
|
| 36 |
tokens = []
|
| 37 |
-
for i,
|
| 38 |
-
if not
|
| 39 |
continue
|
| 40 |
try:
|
| 41 |
conf = float(data.get("conf", ["-1"])[i])
|
| 42 |
-
except:
|
| 43 |
conf = -1.0
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
-
|
| 48 |
-
return
|
| 49 |
|
| 50 |
|
| 51 |
-
def guess_price(tokens
|
| 52 |
-
"""
|
| 53 |
-
|
| 54 |
for t in tokens:
|
| 55 |
-
m =
|
| 56 |
if m:
|
| 57 |
-
|
| 58 |
try:
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
if best is None or val < best[0]:
|
| 62 |
-
best = (val, t["box"])
|
| 63 |
-
except:
|
| 64 |
pass
|
| 65 |
-
return
|
| 66 |
|
| 67 |
|
| 68 |
-
def guess_title(
|
| 69 |
-
"""
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
return ""
|
| 73 |
-
words = [w for w in re.findall(r"[A-Za-z][A-Za-z0-9\-]{2,}", text) if not w.isdigit()]
|
| 74 |
-
return " ".join(words[:4]) if words else "Item"
|
| 75 |
|
| 76 |
|
| 77 |
-
def annotate_price_box(image_bytes: bytes, box
|
| 78 |
-
"""Draw
|
| 79 |
try:
|
| 80 |
-
img = Image.open(io.
|
| 81 |
-
draw = ImageDraw.Draw(img)
|
| 82 |
-
x, y, w, h = box
|
| 83 |
-
draw.rectangle([x, y, x + w, y + h], outline=(255, 0, 0), width=3)
|
| 84 |
-
if label:
|
| 85 |
-
tw = max(50, int(len(label) * 8))
|
| 86 |
-
y0 = max(0, y - 22)
|
| 87 |
-
draw.rectangle([x, y0, x + tw, y0 + 22], fill=(255, 0, 0))
|
| 88 |
-
draw.text((x + 4, y0 + 4), label, fill=(255, 255, 255))
|
| 89 |
-
out = io.BytesIO()
|
| 90 |
-
img.save(out, format="PNG")
|
| 91 |
-
return out.getvalue()
|
| 92 |
-
except Exception:
|
| 93 |
-
return image_bytes
|
|
|
|
| 1 |
import io, re
|
|
|
|
| 2 |
from PIL import Image, ImageDraw
|
| 3 |
|
| 4 |
try:
|
|
|
|
| 8 |
pytesseract = None
|
| 9 |
TessOutput = None
|
| 10 |
|
|
|
|
|
|
|
|
|
|
| 11 |
def ocr_image(image_bytes: bytes):
|
| 12 |
"""
|
| 13 |
+
Always return exactly three values: (text, tokens, size)
|
| 14 |
+
even if OCR fails or is missing.
|
|
|
|
|
|
|
|
|
|
| 15 |
"""
|
| 16 |
try:
|
| 17 |
img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
|
|
|
|
| 27 |
return "", [], img.size
|
| 28 |
|
| 29 |
tokens = []
|
| 30 |
+
for i, word in enumerate(data["text"]):
|
| 31 |
+
if not word:
|
| 32 |
continue
|
| 33 |
try:
|
| 34 |
conf = float(data.get("conf", ["-1"])[i])
|
| 35 |
+
except Exception:
|
| 36 |
conf = -1.0
|
| 37 |
+
box = (
|
| 38 |
+
data["left"][i],
|
| 39 |
+
data["top"][i],
|
| 40 |
+
data["width"][i],
|
| 41 |
+
data["height"][i],
|
| 42 |
+
)
|
| 43 |
+
tokens.append({"text": word, "conf": conf, "box": box})
|
| 44 |
|
| 45 |
+
full_text = " ".join(t["text"] for t in tokens)
|
| 46 |
+
return full_text, tokens, img.size
|
| 47 |
|
| 48 |
|
| 49 |
+
# A price is an optional "$", 1-4 digits and an optional two-digit fraction
# written with either "." or "," as the decimal mark.  The lookarounds stop the
# pattern from carving a "price" out of a longer number such as a barcode
# ("1234567890") or a thousands-separated amount ("1,299.00").
_PRICE_TOKEN_RE = re.compile(
    r"(?<![\d.,])"                 # not the tail of a longer number
    r"\$?(\d{1,4}(?:[.,]\d{2})?)"  # group 1: the numeric part, without "$"
    r"(?![.,]?\d)"                 # not the head of a longer number
)


def guess_price(tokens):
    """Return the first price-looking token as ``(value, box)``.

    Args:
        tokens: iterable of dicts with a ``"text"`` string and a ``"box"``
            entry, as built by ``ocr_image``.

    Returns:
        ``(float_value, box)`` for the first token whose text contains a
        price such as ``$4.50``, ``12,99`` or ``15``; ``(None, None)`` when
        no token qualifies.
    """
    for token in tokens:
        match = _PRICE_TOKEN_RE.search(token["text"])
        if match is None:
            continue
        # A comma inside the match is a decimal mark ("12,99"), so convert it
        # to a point rather than deleting it (which would yield 1299.0).
        number = match.group(1).replace(",", ".")
        try:
            return float(number), token["box"]
        except (KeyError, ValueError):
            # Token without a box (or an unparsable number): try the next one.
            continue
    return None, None
|
| 61 |
|
| 62 |
|
| 63 |
+
def guess_title(text: str) -> str:
    """Condense OCR text into a short title of at most five words.

    A word is a run of three or more letters, digits or hyphens.  Runs made
    only of hyphens (ruled lines that OCR commonly emits) are ignored.

    Args:
        text: raw OCR text; ``None`` or an empty string is tolerated.

    Returns:
        The first five qualifying words joined by single spaces, or
        ``"Item"`` when there is no usable word.
    """
    if not text:
        # Failed or empty OCR: fall back instead of letting re.findall raise
        # TypeError on None.
        return "Item"
    candidates = re.findall(r"[A-Za-z0-9\-]{3,}", text)
    # Require at least one letter or digit so "-----" never becomes a title.
    words = [w for w in candidates if any(ch.isalnum() for ch in w)]
    return " ".join(words[:5]) if words else "Item"
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
|
| 69 |
+
def annotate_price_box(image_bytes: bytes, box, label: str = None):
|
| 70 |
+
"""Draw red rectangle around detected price."""
|
| 71 |
try:
|
| 72 |
+
img = Image.open(io.By
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|