Spaces:
Sleeping
Sleeping
File size: 3,040 Bytes
b0cc733 ba9dd6c b0cc733 ba9dd6c b0cc733 3a081c4 ba9dd6c 3a081c4 ba9dd6c 3a081c4 ba9dd6c b0cc733 3a081c4 ba9dd6c b0cc733 3a081c4 ba9dd6c b0cc733 84c3056 b0cc733 ba9dd6c 3a081c4 84c3056 b0cc733 3a081c4 ba9dd6c b0cc733 ba9dd6c b0cc733 ba9dd6c b0cc733 ba9dd6c 84c3056 b0cc733 ba9dd6c b0cc733 3a081c4 84c3056 ba9dd6c 84c3056 b0cc733 3a081c4 ba9dd6c 3a081c4 ba9dd6c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 | import io, re
from typing import List, Dict, Tuple, Any
from PIL import Image, ImageDraw
# pytesseract is an optional dependency. If importing it fails for any reason,
# both names are bound to None so the module still loads; ocr_image checks
# `pytesseract is None` and returns an empty result instead of running OCR.
try:
    import pytesseract
    from pytesseract import Output as TessOutput
except Exception:
    pytesseract = None
    TessOutput = None

# Price-like text: either a dollar amount with 1-4 integer digits and an
# optional two-digit fraction ("$12", "$ 12.50"), or a bare decimal with exactly
# two fraction digits ("12.50").
# The (?!\d) lookahead stops the dollar branch from matching a prefix of a
# longer digit run: without it "$10000" would match as "$1000".
PRICE_RE = re.compile(r"(\$\s*\d{1,4}(?:\.\d{2})?(?!\d)|\b\d{1,4}\.\d{2}\b)")
def ocr_image(image_bytes: bytes) -> Tuple[str, List[Dict[str, Any]], Tuple[int, int]]:
    """Run OCR on an encoded image and return its text, word tokens and size.

    The function is best-effort: it returns a (full_text, tokens, size) triple
    even when the image cannot be decoded or OCR is unavailable or fails.

    Args:
        image_bytes: The encoded image file contents.

    Returns:
        A tuple (full_text, tokens, size):
        - full_text: the token texts joined by single spaces ("" if none).
        - tokens: list of dicts with keys 'text', 'conf' (float, -1.0 when
          unavailable) and 'box' (left, top, width, height).
        - size: (width, height) of the image, or (0, 0) if it could not be
          opened.
    """
    # Load the image; anything that cannot be opened yields the empty triple.
    try:
        img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    except Exception:
        return "", [], (0, 0)

    # No OCR engine available: nothing to read, but the size is still known.
    if pytesseract is None:
        return "", [], img.size

    # Run OCR; any engine failure degrades to an empty result.
    try:
        data = pytesseract.image_to_data(img, output_type=TessOutput.DICT)
    except Exception:
        return "", [], img.size

    confs = data.get("conf", [])
    tokens: List[Dict[str, Any]] = []
    for i, txt in enumerate(data.get("text") or []):
        # Skip empty AND whitespace-only entries; the latter would otherwise
        # show up as blank tokens and stray spaces in full_text.
        if not txt or not str(txt).strip():
            continue
        try:
            conf = float(confs[i])
        except (IndexError, TypeError, ValueError):
            conf = -1.0
        box = (data["left"][i], data["top"][i], data["width"][i], data["height"][i])
        tokens.append({"text": txt, "conf": conf, "box": box})

    full_text = " ".join(t["text"] for t in tokens)
    return full_text, tokens, img.size
def guess_price(tokens: List[Dict[str, Any]]):
    """Pick the smallest plausible price found in the token texts.

    Every price-like match in every token is considered, because a single OCR
    token such as "$9.99/$4.99" can carry more than one amount. Commas are
    removed before matching, and only values from 0.5 to 1000 inclusive count
    as plausible.

    Args:
        tokens: Token dicts with 'text' and 'box' keys. Tokens with empty or
            missing text, or without a 'box' key, are skipped. None or an
            empty list is accepted.

    Returns:
        (value, box) for the smallest plausible price, where box is the
        token's 'box' entry, or (None, None) when no plausible price exists.
    """
    best = None
    for tok in tokens or []:
        text = tok.get("text")
        if not text or "box" not in tok:
            continue
        # Drop commas so "1,299.00" is read as one number.
        cleaned = str(text).replace(",", "")
        for match in PRICE_RE.finditer(cleaned):
            raw = match.group(0).replace("$", "").strip()
            try:
                value = float(raw)
            except ValueError:
                # PRICE_RE is defined outside this function; stay defensive in
                # case it ever matches text that float() cannot parse.
                continue
            if 0.5 <= value <= 1000 and (best is None or value < best[0]):
                best = (value, tok["box"])
    return best if best is not None else (None, None)
def guess_title(text: str) -> str:
    """Make a short human-readable title from OCR text.

    Takes the first five "words" of the text, where a word is a run of at
    least three ASCII letters, digits or hyphens. Runs consisting only of
    hyphens (e.g. an OCR'd separator line "-----") are not counted as words.

    Args:
        text: OCR text; None or "" is accepted.

    Returns:
        Up to five words joined by single spaces, or "Item" when the text
        contains no usable word.
    """
    candidates = re.findall(r"[A-Za-z0-9\-]{3,}", text or "")
    # A candidate that is empty once hyphens are stripped carries no content.
    words = [word for word in candidates if word.strip("-")]
    return " ".join(words[:5]) if words else "Item"
def annotate_price_box(image_bytes: bytes, box: Tuple[int, int, int, int], label: str = None) -> bytes:
    """Outline the detected price in red and return the result as PNG bytes.

    Args:
        image_bytes: The encoded source image.
        box: (x, y, width, height) of the price; a falsy box draws nothing.
        label: Optional text drawn on a red strip above the box. It is only
            drawn when a box is given.

    Returns:
        The image re-encoded as PNG with the annotation applied. If anything
        fails along the way, the original image_bytes are returned unchanged.
    """
    red = (255, 0, 0)
    white = (255, 255, 255)
    try:
        canvas = Image.open(io.BytesIO(image_bytes)).convert("RGB")
        pen = ImageDraw.Draw(canvas)
        if box:
            left, top, width, height = box
            pen.rectangle([left, top, left + width, top + height], outline=red, width=3)
            if label:
                # Label strip sits 22px above the box, clamped to the top edge;
                # its width is a rough 8px-per-character estimate, minimum 60px.
                strip_top = max(0, top - 22)
                strip_right = left + max(60, len(label) * 8)
                pen.rectangle([left, strip_top, strip_right, strip_top + 22], fill=red)
                pen.text((left + 4, strip_top + 4), label, fill=white)
        encoded = io.BytesIO()
        canvas.save(encoded, format="PNG")
        return encoded.getvalue()
    except Exception:
        # Best effort: hand back the input rather than failing the caller.
        return image_bytes
|