snipebargain / ocr_utils.py
theDavidGuy's picture
Update ocr_utils.py
ba9dd6c verified
import io, re
from typing import List, Dict, Tuple, Any
from PIL import Image, ImageDraw
# pytesseract is treated as an optional dependency: if importing it fails for
# any reason, both names are bound to None. ocr_image() checks
# `pytesseract is None` and returns an empty OCR result in that case.
try:
    import pytesseract
    from pytesseract import Output as TessOutput
except Exception:
    pytesseract = None
    TessOutput = None
# Matches either a dollar-prefixed amount with an optional two-digit fraction
# ("$12", "$ 12.99") or a bare decimal with exactly two fractional digits
# ("12.99"). The integer part is limited to 1-4 digits in both alternatives.
# NOTE(review): the "$" alternative has no trailing digit boundary, so a longer
# digit run such as "$10000" matches its "$1000" prefix - confirm this is OK.
PRICE_RE = re.compile(r"(\$\s*\d{1,4}(?:\.\d{2})?|\b\d{1,4}\.\d{2}\b)")
def ocr_image(image_bytes: bytes):
    """
    Run OCR over encoded image bytes and return a ``(full_text, tokens, size)`` triple.

    The triple is returned on the failure paths as well:
    - the bytes cannot be decoded as an image -> ``("", [], (0, 0))``
    - pytesseract is unavailable or raises    -> ``("", [], image_size)``

    Returns:
        full_text: the non-empty token texts joined with single spaces.
        tokens: one dict per non-empty OCR entry, with keys ``'text'``,
            ``'conf'`` (float, ``-1.0`` when it cannot be read) and ``'box'``
            (``(left, top, width, height)`` as reported by pytesseract).
        size: ``(width, height)`` of the decoded image.
    """
    # Decode first; anything that goes wrong here means there is no image
    # to report a size for.
    try:
        picture = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    except Exception:
        return "", [], (0, 0)

    # Without an OCR engine there is nothing to recognise.
    if pytesseract is None:
        return "", [], picture.size

    # An OCR failure is reported the same way as "no text found".
    try:
        data = pytesseract.image_to_data(picture, output_type=TessOutput.DICT)
    except Exception:
        return "", [], picture.size

    found: List[Dict[str, Any]] = []
    words = []
    for index, word in enumerate(data["text"]):
        if word:
            # A missing or unparsable confidence falls back to -1.0.
            try:
                confidence = float(data.get("conf", ["-1"])[index])
            except Exception:
                confidence = -1.0
            bounds = (
                data["left"][index],
                data["top"][index],
                data["width"][index],
                data["height"][index],
            )
            found.append({"text": word, "conf": confidence, "box": bounds})
            words.append(word)

    return " ".join(words), found, picture.size
def guess_price(tokens: List[Dict[str, Any]]):
    """
    Return the lowest plausible price among the OCR tokens.

    Each token's ``'text'`` has its commas removed and is searched with
    ``PRICE_RE``; a match counts when its numeric value lies in
    ``[0.5, 1000]``. The result is ``(value, box)`` for the smallest such
    value (the first one wins on ties), or ``(None, None)`` when no token
    qualifies.
    """
    winner = None
    for token in tokens:
        cleaned = token["text"].replace(",", "")
        hit = PRICE_RE.search(cleaned)
        if not hit:
            continue

        digits = hit.group(0).replace("$", "").strip()
        try:
            amount = float(digits)
            in_range = 0.5 <= amount <= 1000
            if in_range and (winner is None or amount < winner[0]):
                winner = (amount, token["box"])
        except Exception:
            # Best effort: a token that cannot be evaluated is skipped.
            continue

    if winner:
        return winner
    return None, None
def guess_title(text: str) -> str:
    """
    Build a short title from OCR text.

    Takes the first five runs of three or more letters, digits or hyphens
    and joins them with spaces. Returns ``"Item"`` when the text is empty,
    ``None``, or contains no such run.
    """
    source = text if text else ""
    picked = []
    for match in re.finditer(r"[A-Za-z0-9\-]{3,}", source):
        picked.append(match.group(0))
        if len(picked) == 5:
            break

    if not picked:
        return "Item"
    return " ".join(picked)
def annotate_price_box(image_bytes: bytes, box: Tuple[int, int, int, int], label: str = None) -> bytes:
    """
    Outline the detected price in red and return the image as PNG bytes.

    Args:
        image_bytes: encoded source image.
        box: ``(x, y, width, height)`` of the region to outline; when falsy
            nothing is drawn, but the image is still re-encoded as PNG.
        label: optional text drawn in white on a red tag placed just above
            the box (clamped to the top edge of the image).

    Returns:
        The annotated PNG bytes, or ``image_bytes`` unchanged if any step
        raises.
    """
    try:
        canvas = Image.open(io.BytesIO(image_bytes)).convert("RGB")
        pen = ImageDraw.Draw(canvas)

        if box:
            left, top, width, height = box
            red = (255, 0, 0)
            pen.rectangle([left, top, left + width, top + height], outline=red, width=3)

            if label:
                # The tag is 22 px tall and sits directly above the box,
                # never above the top edge of the image.
                tag_top = max(0, top - 22)
                # Roughly 8 px per character, with a 60 px minimum width.
                tag_right = left + max(60, len(label) * 8)
                pen.rectangle([left, tag_top, tag_right, tag_top + 22], fill=red)
                pen.text((left + 4, tag_top + 4), label, fill=(255, 255, 255))

        buffer = io.BytesIO()
        canvas.save(buffer, format="PNG")
        return buffer.getvalue()
    except Exception:
        # Best effort: hand back the caller's bytes rather than fail.
        return image_bytes