Spaces:
Sleeping
Sleeping
| import io, re | |
| from typing import List, Dict, Tuple, Any | |
| from PIL import Image, ImageDraw | |
| try: | |
| import pytesseract | |
| from pytesseract import Output as TessOutput | |
| except Exception: | |
| pytesseract = None | |
| TessOutput = None | |
| PRICE_RE = re.compile(r"(\$\s*\d{1,4}(?:\.\d{2})?|\b\d{1,4}\.\d{2}\b)") | |
def ocr_image(image_bytes: bytes) -> Tuple[str, List[Dict[str, Any]], Tuple[int, int]]:
    """Run OCR on an encoded image and return ``(full_text, tokens, size)``.

    The function never raises: every failure mode degrades to an empty
    result so callers can always unpack a triple.

    Args:
        image_bytes: Raw bytes of an image file in any format Pillow can open.

    Returns:
        A ``(full_text, tokens, size)`` tuple where

        - ``full_text`` is the recognised words joined by single spaces,
        - ``tokens`` is a list of ``{'text', 'conf', 'box'}`` dicts with
          ``box`` given as ``(left, top, width, height)``,
        - ``size`` is the image ``(width, height)``, or ``(0, 0)`` when the
          image itself could not be decoded.
    """
    # Decode the image; anything undecodable yields the "no image" triple.
    try:
        img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    except Exception:
        return "", [], (0, 0)
    size = img.size

    # No OCR engine installed -> empty result, but still a triple.
    if pytesseract is None:
        return "", [], size

    # Run OCR; an engine failure (e.g. missing tesseract binary) is not fatal.
    try:
        data = pytesseract.image_to_data(img, output_type=TessOutput.DICT)
    except Exception:
        return "", [], size

    # Read the columns defensively so a malformed result cannot break the
    # "always return a triple" contract.
    try:
        texts = data.get("text") or []
        confs = data.get("conf") or []
    except AttributeError:
        return "", [], size

    tokens: List[Dict[str, Any]] = []
    for i, txt in enumerate(texts):
        # Skip empty and whitespace-only entries; they carry no text and
        # would only inject extra spaces into full_text.
        word = str(txt).strip() if txt is not None else ""
        if not word:
            continue

        try:
            conf = float(confs[i])
        except (IndexError, KeyError, TypeError, ValueError):
            conf = -1.0

        # A token without complete geometry cannot be boxed, so drop it.
        try:
            box = (
                data["left"][i],
                data["top"][i],
                data["width"][i],
                data["height"][i],
            )
        except (IndexError, KeyError, TypeError):
            continue

        tokens.append({"text": word, "conf": conf, "box": box})

    full_text = " ".join(t["text"] for t in tokens)
    return full_text, tokens, size
# Stricter, self-contained price pattern used by guess_price (the function
# no longer reads the module-level PRICE_RE). It accepts the same two shapes
# -- "$" + 1-4 digits with optional cents, or a bare "d.dd" decimal -- but
# refuses to match a *prefix* of a longer number, so "$100000" is not read
# as 1000 and "$5.999" is not read as 5.99.
# Group 1 holds the amount of a "$"-prefixed price, group 2 a bare decimal.
_PRICE_IN_TOKEN_RE = re.compile(
    r"\$\s*(\d{1,4}(?:\.\d{2})?)(?!\.?\d)"
    r"|(?<![\w.])(\d{1,4}\.\d{2})\b(?!\.\d)"
)


def guess_price(tokens: List[Dict[str, Any]]):
    """Pick the smallest plausible price found in the token texts.

    Thousands separators (commas) are removed before matching. Every price
    in a token is considered, not just the first, so an out-of-range amount
    cannot hide a valid one later in the same token (e.g. "$1200-$15.00").

    Args:
        tokens: OCR tokens, each a dict with at least ``'text'`` and ``'box'``.

    Returns:
        ``(value, box)`` for the smallest price within ``0.5 <= value <= 1000``,
        where ``box`` is the box of the token containing it, or
        ``(None, None)`` when no plausible price is present.
    """
    best = None
    for t in tokens:
        text = t["text"].replace(",", "")
        for m in _PRICE_IN_TOKEN_RE.finditer(text):
            # The pattern guarantees a valid decimal literal, so float()
            # cannot fail here and no exception handling is needed.
            val = float(m.group(1) or m.group(2))
            if 0.5 <= val <= 1000 and (best is None or val < best[0]):
                best = (val, t["box"])
    return best if best is not None else (None, None)
def guess_title(text: str) -> str:
    """Make a short human-readable title from OCR text.

    Collects runs of letters, digits and hyphens, trims hyphens from both
    ends of each run, and keeps runs that still have at least three
    characters. Trimming stops separator rules such as "-----" (common in
    OCR'd labels and receipts) from being counted as title words.

    Args:
        text: OCR text; ``None`` or an empty string is treated as no text.

    Returns:
        The first five qualifying words joined by spaces, or ``"Item"``
        when there are none.
    """
    candidates = re.findall(r"[A-Za-z0-9\-]{3,}", text or "")
    words = [w for w in (c.strip("-") for c in candidates) if len(w) >= 3]
    return " ".join(words[:5]) if words else "Item"
def annotate_price_box(image_bytes: bytes, box: Tuple[int, int, int, int], label: str = None) -> bytes:
    """Outline the detected price in red and return the result as PNG bytes.

    Args:
        image_bytes: Raw bytes of the source image.
        box: ``(left, top, width, height)`` of the price; a falsy value
            skips drawing and the image is only re-encoded as PNG.
        label: Optional caption drawn on a red tag just above the box
            (clamped to the top edge of the image).

    Returns:
        PNG-encoded bytes of the annotated image. This is best-effort: if
        anything fails, the original ``image_bytes`` are returned unchanged.
    """
    try:
        canvas = Image.open(io.BytesIO(image_bytes)).convert("RGB")
        pen = ImageDraw.Draw(canvas)

        if box:
            left, top, width, height = box
            pen.rectangle(
                [left, top, left + width, top + height],
                outline=(255, 0, 0),
                width=3,
            )
            if label:
                # Tag is 22px tall and at least 60px wide, growing by 8px
                # per label character.
                tag_top = max(0, top - 22)
                tag_width = max(60, len(label) * 8)
                pen.rectangle(
                    [left, tag_top, left + tag_width, tag_top + 22],
                    fill=(255, 0, 0),
                )
                pen.text((left + 4, tag_top + 4), label, fill=(255, 255, 255))

        buffer = io.BytesIO()
        canvas.save(buffer, format="PNG")
        return buffer.getvalue()
    except Exception:
        return image_bytes