Spaces:
Sleeping
Sleeping
File size: 5,153 Bytes
d56c6ae a25ea49 d56c6ae 4972899 d56c6ae d9af09c d56c6ae 4972899 d56c6ae 834f432 a25ea49 d56c6ae a25ea49 d56c6ae d9af09c a25ea49 4972899 d9af09c 4972899 a25ea49 d9af09c 681fa3a a25ea49 d56c6ae 4972899 c205b4c a25ea49 4972899 a25ea49 d9af09c a25ea49 c205b4c a25ea49 d56c6ae d9af09c a25ea49 4972899 d56c6ae a25ea49 d56c6ae a25ea49 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
# import io
# import base64
# from typing import List, Dict, Tuple
# from PIL import Image
# from transformers import pipeline
# from src.config import LOGO_DETECTION_MODEL
# # --------------------------------------------------
# # MODEL INITIALIZATION (LOAD ONCE)
# # --------------------------------------------------
# # Object detection pipeline for logo / seal detection
# detector = pipeline(
# task="object-detection",
# model=LOGO_DETECTION_MODEL,
# device=-1 # CPU
# )
# # --------------------------------------------------
# # LOGO DETECTION
# # --------------------------------------------------
# def detect_logos_from_bytes(
# image_bytes: bytes,
# resize: Tuple[int, int] = (1024, 1024),
# max_logos: int = 3
# ) -> List[Dict[str, str | float]]:
# """
# Detect logos or visual emblems from raw image bytes.
# The function resizes the image for faster inference,
# detects logo regions, crops them, and returns the
# cropped logo images encoded in base64 along with
# confidence scores.
# Parameters
# ----------
# image_bytes : bytes
# Raw image data.
# resize : tuple[int, int], optional
# Maximum image size for inference (default: 1024x1024).
# max_logos : int, optional
# Maximum number of detected logos to return.
# Returns
# -------
# list[dict]
# List of detected logos with:
# - confidence: float
# - image_base64: str
# """
# # Load image from bytes
# image: Image.Image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
# # Resize image for performance optimization
# image.thumbnail(resize)
# # Run object detection
# detections = detector(image)
# results: List[Dict[str, str | float]] = []
# # Process top detections only
# for det in detections[:max_logos]:
# box = det["box"]
# score: float = float(det["score"])
# xmin: int = int(box["xmin"])
# ymin: int = int(box["ymin"])
# xmax: int = int(box["xmax"])
# ymax: int = int(box["ymax"])
# # Crop detected logo region
# cropped = image.crop((xmin, ymin, xmax, ymax))
# # Convert cropped logo to base64
# buffer = io.BytesIO()
# cropped.save(buffer, format="PNG")
# results.append({
# "confidence": round(score, 3),
# "image_base64": base64.b64encode(buffer.getvalue()).decode()
# })
# return results
import io
import base64
from typing import List, Dict, Tuple
from PIL import Image
from transformers import pipeline
from src.config import LOGO_DETECTION_MODEL
# --------------------------------------------------
# MODEL INITIALIZATION (LOAD ONCE)
# --------------------------------------------------
# Shared object-detection pipeline, built once when this module is imported
# so that every call reuses it. device=-1 means CPU (HF Spaces safe).
detector = pipeline(task="object-detection", model=LOGO_DETECTION_MODEL, device=-1)
# --------------------------------------------------
# LOGO DETECTION FUNCTION
# --------------------------------------------------
def _fit_within(
    width: int,
    height: int,
    bounds: Tuple[int, int],
) -> Tuple[int, int]:
    """Return (width, height) shrunk to fit inside bounds, keeping aspect ratio."""
    max_width, max_height = bounds
    # A single scale factor for both axes is what preserves the aspect ratio;
    # capping it at 1.0 means images are only ever shrunk, never enlarged.
    scale = min(1.0, max_width / width, max_height / height)
    if scale >= 1.0:
        return width, height
    # Clamp to one pixel so an extreme ratio cannot produce an empty image.
    return max(1, round(width * scale)), max(1, round(height * scale))


def detect_logos_from_bytes(
    image_bytes: bytes,
    resize: Tuple[int, int] = (1024, 1024),
    max_logos: int = 4,
    threshold: float = 0.2
) -> List[Dict[str, str | float]]:
    """
    Detect logos or visual emblems in an encoded image and return them cropped.

    The image is decoded to RGB, shrunk (never enlarged) to fit inside
    ``resize`` while keeping its aspect ratio, and passed to the module-level
    ``detector`` pipeline. Detections are ranked by score, and the best
    ``max_logos`` boxes that still have a positive area after clamping to the
    image bounds are cropped and PNG-encoded.

    Parameters
    ----------
    image_bytes : bytes
        Raw encoded image data.
    resize : tuple[int, int], optional
        Maximum (width, height) of the image handed to the detector.
    max_logos : int, optional
        Maximum number of crops to return; zero or a negative value yields
        an empty list.
    threshold : float, optional
        Value forwarded to the detector as its ``threshold`` argument.

    Returns
    -------
    list[dict]
        One entry per returned logo, highest score first, with:
        - confidence: float, the detection score rounded to 3 decimals
        - image_base64: str, the PNG crop encoded as base64 text

    Notes
    -----
    Errors raised while decoding ``image_bytes`` or running the detector are
    not caught here and propagate to the caller.
    """
    # -------------------------------
    # Load image
    # -------------------------------
    image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

    # Explicit, aspect-preserving downscale. Clamping each axis on its own
    # would stretch the picture (and every crop taken from it) whenever only
    # one dimension exceeds its limit.
    target_size = _fit_within(image.width, image.height, resize)
    if target_size != image.size:
        image = image.resize(target_size)

    # -------------------------------
    # Object detection (explicit threshold)
    # -------------------------------
    detections = detector(image, threshold=threshold)
    if not detections:
        return []

    # Highest-confidence detections first.
    ranked = sorted(detections, key=lambda det: det["score"], reverse=True)

    results: List[Dict[str, str | float]] = []

    # -------------------------------
    # Collect the top valid detections
    # -------------------------------
    for det in ranked:
        # Checking the quota here (instead of slicing the list up front)
        # lets a later valid detection replace one skipped as degenerate,
        # and makes non-positive max_logos return nothing.
        if len(results) >= max_logos:
            break

        box = det["box"]

        # Clamp the box to the image so the crop never reads outside it.
        xmin = max(0, int(box["xmin"]))
        ymin = max(0, int(box["ymin"]))
        xmax = min(image.width, int(box["xmax"]))
        ymax = min(image.height, int(box["ymax"]))

        # Skip boxes that collapse to zero or negative area after clamping.
        if xmax <= xmin or ymax <= ymin:
            continue

        # Crop the logo region and encode it as a base64 PNG.
        buffer = io.BytesIO()
        image.crop((xmin, ymin, xmax, ymax)).save(buffer, format="PNG")

        results.append({
            "confidence": round(float(det["score"]), 3),
            "image_base64": base64.b64encode(buffer.getvalue()).decode("utf-8"),
        })

    return results
|