File size: 5,153 Bytes
d56c6ae
a25ea49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d56c6ae
 
4972899
d56c6ae
 
 
 
d9af09c
d56c6ae
 
 
4972899
d56c6ae
 
 
834f432
a25ea49
d56c6ae
 
 
 
a25ea49
d56c6ae
 
 
d9af09c
a25ea49
 
4972899
d9af09c
4972899
 
a25ea49
 
d9af09c
681fa3a
a25ea49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d56c6ae
4972899
c205b4c
a25ea49
 
 
4972899
 
a25ea49
 
 
 
 
 
d9af09c
a25ea49
 
 
c205b4c
a25ea49
d56c6ae
d9af09c
a25ea49
4972899
 
d56c6ae
 
 
a25ea49
d56c6ae
 
a25ea49
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193

# import io
# import base64
# from typing import List, Dict, Tuple

# from PIL import Image
# from transformers import pipeline

# from src.config import LOGO_DETECTION_MODEL


# # --------------------------------------------------
# # MODEL INITIALIZATION (LOAD ONCE)
# # --------------------------------------------------
# # Object detection pipeline for logo / seal detection
# detector = pipeline(
#     task="object-detection",
#     model=LOGO_DETECTION_MODEL,
#     device=-1  # CPU
# )


# # --------------------------------------------------
# # LOGO DETECTION
# # --------------------------------------------------
# def detect_logos_from_bytes(
#     image_bytes: bytes,
#     resize: Tuple[int, int] = (1024, 1024),
#     max_logos: int = 3
# ) -> List[Dict[str, str | float]]:
#     """
#     Detect logos or visual emblems from raw image bytes.

#     The function resizes the image for faster inference,
#     detects logo regions, crops them, and returns the
#     cropped logo images encoded in base64 along with
#     confidence scores.

#     Parameters
#     ----------
#     image_bytes : bytes
#         Raw image data.
#     resize : tuple[int, int], optional
#         Maximum image size for inference (default: 1024x1024).
#     max_logos : int, optional
#         Maximum number of detected logos to return.

#     Returns
#     -------
#     list[dict]
#         List of detected logos with:
#         - confidence: float
#         - image_base64: str
#     """

#     # Load image from bytes
#     image: Image.Image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

#     # Resize image for performance optimization
#     image.thumbnail(resize)

#     # Run object detection
#     detections = detector(image)

#     results: List[Dict[str, str | float]] = []

#     # Process top detections only
#     for det in detections[:max_logos]:
#         box = det["box"]
#         score: float = float(det["score"])

#         xmin: int = int(box["xmin"])
#         ymin: int = int(box["ymin"])
#         xmax: int = int(box["xmax"])
#         ymax: int = int(box["ymax"])

#         # Crop detected logo region
#         cropped = image.crop((xmin, ymin, xmax, ymax))

#         # Convert cropped logo to base64
#         buffer = io.BytesIO()
#         cropped.save(buffer, format="PNG")

#         results.append({
#             "confidence": round(score, 3),
#             "image_base64": base64.b64encode(buffer.getvalue()).decode()
#         })

#     return results



import io
import base64
from typing import List, Dict, Tuple

from PIL import Image
from transformers import pipeline

from src.config import LOGO_DETECTION_MODEL


# --------------------------------------------------
# MODEL INITIALIZATION (LOAD ONCE)
# --------------------------------------------------
# Constructed once at import time; the detection function below reuses this
# single pipeline object on every call instead of rebuilding it per request.
detector = pipeline(
    "object-detection",
    model=LOGO_DETECTION_MODEL,
    device=-1,  # -1 = run on CPU (HF Spaces safe)
)


# --------------------------------------------------
# LOGO DETECTION FUNCTION
# --------------------------------------------------
def detect_logos_from_bytes(
    image_bytes: bytes,
    resize: Tuple[int, int] = (1024, 1024),
    max_logos: int = 4,
    threshold: float = 0.2
) -> List[Dict[str, str | float]]:
    """
    Detect logos or visual emblems from raw image bytes.

    Returns cropped logo images (base64) with confidence scores.
    Works consistently on local & Hugging Face Spaces.
    """

    # -------------------------------
    # Load image (deterministic)
    # -------------------------------
    image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

    # Deterministic resize (NO thumbnail)
    image = image.resize(
        (
            min(image.width, resize[0]),
            min(image.height, resize[1])
        )
    )

    # -------------------------------
    # Object detection (EXPLICIT threshold)
    # -------------------------------
    detections = detector(
        image,
        threshold=threshold
    )

    if not detections:
        return []

    # -------------------------------
    # Sort by confidence (IMPORTANT)
    # -------------------------------
    detections = sorted(
        detections,
        key=lambda x: x["score"],
        reverse=True
    )

    results: List[Dict[str, str | float]] = []

    # -------------------------------
    # Process top detections
    # -------------------------------
    for det in detections[:max_logos]:
        box = det["box"]
        score = float(det["score"])

        xmin = max(0, int(box["xmin"]))
        ymin = max(0, int(box["ymin"]))
        xmax = min(image.width, int(box["xmax"]))
        ymax = min(image.height, int(box["ymax"]))

        # Safety check
        if xmax <= xmin or ymax <= ymin:
            continue

        # Crop logo region
        cropped = image.crop((xmin, ymin, xmax, ymax))

        # Encode cropped logo to base64
        buffer = io.BytesIO()
        cropped.save(buffer, format="PNG")

        results.append({
            "confidence": round(score, 3),
            "image_base64": base64.b64encode(buffer.getvalue()).decode("utf-8")
        })

    return results