import streamlit as st
from PIL import Image, ImageDraw, ImageFont
import io
from io import BytesIO
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from rembg import remove
import mediapipe as mp
import torch
from transformers import AutoProcessor, AutoModelForCausalLM
from transformers.dynamic_module_utils import get_imports
from unittest.mock import patch
from scipy.spatial import distance as dist

st.set_page_config(layout="wide", page_title="Ring Size Measurement")
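# Lookup table mapping inner ring diameter (in millimetres) to ring size.
# The values appear to follow the US sizing chart in half-size steps
# (e.g. 16.45 mm -> size 6); get_ring_size() below snaps a measured diameter
# to the closest key in this table.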
ring_size_dict = {
    14.0: 3,
    14.4: 3.5,
    14.8: 4,
    15.2: 4.5,
    15.6: 5,
    16.0: 5.5,
    16.45: 6,
    16.9: 6.5,
    17.3: 7,
    17.7: 7.5,
    18.2: 8,
    18.6: 8.5,
    19.0: 9,
    19.4: 9.5,
    19.8: 10,
    20.2: 10.5,
    20.6: 11,
    21.0: 11.5,
    21.4: 12,
    21.8: 12.5,
    22.2: 13,
    22.6: 13.5
}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

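# Florence-2's remote modeling code declares flash_attn as an import even when
# it is not actually needed; stripping it here lets the model load on machines
# without flash-attn installed (the "sdpa" attention implementation is used
# instead when the model is created in load_model).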
def fixed_get_imports(filename: str | os.PathLike) -> list[str]:
    if not str(filename).endswith("modeling_florence2.py"):
        return get_imports(filename)
    imports = get_imports(filename)
    if "flash_attn" in imports:
        imports.remove("flash_attn")
    return imports

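# Load the Florence-2 processor and model. The model's custom imports are
# patched via fixed_get_imports, and on CPU the Linear layers are dynamically
# quantized to int8 to keep memory use and latency manageable.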
def load_model():
    model_id = "microsoft/Florence-2-base-ft"
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

    os.makedirs("temp", exist_ok=True)

    with patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports):
        model = AutoModelForCausalLM.from_pretrained(model_id, attn_implementation="sdpa", trust_remote_code=True)

    if device.type == "cpu":
        # Dynamic int8 quantization of the Linear layers keeps CPU inference lightweight;
        # dynamically quantized modules are CPU-only, so skip this step on CUDA.
        model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
    return model.to(device), processor

if 'model_loaded' not in st.session_state:
    st.session_state.model_loaded = False

if not st.session_state.model_loaded:
    with st.spinner('Loading model...'):
        st.session_state.model, st.session_state.processor = load_model()
        st.session_state.model_loaded = True
        st.write("Model loaded complete")

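# Estimate how many pixels correspond to one millimetre. Florence-2 is prompted
# with the "<CAPTION_TO_PHRASE_GROUNDING>" task and the text "ruler" to locate
# the ruler's bounding box; the box's longest side is divided by the assumed
# physical length of the ruler (160 mm, hard-coded in plot_bbox) to obtain the
# pixels-per-mm ratio.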
def calculate_pixel_per_metric(image):
    def generate_labels(model, processor, task_prompt, image, text_input=None):
        if text_input is None:
            prompt = task_prompt
        else:
            prompt = task_prompt + " " + text_input
        
        inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)

        generated_ids = model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=1024,
            early_stopping=False,
            do_sample=False,
            num_beams=3,
        )

        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]

        output = processor.post_process_generation(
            generated_text,
            task=task_prompt,
            image_size=(image.width, image.height)
        )
        
        return output

    def plot_bbox(original_image, data):
        # Create a copy of the original image to draw on
        image_with_bboxes = original_image.copy()

        # Use Pillow to draw bounding boxes and labels
        draw = ImageDraw.Draw(image_with_bboxes)
        def calculate_bbox_dimensions(bbox):
            x1, y1, x2, y2 = bbox
            width = x2 - x1
            height = y2 - y1
            return width, height

        # Use a TrueType font if available; fall back to Pillow's bundled default font.
        try:
            font = ImageFont.truetype("arial.ttf", 28)
        except OSError:
            font = ImageFont.load_default()
        for bbox, label in zip(data['bboxes'], data['labels']):
            x1, y1, x2, y2 = bbox
            draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
            draw.text((x1, y1), label, fill="red", font=font)

        # Dimensions of the last detected box (expected to be the ruler)
        width, height = calculate_bbox_dimensions(bbox)
        print(f"Label: {label}, Width: {width}, Height: {height}")
        dimension_text = f"W: {width}, H: {height}"
        draw.text((x1, y1 + 20), dimension_text, fill="red", font=font)

        # Assumed real-world length of the detected ruler, in millimetres
        real_world_dimension_mm = 160
        largest_dimension = max(width, height)
        pixels_per_mm = largest_dimension / real_world_dimension_mm
        ratio_text = f"Pixels/mm: {pixels_per_mm:.2f}"
        draw.text((x1, y1 + 40), ratio_text, fill="red", font=font)

        return image_with_bboxes, pixels_per_mm, 1.0 / pixels_per_mm
    
    def detecting_ruler(model, processor, image, task_prompt, text_input=None):
        results = generate_labels(model, processor, task_prompt, image, text_input=text_input)
        image_with_bboxes, value_1, value_2 = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
        return value_1, value_2, image_with_bboxes
    
    image_for_model = image.copy()

    image_for_model = cv2.cvtColor(image_for_model, cv2.COLOR_BGR2RGB)
    image_for_model = Image.fromarray(image_for_model)
    # if image_for_model.mode != 'RGB':
    #     image_for_model = image_for_model.convert('RGB')

    # Process the image
    text_input = "ruler"
    task_prompt = "<CAPTION_TO_PHRASE_GROUNDING>"
    pixel_per_metric, mm_per_pixel, marked_image_buf = detecting_ruler(st.session_state.model, st.session_state.processor, image_for_model, task_prompt, text_input)
            
    return pixel_per_metric, mm_per_pixel, marked_image_buf

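# Remove the background with rembg so only the hand remains; the result feeds
# the thresholding and contour steps in calculate_pip_width.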
def process_image(image):
    return remove(image)

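# Measure the ring-finger width in millimetres. MediaPipe Hands provides the PIP
# (landmark 14) and MCP (landmark 13) joints of the ring finger; lines
# perpendicular to the PIP-MCP segment, through the PIP point and through the
# PIP-MCP midpoint, are intersected with the hand contour, the pixel widths are
# converted with pixel_per_metric, and 1.5 mm is subtracted in calSize as an
# apparently empirical correction.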
def calculate_pip_width(image, original_img, pixel_per_metric):
    def calSize(xA, yA, xB, yB, color_circle, color_line, img):
        d = dist.euclidean((xA, yA), (xB, yB))
        cv2.circle(img, (int(xA), int(yA)), 5, color_circle, -1)
        cv2.circle(img, (int(xB), int(yB)), 5, color_circle, -1)
        cv2.line(img, (int(xA), int(yA)), (int(xB), int(yB)), color_line, 2)
        d_mm = d / pixel_per_metric
        d_mm = d_mm - 1.5
        cv2.putText(img, "{:.1f}".format(d_mm), (int(xA - 15), int(yA - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.65, (255, 255, 255), 2)
        print(d_mm)
        return d_mm
    
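    # Walk along the perpendicular line y = m1*x + b in both directions from the
    # given point until cv2.pointPolygonTest reports that the point has left the
    # hand contour; the two exit points are the finger edges on that line.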
    def process_point(point, cnt, m1, b):
        x1, x2 = point[0], point[0]
        y1 = m1 * x1 + b
        y2 = m1 * x2 + b

        result = 1.0
        while result > 0:
            result = cv2.pointPolygonTest(cnt, (x1, y1), False)
            x1 += 1
            y1 = m1 * x1 + b
        x1 -= 1

        result = 1.0
        while result > 0:
            result = cv2.pointPolygonTest(cnt, (x2, y2), False)
            x2 -= 1
            y2 = m1 * x2 + b
        x2 += 1

        return x1, y1, x2, y2
    
    og_img = original_img.copy()
    imgH, imgW, _ = image.shape
    imgcpy = image.copy()
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary_image = cv2.threshold(image_gray, 1, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour_image = np.zeros_like(image_gray)
    cv2.drawContours(contour_image, contours, -1, (255), thickness=cv2.FILLED)
    cv2.drawContours(imgcpy, contours, -1, (0, 255, 0), 2)
    # print("length : ",len(contours))
    
    marked_img = image.copy()
    # Fall back to 0 mm if no contour or hand landmarks are detected.
    d_mm_pip = d_mm_mid = 0.0

    if len(contours) > 0:
        cnt = max(contours, key=cv2.contourArea)
        frame2 = cv2.cvtColor(og_img, cv2.COLOR_BGR2RGB)
        handsLM = mp.solutions.hands.Hands(max_num_hands=1, min_detection_confidence=0.8, min_tracking_confidence=0.8)
        pr = handsLM.process(frame2)
        print(pr.multi_hand_landmarks)
        if pr.multi_hand_landmarks:
            for hand_landmarks in pr.multi_hand_landmarks:
                lmlist = []
                for id, landMark in enumerate(hand_landmarks.landmark):
                    xPos, yPos = int(landMark.x * imgW), int(landMark.y * imgH)
                    lmlist.append([id, xPos, yPos])

                if len(lmlist) != 0:
                    pip_joint = [lmlist[14][1], lmlist[14][2]]
                    mcp_joint = [lmlist[13][1], lmlist[13][2]]

                    midpoint_x = (pip_joint[0] + mcp_joint[0]) / 2
                    midpoint_y = (pip_joint[1] + mcp_joint[1]) / 2
                    midpoint = [midpoint_x, midpoint_y]

                    m2 = (pip_joint[1] - mcp_joint[1]) / (pip_joint[0] - mcp_joint[0])
                    m1 = -1 / m2
                    b = pip_joint[1] - m1 * pip_joint[0]

                    #pip_joint
                    x1_pip, y1_pip, x2_pip, y2_pip = process_point(pip_joint, cnt, m1, b)

                    m2 = (midpoint_y - mcp_joint[1]) / (midpoint_x - mcp_joint[0])
                    m1 = -1 / m2
                    b = midpoint_y - m1 * midpoint_x

                    #midpoint
                    x1_mid, y1_mid, x2_mid, y2_mid = process_point(midpoint, cnt, m1, b)

                    d_mm_pip = calSize(x1_pip, y1_pip, x2_pip, y2_pip, (255, 0, 0), (255, 0, 255), original_img)
                    d_mm_mid = calSize(x1_mid, y1_mid, x2_mid, y2_mid, (0, 255, 0), (0, 0, 255), original_img)

    largest_d_mm = max(d_mm_mid, d_mm_pip)
    return original_img, largest_d_mm, imgcpy, marked_img

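# Draw MediaPipe hand landmarks on the given image and highlight the ring
# finger's MCP (landmark 13) and PIP (landmark 14) joints, the two points used
# for the width measurement.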
def mark_hand_landmarks(image):

    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands()
    mp_draw = mp.solutions.drawing_utils

    img = image
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    results = hands.process(img_rgb)

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_draw.draw_landmarks(img, hand_landmarks, mp_hands.HAND_CONNECTIONS)
            
            mcp = hand_landmarks.landmark[13]
            pip = hand_landmarks.landmark[14]
            
            img_height, img_width, _ = img.shape
            
            mcp_x, mcp_y = int(mcp.x * img_width), int(mcp.y * img_height)
            pip_x, pip_y = int(pip.x * img_width), int(pip.y * img_height)
            
            cv2.circle(img, (mcp_x, mcp_y), 10, (255, 0, 0), -1)
            cv2.circle(img, (pip_x, pip_y), 10, (255, 0, 0), -1)

    return img

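# Arrange the intermediate images on a single matplotlib grid and return the
# rendered figure as an in-memory PNG stream for st.image.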
def show_resized_image(images, titles, scale=0.5):
    num_images = len(images)
    
    fig, axes = plt.subplots(2, 3, figsize=(17, 13))  
    axes = axes.flatten()  

    for ax in axes[num_images:]:
        ax.axis('off')
    for ax, img, title in zip(axes, images, titles):
        resized_image = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
        ax.imshow(cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB))
        ax.set_title(title)
        ax.axis('off')

    plt.tight_layout()
    img_stream = BytesIO()
    plt.savefig(img_stream, format='png')
    img_stream.seek(0)
    plt.close(fig)
    return img_stream

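# Snap a measured diameter (in mm) to the closest key in ring_size_dict.
# For example, a hypothetical 16.3 mm measurement is closest to the 16.45 mm
# key and therefore maps to size 6.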
def get_ring_size(mm_value):
    if mm_value in ring_size_dict:
        return ring_size_dict[mm_value]
    else:
        closest_mm = min(ring_size_dict.keys(), key=lambda x: abs(x - mm_value))
        return ring_size_dict[closest_mm]

st.write("## Determine Your Ring Size")
st.write(
    "📏 Upload an image of your hand to measure the finger width and determine your ring size. The measurement will be displayed along with a visual breakdown of the image processing flow."
)
st.sidebar.write("## Upload :gear:")
#~~
st.write("### Workflow Overview")
st.image("FlowChart.png", caption="Workflow Overview", use_column_width=True)

st.write("### Detailed Workflow")
st.write("1. **Florence-2 Model:** Florence-2 is an advanced vision foundation model that uses a prompt-based approach to handle a wide range of vision and vision-language tasks.We utilize this model to detect the scale within the image and mark a bounding box which we can use to find the approximate full measurement of scale.")
st.write("2. **Pixel Per Metric Ratio:** The Pixel Per Metric Ratio is used to convert pixel measurements into real-world units. By comparing the pixel length obtained from image analysis (i.e., Hough Circle) with the known real-world measurement of the reference object (coin), we get the ratio. This ratio then allows us to accurately scale and size estimation of objects within the image.")
st.write("3. **Background Removal:** Removing the background first ensures that only the relevant subject is highlighted. We start by converting the image to grayscale and applying thresholding to distinguish the subject from the background. Erosion and dilation then clean up the image, improving the detection of specific features like individual fingers.")
st.write("4. **Contour Detection:** We use Contour Detection to find the largest contour, which allows us to outline or draw a boundary around the subject (i.e., hand). This highlights the object's shape and edges, improving the precision of the subject.")
st.write("5. **Finding Hand Landmarks:** This involves using the MediaPipe library to identify key points on the hand, such as the PIP (Proximal Interphalangeal) and MCP (Metacarpophalangeal) joints of the ring finger. This enables precise tracking and analysis of finger positions and movements.")
st.write("6. **Determining Finger Width:** Here we use the slope formula `[y = mx + b]` with PIP and MCP points to measure the finger's width. We project outward perpendicularly from the PIP point towards the MCP point, then apply a point polygon test to accurately determine the pixel width of the finger.")
st.write("7. **Predicting Ring Size:** Predicting Ring Size involves calculating the finger’s diameter using the Pixel Per Metric Ratio and the largest width measurement at the PIP or MCP joint. This diameter is then used to predict the appropriate ring size.")
#~~

MAX_FILE_SIZE = 5 * 1024 * 1024  # 5MB

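# End-to-end pipeline for one upload: read the image, mark the hand landmarks,
# derive the pixels-per-mm ratio from the detected ruler, remove the background,
# measure the finger width around the PIP joint, and map that width to a ring size.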
def process_image_and_get_results(upload):
    image = Image.open(upload).convert("RGB")
    image_np = np.array(image)
    image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
    original_img = image_np.copy()
    og_img1 = image_np.copy()
    og_img2 = image_np.copy()
    img_1 = image_np.copy()
    hand_lms = mark_hand_landmarks(img_1)

    pixel_per_metric, mm_per_pixel, image_with_coin_info = calculate_pixel_per_metric(image_np)
    processed_image = process_image(og_img1)
    image_with_pip_width, width_mm, contour_image, pip_mark_img = calculate_pip_width(processed_image, original_img, pixel_per_metric)
    # Convert the annotated PIL image back to a numpy array for display/resizing.
    image_with_coin_info = np.array(image_with_coin_info)
    if not isinstance(image_with_coin_info, (np.ndarray, cv2.UMat)):
        raise TypeError(f"Invalid image type: {type(image_with_coin_info)}. Expected numpy array or cv2.UMat.")
    ring_size = get_ring_size(width_mm)
    return {
        "processed_image": image_with_pip_width,
        "original_image": og_img2,
        "hand_lm_marked_image": hand_lms,
        "image_with_coin_info": image_with_coin_info,
        "contour_image": contour_image,
        "width_mm": width_mm,
        "ring_size": ring_size
    }

def show_how_it_works(processed_image):
    st.write("## How It Works")
    st.write("Here's a step-by-step breakdown of how your image is processed to determine your ring size:")
    st.image(processed_image, caption="Image Processing Flow", use_column_width=True)

col1, col2 = st.columns(2)
my_upload = st.sidebar.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])

if my_upload is not None:
    if my_upload.size > MAX_FILE_SIZE:
        st.error("The uploaded file is too large. Please upload an image smaller than 5MB.")
    else:
        st.write("## Image Processing Flow")
        results = process_image_and_get_results(my_upload)

        col1.write("Uploaded Image :camera:")
        col1.image(cv2.cvtColor(results["original_image"], cv2.COLOR_BGR2RGB), caption="Uploaded Image")

        col2.write("Processed Image :wrench:")
        col2.image(cv2.cvtColor(results["processed_image"], cv2.COLOR_BGR2RGB), caption="Processed Image with PIP Width")

        st.write(f"📏 The width of your finger is {results['width_mm']:.2f} mm, and the estimated ring size is {results['ring_size']:.1f}.")

        if st.button("How it Works"):
            st.write("## How It Works")
            st.write("Here's a step-by-step breakdown of how your image is processed to determine your ring size:")
            print("here")
            img_stream = show_resized_image(
                [results["original_image"], results["image_with_coin_info"], results["contour_image"], results["hand_lm_marked_image"], results["processed_image"]],
                ['Original Image', 'Image with Scale Info', 'Contour Boundary Image', 'Hand Landmarks', 'Ring Finger Width'],
                scale=0.5
            )
            st.image(img_stream, caption="Processing Flow", use_column_width=True)
else:
    st.info("Please upload an image to get started.")