File size: 16,486 Bytes
4a0d425
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
import os
from transformers import SegformerImageProcessor, AutoModelForSemanticSegmentation
import torch.nn as nn
import numpy as np 
import matplotlib.pyplot as plt
import cv2
import base64
from PIL import Image
from openai import OpenAI
import json 
import ast 
import io
import gradio as gr
import numpy as np
import gradio as gr

# The OpenAI API key is taken from the "openai_key" environment variable
# (None when the variable is unset).
api_token = os.environ.get("openai_key")
openai_apikey = api_token

# Shared client used for every chat-completion request in this module.
client = OpenAI(api_key=openai_apikey)


# Class index -> class name for the values found in the segmentation map.
# The position of each name in the tuple is its class index.
labels = dict(enumerate((
    "Background",
    "Hat",
    "Hair",
    "Sunglasses",
    "Upper-clothes",
    "Skirt",
    "Pants",
    "Dress",
    "Belt",
    "Left-shoe",
    "Right-shoe",
    "Face",
    "Left-leg",
    "Right-leg",
    "Left-arm",
    "Right-arm",
    "Bag",
    "Scarf",
)))



# The image processor and the segmentation network are loaded from the same
# pretrained checkpoint identifier.
_SEG_CHECKPOINT = "mattmdjaga/segformer_b2_clothes"
seg_processor = SegformerImageProcessor.from_pretrained(_SEG_CHECKPOINT)
seg_model = AutoModelForSemanticSegmentation.from_pretrained(_SEG_CHECKPOINT)



def encode_image(image_path):
    """Return the contents of the file at *image_path* as a base64 text string."""
    with open(image_path, "rb") as fh:
        raw_bytes = fh.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
      
      
def encode_pil_image(image):
    """Serialize *image* into an in-memory buffer and return it base64-encoded.

    The image's own ``format`` attribute is used when it is set; otherwise the
    image is saved as "PNG".
    """
    target_format = image.format or "PNG"
    stream = io.BytesIO()
    image.save(stream, format=target_format)
    payload = stream.getvalue()
    return base64.b64encode(payload).decode("utf-8")


    
    
def get_gpt_response(prompt, image=None, model=None, JSON=True):
    """Send one user message to the chat-completions API and return the reply text.

    Args:
        prompt: Text of the user message.
        image: Optional base64-encoded image string (without a data-URL
            prefix). When given, it is attached to the message as an
            ``image_url`` part.
        model: Model name; ``"gpt-3.5-turbo"`` is used when ``None``.
        JSON: When true (the default) the request asks for a JSON-object
            response via ``response_format``. When false the parameter is
            omitted and the model answers in free-form text.

    Returns:
        The ``content`` of the first choice's message.
    """
    if model is None:
        model = "gpt-3.5-turbo"

    content_list = [{"type": "text", "text": prompt}]
    if image is not None:
        # NOTE(review): the MIME type is hard-coded to JPEG, while
        # encode_pil_image() falls back to PNG -- confirm the API tolerates
        # the mismatch before relying on it.
        content_list.append(
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image}"}}
        )

    request_kwargs = {
        "model": model,
        "messages": [{"role": "user", "content": content_list}],
    }
    # Previously the JSON flag was ignored and JSON mode was always forced.
    if JSON:
        request_kwargs["response_format"] = {"type": "json_object"}

    completion = client.chat.completions.create(**request_kwargs)
    return completion.choices[0].message.content



def get_segmentation(img):
    """Segment *img* and list the classes that are present.

    Args:
        img: Image object exposing ``size`` as ``(width, height)``; it is
            passed to ``seg_processor`` as ``images=img``.

    Returns:
        A ``(pred_seg, l)`` tuple. ``pred_seg`` is a 2-D NumPy array of class
        indices with ``img.size[::-1]`` as its shape. ``l`` is a string with
        one ``"<name>: <index> \\n"`` entry per class index that occurs in
        ``pred_seg`` and is a key of ``labels``.
    """
    # Function-scope import: the module only binds ``torch.nn`` (as ``nn``).
    import torch

    inputs = seg_processor(images=img, return_tensors="pt")
    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = seg_model(**inputs)
    logits = outputs.logits.cpu()

    # Bring the logits back to the input resolution (size is (height, width)).
    upsampled_logits = nn.functional.interpolate(
        logits,
        size=img.size[::-1],
        mode="bilinear",
        align_corners=False,
    )

    pred_seg = upsampled_logits.argmax(dim=1)[0].cpu().numpy()

    # Classify any "skin" (face label, 11) below upper clothes (4) as upper clothes.
    upper_clothes_mask = pred_seg == 4
    # argmax over an all-False column returns 0, which would make every face
    # pixel of that column count as "below"; only columns that really contain
    # upper-clothes pixels may trigger the relabelling.
    has_upper_clothes = upper_clothes_mask.any(axis=0)
    min_upper_clothes_row = np.argmax(upper_clothes_mask, axis=0)
    rows, cols = np.where(pred_seg == 11)
    rows_below_upper_clothes = has_upper_clothes[cols] & (rows > min_upper_clothes_row[cols])
    pred_seg[rows[rows_below_upper_clothes], cols[rows_below_upper_clothes]] = 4

    # Describe the labels that are actually present in the prediction.
    active_labels = {key: labels[key] for key in np.unique(pred_seg) if key in labels}
    l = "".join(f"{value}: {key} \n" for key, value in active_labels.items())

    return pred_seg, l



def erase_regions(model_image, pred_seg, parsed_erasure_labels):
    """Return a copy of *model_image* with the selected segments painted gray.

    Args:
        model_image: Image to mask; converted to an array with ``np.array``.
        pred_seg: 2-D array of segmentation class indices.
        parsed_erasure_labels: Class indices whose pixels are overwritten.

    Returns:
        A PIL image in which every pixel whose label is in
        ``parsed_erasure_labels`` is set to ``[128, 128, 128]``.
    """
    pixels = np.array(model_image)
    height, width = pixels.shape[0], pixels.shape[1]

    # Nearest-neighbour interpolation keeps the label ids intact while the
    # map is brought to the image's width and height.
    seg_at_image_size = cv2.resize(pred_seg, (width, height), interpolation=cv2.INTER_NEAREST)

    to_erase = np.isin(seg_at_image_size, parsed_erasure_labels)
    pixels[to_erase] = [128, 128, 128]

    return Image.fromarray(pixels)



def _get_detect_prompt(labels,garment_desc):
  detect_prompt  = f"""
      Analyze the provided garment description and the human model’s segmentation to determine which regions should be blacked out for a virtual try on task.
      
      #### Segmentation Labels  
      {labels}  
      
      #### Rules for Blacking Out Regions  
      - **Upper-body garments** (shirts, blouses, jackets): Black out Upper-clothes (4). If it has sleeves (short/long), also black out Left-arm (14) and Right-arm (15).  
      - **Lower-body garments** (pants, skirts, shorts): Black out Pants (6). If full-length, also black out Left-leg (12) and Right-leg (13).  
      - **Dresses/Jumpsuits**: Black out Upper-clothes (4) and Pants (6). If long-sleeved, add Left-arm (14) and Right-arm (15). If full-length, add Left-leg (12) and Right-leg (13).  
      - **Shoes**: Always black out Left-shoe (9) and Right-shoe (10).  
      - **Additional rules**:  
        - Sleeveless garments: Only black out Upper-clothes (4); keep arms visible.  
        - Shorts/Mini-skirts: Only black out Pants (6); keep legs visible.  
        - Transparency: Ignore; follow standard rules.  
        - Overlapping items: Prioritize the visible garment (e.g., if a dress is worn, black out Pants).  
        - Garments with sleeves: Remove the corresponding hand.  
      - **Never modify**: Background (0), Hair (2), Face (11).  
      - If it's a full-body garment remove upper-clothes (4) and left-leg (12) and right-leg (13) and pants (6)
      - Always check to include pants or not in your reasoning
      - Black out Arms even if its short sleeve
      
      Follow these rules precisely and return only the required segmentation labels.  
      #### Output Format  
      STRICTLY GIVE JSON ONLY, with the follownig schema:
      {{
      - **reasoning**: The reasoning for the decision.
      - **remove_decision**: A comma-separated list of binary values (0 or 1) indicating whether the corresponding segmentation labels should be blacked out or not.
      }}
      
      
      
      #### Examples  
      
      #### **Long-Sleeve Shirt**  
      {{'Garment Type': 'shirt', 'Garment Type Category': 'upper body garment', 'Coverage Areas': ['torso', 'arms'], 'Sleeves': 'long', 'Leg Coverage': 'none', 'Special Features': {{'hood': 'no', 'gloves': 'no', 'transparency': 'no', 'cut-outs': 'no'}}, 'Description': 'A long-sleeve shirt that covers the torso and arms.'}}
      
      - **Reasoning**: Black out Upper-clothes (4) because it covers the torso. Since it has long sleeves, both arms (14, 15) are also blacked out.  
      - **Output**: {{"reasoning": "Black out Upper-clothes (4) for torso. Long sleeves, so black out Left-arm (14) and Right-arm (15).", "remove_decision": [0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0]}} 
      
      #### **Full-Body Jumpsuit with Hood & Gloves**  
      {{'Garment Type': 'jumpsuit', 'Garment Type Category': 'full body garment', 'Coverage Areas': ['torso', 'arms', 'legs', 'hands', 'head'], 'Sleeves': 'long', 'Leg Coverage': 'full-length', 'Special Features': {{'hood': 'yes', 'gloves': 'yes', 'transparency': 'no', 'cut-outs': 'no'}}, 'Description': 'A full-body jumpsuit with a hood and gloves.'}}
      
      - **Reasoning**: Black out Upper-clothes (4) for the torso, Pants (6) for the leg portion, and full-length leg coverage requires blacking out Left-leg (12) and Right-leg (13). Since the sleeves are long, black out both arms (14, 15).  
      - **Output**: {{"reasoning": "Black out Upper-clothes (4) for torso, Pants (6) for legs, Left-leg (12), Right-leg (13) for full-length legs, Left-arm (14) and Right-arm (15) for long sleeves.", "remove_Decision": [0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,0]}}  
      
      #### **Short-Sleeve Crop Top**  
      {{'Garment Type': 'crop top', 'Garment Type Category': 'upper body garment', 'Coverage Areas': ['torso'], 'Sleeves': 'short', 'Leg Coverage': 'none', 'Special Features': {{'hood': 'no', 'gloves': 'no', 'transparency': 'no', 'cut-outs': 'no'}}, 'Description': 'A pink sequined crop top featuring a sleeveless design that covers the upper torso.'}}
      
      - **Reasoning**: Black out Upper-clothes (4) because it covers the upper torso. Since the top has short sleeves, both arms (14, 15) are blacked out.  
      - **Output**: {{"reasoning": "Black out Upper-clothes (4) for torso. Keep arms visible since it's short-sleeve.", "remove_decision": [0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0]}}  
      
      #### **Ankle-Length Dress with Long Sleeves**  
      {{'Garment Type': 'dress', 'Garment Type Category': 'full body garment', 'Coverage Areas': ['torso', 'arms', 'legs'], 'Sleeves': 'long', 'Leg Coverage': 'full-length', 'Special Features': {{'hood': 'no', 'gloves': 'no', 'transparency': 'no', 'cut-outs': 'no'}}, 'Description': 'An ankle-length dress with long sleeves that covers the torso and legs.'}}
      
      - **Reasoning**: Black out Upper-clothes (4) and Pants (6) because the dress covers both the torso and legs. Since it has long sleeves, black out Left-arm (14) and Right-arm (15). Since it's ankle-length, also black out Left-leg (12) and Right-leg (13).  
      - **Output**: {{"reasoning": "Black out Upper-clothes (4) and Pants (6) for torso and legs, Left-leg (12), Right-leg (13) for full-length legs, Left-arm (14) and Right-arm (15) for long sleeves.", "remove_Decision": [0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,0]}}  
      
      #### **Mini Skirt**  
      {{'Garment Type': 'skirt', 'Garment Type Category': 'lower body garment', 'Coverage Areas': ['lower torso'], 'Sleeves': 'none', 'Leg Coverage': 'short', 'Special Features': {{'hood': 'no', 'gloves': 'no', 'transparency': 'no', 'cut-outs': 'no'}}, 'Description': 'A mini skirt that covers the lower torso.'}}
      
      - **Reasoning**: Black out Pants (6) because the mini skirt covers the lower torso. Since it's short in length, the legs remain visible and are not blacked out.  
      - **Output**: {{"reasoning": "Black out Pants (6) for lower torso. Keep legs visible since it's short length.", "remove_decision": [0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0]}}  
      
      #### **Sneakers**  
      {{'Garment Type': 'shoes', 'Garment Type Category': 'footwear', 'Coverage Areas': ['feet'], 'Sleeves': 'none', 'Leg Coverage': 'none', 'Special Features': {{'hood': 'no', 'gloves': 'no', 'transparency': 'no', 'cut-outs': 'no'}}, 'Description': 'A pair of sneakers for footwear.'}}
      
      - **Reasoning**: Always black out Left-shoe (9) and Right-shoe (10) since they are footwear.  
      - **Output**: {{"reasoning": "Black out Left-shoe (9) and Right-shoe (10) for footwear.", "remove_decision": [0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0]}}  

      ### Slip Dress
    {{
    "Garment Type": "slip dress", "Garment Type Category": "dress", "Coverage Areas": ["torso", "upper thighs"], "Sleeves": "none", "Leg Coverage": "partial (upper thighs)",
    "Special Features": {{
      "hood": "no",
      "gloves": "no",
      "transparency": "no",
      "cut-outs": "no"
    }},
    "Description": "A sleeveless slip dress with a red bodice and a pink sequined skirt. It features a plunging neckline and a fitted design that extends to the upper thighs."
  }}
      Output: {{ "reasoning": "Black out Upper-clothes (4) and Pants (6) because the dress covers both the torso and upper thighs. Since it is sleeveless, arms (14, 15) remain visible. Legs (12, 13) are not blacked out because the dress is short and does not cover them fully.",
    "remove_decision": [0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0]}}

      
      {garment_desc} 
      
      - **Reasoning**:
      - **Output**:
      """
  
  return detect_prompt




def _get_garment_prompt():
  
  garment_prompt = """
Analyze the given garment image and provide a **detailed structured description**. Always give output only in JSON.  Focus on:

1. **Garment Type**: Clearly state what type of garment it is. Possible types: 
   - "shirt"
   - "dress"
   - "jacket"
   - "pants"
   - "gloves"
   - "sweater"
   - "skirt"
   - "shorts"
   - "vest"
   - "jumpsuit"
   - "coat"
   - "blouse"
   - "t-shirt"
   - "crop top"

2. **Garment Type Category**: Specify the category of the garment. Possible categories:
   - "upper body garment"
   - "lower body garment"
   - "full body garment"

3. **Coverage Areas**: Specify which body parts the garment covers. Possible areas:
   - "torso"
   - "arms"
   - "legs"
   - "hands"
   - "head"

4. **Sleeves & Length**: If the garment has sleeves, specify if they are:
   - "short"
   - "long"
   - "none"

5. **Leg Coverage**: If the garment covers the legs, specify if it's:
   - "full-length"
   - "knee-length"
   - "short"
   - "none"

6. **Special Features**: Mention any additional details such as:
   - **Hood** → If the garment includes a hood, covering the head. (Possible values: "yes", "no")
   - **Gloves** → If the garment has built-in gloves, covering hands. (Possible values: "yes", "no")
   - **Transparency** → If any part of the garment is see-through (e.g., mesh, lace). (Possible values: "yes", "no")
   - **Cut-outs** → If the garment has openings exposing skin (e.g., backless, ripped areas). (Possible values: "yes", "no")

7. **Description**: Provide a short textual description of the garment, summarizing its appearance, coverage, type, length, style, and key features.


### **Example Outputs:**

#### **Long-Sleeve Shirt**  
{
  "Garment Type": "shirt",
  "Garment Type Category": "upper body garment",
  "Coverage Areas": ["torso", "arms"],
  "Sleeves": "long",
  "Leg Coverage": "none",
  "Special Features": {
    "hood": "no",
    "gloves": "no",
    "transparency": "no",
    "cut-outs": "no"
  },
  Description": "A long-sleeve shirt made of cotton, providing full coverage for the torso and arms. It has a classic design with no additional features."
}

### **Full-Body Jumpsuit with Hood & Gloves**
{
  "Garment Type": "jumpsuit",
  "Garment Type Category": "full body garment",
  "Coverage Areas": ["torso", "arms", "legs", "hands", "head"],
  "Sleeves": "long",
  "Leg Coverage": "full-length",
  "Special Features": {
    "hood": "yes",
    "gloves": "yes",
    "transparency": "no",
    "cut-outs": "no"
  },
    "Description": "A full-body jumpsuit with a hood and built-in gloves. It provides full coverage for the torso, arms, legs, hands, and head."
   
}

### **Short-Sleeve Crop Top**
{
  "Garment Type": "crop top",
  "Garment Type Category": "upper body garment",
  "Coverage Areas": ["torso"],
  "Sleeves": "short",
  "Leg Coverage": "none",
  "Special Features": {
    "hood": "no",
    "gloves": "no",
    "transparency": "no",
    "cut-outs": "no"
  },
  "Description": "A casual short-sleeve crop top that covers the upper torso."
}

Output only JSON. 
"""
  return garment_prompt


def _get_erasure_prompt(labels,reasoning):
  prompt = f"""
  Here are the labels:

  {labels}

  and Here is a reasoning:
  {reasoning}

  Based on the labels and reasoning I want you to output a list containing elements that are to be reomved. For example output [3,4,5,6].

  output only JSON that contains a list: {{"erasure_labels":[indices]}}
  """
  
  return prompt
      

def fashion_masking(imgA,imgB, max_retries = 3):
    """Gray out the regions of a person image that a garment would cover.

    The pipeline segments the person image, asks the LLM to describe the
    garment, asks it which regions to black out, converts that reasoning into
    label indices and paints those regions gray. The whole pipeline is retried
    when any step raises (for example when the LLM reply is not the expected
    JSON).

    Args:
        imgA: Person image as an array accepted by ``Image.fromarray``.
        imgB: Garment image as an array accepted by ``Image.fromarray``.
        max_retries: Maximum number of attempts before giving up.

    Returns:
        ``(erased_img, return_text)`` on success, where ``return_text`` holds
        the garment description and the reasoning. If every attempt fails,
        ``(None, <error message>)`` so the two-output interface is still
        satisfied.
    """
    last_error = None

    # Honour the max_retries argument (the bound used to be hard-coded to 3).
    for _ in range(max_retries):
        try:
            model_image = Image.fromarray(imgA)
            dress_image = Image.fromarray(imgB)

            pred_seg, _active_label_text = get_segmentation(model_image)

            base64_garment_image = encode_pil_image(dress_image)
            garment_desc = get_gpt_response(
                _get_garment_prompt(), base64_garment_image, model="gpt-4o-mini"
            )

            # Keyword arguments: the positional call had labels and
            # garment_desc swapped relative to _get_detect_prompt's signature.
            detect_prompt = _get_detect_prompt(labels=labels, garment_desc=garment_desc)
            res = get_gpt_response(detect_prompt, model="gpt-4o-mini")
            reasoning = json.loads(res)['reasoning']

            erasure_labels = get_gpt_response(_get_erasure_prompt(labels, reasoning))
            parsed_erasure_labels = json.loads(erasure_labels)["erasure_labels"]

            erased_img = erase_regions(model_image, pred_seg, parsed_erasure_labels)

            return_text = f"""Garment Description: {garment_desc} \n Reasoning : {reasoning} """

            return erased_img, return_text

        except Exception as e:
            # Top-level boundary: log the failure and try again.
            last_error = e
            print(e)

    # Every attempt failed: report it instead of implicitly returning None.
    return None, f"Masking failed after {max_retries} attempt(s): {last_error}"






# Input components: the person photo and the garment photo, both delivered
# to the callback as RGB NumPy arrays.
person_input = gr.Image(label="Image A (person)", image_mode="RGB", type="numpy")
garment_input = gr.Image(label="Image B (garment)", image_mode="RGB", type="numpy")

# Output components: the masked image and the accompanying description text.
masked_output = gr.Image(label="Masked Output", image_mode="RGB", type="numpy")
description_output = gr.Textbox(label="Output Description")

# fashion_masking must return (image, text) to match the two outputs.
demo = gr.Interface(
    fn=fashion_masking,
    inputs=[person_input, garment_input],
    outputs=[masked_output, description_output],
    flagging_mode='never',
)

demo.launch(share=True)