Spaces:

Aadharsh
/

clothing-segmentation

Sleeping

File size: 16,486 Bytes

4a0d425

import os
from transformers import SegformerImageProcessor, AutoModelForSemanticSegmentation
import torch.nn as nn
import numpy as np 
import matplotlib.pyplot as plt
import cv2
import base64
from PIL import Image
from openai import OpenAI
import json 
import ast 
import io
import gradio as gr
import numpy as np
import gradio as gr

# Read the OpenAI API key from the "openai_key" environment variable.
# os.getenv returns None when the variable is not set.
api_token = os.getenv("openai_key")


# Same key under the name that is handed to the client constructor below.
openai_apikey =api_token
# Module-level client; get_gpt_response() sends its requests through it.
client = OpenAI(api_key=openai_apikey)


# Segmentation class id -> human-readable name. The names are listed in id
# order, so a name's position in the tuple is its class id (0..17).
labels = dict(enumerate((
    "Background",
    "Hat",
    "Hair",
    "Sunglasses",
    "Upper-clothes",
    "Skirt",
    "Pants",
    "Dress",
    "Belt",
    "Left-shoe",
    "Right-shoe",
    "Face",
    "Left-leg",
    "Right-leg",
    "Left-arm",
    "Right-arm",
    "Bag",
    "Scarf",
)))



# Load the image processor and the semantic-segmentation model from the
# "mattmdjaga/segformer_b2_clothes" checkpoint once, at import time.
# Both objects are used by get_segmentation().
seg_processor = SegformerImageProcessor.from_pretrained("mattmdjaga/segformer_b2_clothes")
seg_model = AutoModelForSemanticSegmentation.from_pretrained("mattmdjaga/segformer_b2_clothes")



def encode_image(image_path):
    """Return the contents of the file at *image_path* as a base64 string.

    The file is read in binary mode; the base64 bytes are decoded as UTF-8
    so the result is a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
      
      
def encode_pil_image(image):
    """Save *image* into an in-memory buffer and return it base64-encoded.

    The image's own ``format`` attribute is used when it is set (truthy);
    otherwise the image is saved as PNG. The result is a ``str``.
    """
    save_format = image.format or "PNG"
    stream = io.BytesIO()
    image.save(stream, format=save_format)
    payload = stream.getvalue()
    return base64.b64encode(payload).decode("utf-8")


    
    
def _guess_image_mime(image_b64):
    """Guess the MIME type of base64-encoded image data from its leading bytes.

    Falls back to ``image/jpeg`` when the signature is not recognised.
    """
    # Base64 renderings of the PNG / JPEG / GIF / RIFF(WebP) magic numbers.
    signatures = (
        ("iVBORw0KGgo", "image/png"),
        ("/9j/", "image/jpeg"),
        ("R0lGOD", "image/gif"),
        ("UklGR", "image/webp"),
    )
    for prefix, mime in signatures:
        if image_b64.startswith(prefix):
            return mime
    return "image/jpeg"


def get_gpt_response(prompt,image=None,model=None,JSON=True):
    """Send *prompt* (and optionally an image) as one user message and return the reply.

    Args:
        prompt: Text part of the user message.
        image: Optional base64-encoded image data (without any ``data:``
            prefix). When given, it is attached as an ``image_url`` content
            part whose data URL carries the MIME type guessed from the data.
        model: Model name; ``"gpt-3.5-turbo"`` is used when ``None``.
        JSON: When true (the default), the request asks for
            ``response_format={"type": "json_object"}``. When false, no
            response format is requested.

    Returns:
        The message content of the first choice of the completion.
    """
    if model is None:
        model = "gpt-3.5-turbo"

    content_list = [{"type": "text", "text": prompt}]
    if image is not None:
        # Declare the real image type instead of always claiming JPEG:
        # encode_pil_image() produces PNG for images that carry no format.
        mime = _guess_image_mime(image)
        content_list.append(
            {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{image}"}}
        )

    request_kwargs = {
        "model": model,
        "messages": [{"role": "user", "content": content_list}],
    }
    if JSON:
        # Only request JSON mode when the caller asked for it.
        request_kwargs["response_format"] = {"type": "json_object"}

    completion = client.chat.completions.create(**request_kwargs)

    return completion.choices[0].message.content



def _relabel_face_below_upper_clothes(pred_seg):
    """Relabel Face (11) pixels lying below the top of Upper-clothes (4) as 4.

    Works column by column and modifies *pred_seg* in place. Columns that
    contain no upper-clothes pixel are left untouched.
    """
    upper_clothes_mask = pred_seg == 4
    # np.argmax returns 0 for an all-False column, which would otherwise make
    # every face pixel in that column look like it is "below" the clothes.
    column_has_upper_clothes = upper_clothes_mask.any(axis=0)
    first_upper_clothes_row = np.argmax(upper_clothes_mask, axis=0)

    rows, cols = np.where(pred_seg == 11)
    below = column_has_upper_clothes[cols] & (rows > first_upper_clothes_row[cols])
    pred_seg[rows[below], cols[below]] = 4
    return pred_seg


def get_segmentation(img):
    """Segment *img* and list the labels that occur in the result.

    Args:
        img: Image whose ``size`` is ``(width, height)``; fashion_masking()
            passes a PIL image.

    Returns:
        A tuple ``(pred_seg, label_text)``. ``pred_seg`` is a 2-D numpy array
        of class ids at the resolution of *img*. ``label_text`` has one
        ``"<name>: <id> \\n"`` entry per class id present in ``pred_seg``
        that is also a key of ``labels``.
    """
    # Function-scope import: the file only imports ``torch.nn``.
    import torch

    inputs = seg_processor(images=img, return_tensors="pt")

    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = seg_model(**inputs)
        logits = outputs.logits.cpu()

        # The logits are upsampled to the input size; img.size is
        # (width, height), interpolate wants (height, width).
        upsampled_logits = nn.functional.interpolate(
            logits,
            size=img.size[::-1],
            mode="bilinear",
            align_corners=False,
        )

    pred_seg = upsampled_logits.argmax(dim=1)[0]
    pred_seg = pred_seg.cpu().numpy()

    # Classify any "skin" (face label) below upper clothes as upper clothes.
    pred_seg = _relabel_face_below_upper_clothes(pred_seg)

    # Collect the labels that are actually present in the prediction.
    active_labels = {key: labels[key] for key in np.unique(pred_seg) if key in labels}
    label_text = "".join(f"{value}: {key} \n" for key, value in active_labels.items())

    return pred_seg, label_text



def erase_regions(model_image, pred_seg, parsed_erasure_labels):
    """Paint every pixel whose label is in *parsed_erasure_labels* gray.

    Args:
        model_image: Image to erase from; it is converted with ``np.array``.
        pred_seg: 2-D array of class ids; it is resized to the image's
            height and width with nearest-neighbour interpolation.
        parsed_erasure_labels: Class ids whose pixels are overwritten.

    Returns:
        A new PIL image in which the selected pixels are ``[128, 128, 128]``.
    """
    pixels = np.array(model_image)
    height, width = pixels.shape[0], pixels.shape[1]

    # cv2.resize takes the target size as (width, height).
    seg_at_image_size = cv2.resize(
        pred_seg,
        (width, height),
        interpolation=cv2.INTER_NEAREST,
    )

    to_erase = np.isin(seg_at_image_size, parsed_erasure_labels)
    pixels[to_erase] = [128, 128, 128]

    return Image.fromarray(pixels)



def _get_detect_prompt(labels,garment_desc):
  detect_prompt  = f"""
      Analyze the provided garment description and the human model’s segmentation to determine which regions should be blacked out for a virtual try on task.
      
      #### Segmentation Labels  
      {labels}  
      
      #### Rules for Blacking Out Regions  
      - **Upper-body garments** (shirts, blouses, jackets): Black out Upper-clothes (4). If it has sleeves (short/long), also black out Left-arm (14) and Right-arm (15).  
      - **Lower-body garments** (pants, skirts, shorts): Black out Pants (6). If full-length, also black out Left-leg (12) and Right-leg (13).  
      - **Dresses/Jumpsuits**: Black out Upper-clothes (4) and Pants (6). If long-sleeved, add Left-arm (14) and Right-arm (15). If full-length, add Left-leg (12) and Right-leg (13).  
      - **Shoes**: Always black out Left-shoe (9) and Right-shoe (10).  
      - **Additional rules**:  
        - Sleeveless garments: Only black out Upper-clothes (4); keep arms visible.  
        - Shorts/Mini-skirts: Only black out Pants (6); keep legs visible.  
        - Transparency: Ignore; follow standard rules.  
        - Overlapping items: Prioritize the visible garment (e.g., if a dress is worn, black out Pants).  
        - Garments with sleeves: Remove the corresponding hand.  
      - **Never modify**: Background (0), Hair (2), Face (11).  
      - If it's a full-body garment remove upper-clothes (4) and left-leg (12) and right-leg (13) and pants (6)
      - Always check to include pants or not in your reasoning
      - Black out Arms even if its short sleeve
      
      Follow these rules precisely and return only the required segmentation labels.  
      #### Output Format  
      STRICTLY GIVE JSON ONLY, with the follownig schema:
      {{
      - **reasoning**: The reasoning for the decision.
      - **remove_decision**: A comma-separated list of binary values (0 or 1) indicating whether the corresponding segmentation labels should be blacked out or not.
      }}
      
      
      
      #### Examples  
      
      #### **Long-Sleeve Shirt**  
      {{'Garment Type': 'shirt', 'Garment Type Category': 'upper body garment', 'Coverage Areas': ['torso', 'arms'], 'Sleeves': 'long', 'Leg Coverage': 'none', 'Special Features': {{'hood': 'no', 'gloves': 'no', 'transparency': 'no', 'cut-outs': 'no'}}, 'Description': 'A long-sleeve shirt that covers the torso and arms.'}}
      
      - **Reasoning**: Black out Upper-clothes (4) because it covers the torso. Since it has long sleeves, both arms (14, 15) are also blacked out.  
      - **Output**: {{"reasoning": "Black out Upper-clothes (4) for torso. Long sleeves, so black out Left-arm (14) and Right-arm (15).", "remove_decision": [0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0]}} 
      
      #### **Full-Body Jumpsuit with Hood & Gloves**  
      {{'Garment Type': 'jumpsuit', 'Garment Type Category': 'full body garment', 'Coverage Areas': ['torso', 'arms', 'legs', 'hands', 'head'], 'Sleeves': 'long', 'Leg Coverage': 'full-length', 'Special Features': {{'hood': 'yes', 'gloves': 'yes', 'transparency': 'no', 'cut-outs': 'no'}}, 'Description': 'A full-body jumpsuit with a hood and gloves.'}}
      
      - **Reasoning**: Black out Upper-clothes (4) for the torso, Pants (6) for the leg portion, and full-length leg coverage requires blacking out Left-leg (12) and Right-leg (13). Since the sleeves are long, black out both arms (14, 15).  
      - **Output**: {{"reasoning": "Black out Upper-clothes (4) for torso, Pants (6) for legs, Left-leg (12), Right-leg (13) for full-length legs, Left-arm (14) and Right-arm (15) for long sleeves.", "remove_Decision": [0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,0]}}  
      
      #### **Short-Sleeve Crop Top**  
      {{'Garment Type': 'crop top', 'Garment Type Category': 'upper body garment', 'Coverage Areas': ['torso'], 'Sleeves': 'short', 'Leg Coverage': 'none', 'Special Features': {{'hood': 'no', 'gloves': 'no', 'transparency': 'no', 'cut-outs': 'no'}}, 'Description': 'A pink sequined crop top featuring a sleeveless design that covers the upper torso.'}}
      
      - **Reasoning**: Black out Upper-clothes (4) because it covers the upper torso. Since the top has short sleeves, both arms (14, 15) are blacked out.  
      - **Output**: {{"reasoning": "Black out Upper-clothes (4) for torso. Keep arms visible since it's short-sleeve.", "remove_decision": [0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0]}}  
      
      #### **Ankle-Length Dress with Long Sleeves**  
      {{'Garment Type': 'dress', 'Garment Type Category': 'full body garment', 'Coverage Areas': ['torso', 'arms', 'legs'], 'Sleeves': 'long', 'Leg Coverage': 'full-length', 'Special Features': {{'hood': 'no', 'gloves': 'no', 'transparency': 'no', 'cut-outs': 'no'}}, 'Description': 'An ankle-length dress with long sleeves that covers the torso and legs.'}}
      
      - **Reasoning**: Black out Upper-clothes (4) and Pants (6) because the dress covers both the torso and legs. Since it has long sleeves, black out Left-arm (14) and Right-arm (15). Since it's ankle-length, also black out Left-leg (12) and Right-leg (13).  
      - **Output**: {{"reasoning": "Black out Upper-clothes (4) and Pants (6) for torso and legs, Left-leg (12), Right-leg (13) for full-length legs, Left-arm (14) and Right-arm (15) for long sleeves.", "remove_Decision": [0,0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,0,0]}}  
      
      #### **Mini Skirt**  
      {{'Garment Type': 'skirt', 'Garment Type Category': 'lower body garment', 'Coverage Areas': ['lower torso'], 'Sleeves': 'none', 'Leg Coverage': 'short', 'Special Features': {{'hood': 'no', 'gloves': 'no', 'transparency': 'no', 'cut-outs': 'no'}}, 'Description': 'A mini skirt that covers the lower torso.'}}
      
      - **Reasoning**: Black out Pants (6) because the mini skirt covers the lower torso. Since it's short in length, the legs remain visible and are not blacked out.  
      - **Output**: {{"reasoning": "Black out Pants (6) for lower torso. Keep legs visible since it's short length.", "remove_decision": [0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0]}}  
      
      #### **Sneakers**  
      {{'Garment Type': 'shoes', 'Garment Type Category': 'footwear', 'Coverage Areas': ['feet'], 'Sleeves': 'none', 'Leg Coverage': 'none', 'Special Features': {{'hood': 'no', 'gloves': 'no', 'transparency': 'no', 'cut-outs': 'no'}}, 'Description': 'A pair of sneakers for footwear.'}}
      
      - **Reasoning**: Always black out Left-shoe (9) and Right-shoe (10) since they are footwear.  
      - **Output**: {{"reasoning": "Black out Left-shoe (9) and Right-shoe (10) for footwear.", "remove_decision": [0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0]}}  

      ### Slip Dress
    {{
    "Garment Type": "slip dress", "Garment Type Category": "dress", "Coverage Areas": ["torso", "upper thighs"], "Sleeves": "none", "Leg Coverage": "partial (upper thighs)",
    "Special Features": {{
      "hood": "no",
      "gloves": "no",
      "transparency": "no",
      "cut-outs": "no"
    }},
    "Description": "A sleeveless slip dress with a red bodice and a pink sequined skirt. It features a plunging neckline and a fitted design that extends to the upper thighs."
  }}
      Output: {{ "reasoning": "Black out Upper-clothes (4) and Pants (6) because the dress covers both the torso and upper thighs. Since it is sleeveless, arms (14, 15) remain visible. Legs (12, 13) are not blacked out because the dress is short and does not cover them fully.",
    "remove_decision": [0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0]}}

      
      {garment_desc} 
      
      - **Reasoning**:
      - **Output**:
      """
  
  return detect_prompt




def _get_garment_prompt():
  
  garment_prompt = """
Analyze the given garment image and provide a **detailed structured description**. Always give output only in JSON.  Focus on:

1. **Garment Type**: Clearly state what type of garment it is. Possible types: 
   - "shirt"
   - "dress"
   - "jacket"
   - "pants"
   - "gloves"
   - "sweater"
   - "skirt"
   - "shorts"
   - "vest"
   - "jumpsuit"
   - "coat"
   - "blouse"
   - "t-shirt"
   - "crop top"

2. **Garment Type Category**: Specify the category of the garment. Possible categories:
   - "upper body garment"
   - "lower body garment"
   - "full body garment"

3. **Coverage Areas**: Specify which body parts the garment covers. Possible areas:
   - "torso"
   - "arms"
   - "legs"
   - "hands"
   - "head"

4. **Sleeves & Length**: If the garment has sleeves, specify if they are:
   - "short"
   - "long"
   - "none"

5. **Leg Coverage**: If the garment covers the legs, specify if it's:
   - "full-length"
   - "knee-length"
   - "short"
   - "none"

6. **Special Features**: Mention any additional details such as:
   - **Hood** → If the garment includes a hood, covering the head. (Possible values: "yes", "no")
   - **Gloves** → If the garment has built-in gloves, covering hands. (Possible values: "yes", "no")
   - **Transparency** → If any part of the garment is see-through (e.g., mesh, lace). (Possible values: "yes", "no")
   - **Cut-outs** → If the garment has openings exposing skin (e.g., backless, ripped areas). (Possible values: "yes", "no")

7. **Description**: Provide a short textual description of the garment, summarizing its appearance, coverage, type, length, style, and key features.


### **Example Outputs:**

#### **Long-Sleeve Shirt**  
{
  "Garment Type": "shirt",
  "Garment Type Category": "upper body garment",
  "Coverage Areas": ["torso", "arms"],
  "Sleeves": "long",
  "Leg Coverage": "none",
  "Special Features": {
    "hood": "no",
    "gloves": "no",
    "transparency": "no",
    "cut-outs": "no"
  },
  Description": "A long-sleeve shirt made of cotton, providing full coverage for the torso and arms. It has a classic design with no additional features."
}

### **Full-Body Jumpsuit with Hood & Gloves**
{
  "Garment Type": "jumpsuit",
  "Garment Type Category": "full body garment",
  "Coverage Areas": ["torso", "arms", "legs", "hands", "head"],
  "Sleeves": "long",
  "Leg Coverage": "full-length",
  "Special Features": {
    "hood": "yes",
    "gloves": "yes",
    "transparency": "no",
    "cut-outs": "no"
  },
    "Description": "A full-body jumpsuit with a hood and built-in gloves. It provides full coverage for the torso, arms, legs, hands, and head."
   
}

### **Short-Sleeve Crop Top**
{
  "Garment Type": "crop top",
  "Garment Type Category": "upper body garment",
  "Coverage Areas": ["torso"],
  "Sleeves": "short",
  "Leg Coverage": "none",
  "Special Features": {
    "hood": "no",
    "gloves": "no",
    "transparency": "no",
    "cut-outs": "no"
  },
  "Description": "A casual short-sleeve crop top that covers the upper torso."
}

Output only JSON. 
"""
  return garment_prompt


def _get_erasure_prompt(labels,reasoning):
  prompt = f"""
  Here are the labels:

  {labels}

  and Here is a reasoning:
  {reasoning}

  Based on the labels and reasoning I want you to output a list containing elements that are to be reomved. For example output [3,4,5,6].

  output only JSON that contains a list: {{"erasure_labels":[indices]}}
  """
  
  return prompt
      

def fashion_masking(imgA,imgB, max_retries = 3):
    """Gray out the regions of a person image that a garment would cover.

    Pipeline: segment the person image, have the model describe the garment
    image, ask which regions to black out, convert that reasoning into label
    ids, and erase those labels from the person image.

    Args:
        imgA: Person image as a numpy array.
        imgB: Garment image as a numpy array.
        max_retries: Maximum number of attempts at the GPT-backed steps
            (requests, JSON parsing, erasing).

    Returns:
        ``(erased_image, text)`` on success, where ``text`` holds the garment
        description and the reasoning. ``(None, message)`` when an input is
        missing or every attempt failed, so that both UI outputs always
        receive a value.
    """
    if imgA is None or imgB is None:
        return None, "Both a person image and a garment image are required."

    model_image = Image.fromarray(imgA)
    dress_image = Image.fromarray(imgB)

    # These steps do not depend on the GPT responses, so they are done once
    # instead of being repeated on every retry.
    pred_seg, _active_label_text = get_segmentation(model_image)
    base64_garment_image = encode_pil_image(dress_image)
    garment_prompt = _get_garment_prompt()

    last_error = None
    for attempt in range(1, max_retries + 1):
        try:
            garment_desc = get_gpt_response(garment_prompt, base64_garment_image, model="gpt-4o-mini")

            # Argument order follows the signature: (labels, garment_desc).
            detect_prompt = _get_detect_prompt(labels, garment_desc)

            res = get_gpt_response(detect_prompt, model="gpt-4o-mini")
            reasoning = json.loads(res)['reasoning']

            erasure_labels = get_gpt_response(_get_erasure_prompt(labels, reasoning))
            parsed_erasure_labels = json.loads(erasure_labels)["erasure_labels"]

            erased_img = erase_regions(model_image, pred_seg, parsed_erasure_labels)
        except Exception as e:
            # Best effort: remember the failure, report it, and try again.
            last_error = e
            print(f"fashion_masking attempt {attempt}/{max_retries} failed: {e}")
            continue

        return_text = f"""Garment Description: {garment_desc} \n Reasoning : {reasoning} """
        return erased_img, return_text

    return None, f"Masking failed after {max_retries} attempt(s): {last_error}"






# Gradio UI: the two uploaded images (person, garment) are passed to
# fashion_masking as numpy arrays, and its (image, text) result fills the
# two outputs below.
demo = gr.Interface(
    fn=fashion_masking,  # must return one value per output: (image, text)
    inputs=[
        gr.Image(label="Image A (person)", image_mode="RGB", type="numpy"),
        gr.Image(label="Image B (garment)", image_mode="RGB", type="numpy"),
    ],
    outputs=[
        gr.Image(label="Masked Output", image_mode="RGB", type="numpy"),
        gr.Textbox(label="Output Description")
    ],
    flagging_mode='never',
)

# Runs at import time: start the app (share=True asks Gradio for a share link).
demo.launch(share=True)