SwimmingLiu commited on
Commit
d91c189
·
1 Parent(s): a48a9ba
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +163 -0
  2. CONTRIBUTING.md +20 -0
  3. README.md +38 -14
  4. caption_images.py +52 -0
  5. demo/__init__.py +3 -0
  6. demo/extract_garment/README.md +14 -0
  7. demo/extract_garment/__init__.py +1 -0
  8. demo/extract_garment/app.py +76 -0
  9. demo/extract_garment/requirements.txt +3 -0
  10. demo/model_swap/.gitignore +1 -0
  11. demo/model_swap/README.md +14 -0
  12. demo/model_swap/__init__.py +1 -0
  13. demo/model_swap/app.py +321 -0
  14. demo/model_swap/requirements.txt +2 -0
  15. demo/outfit_generator/README.md +86 -0
  16. demo/outfit_generator/__init__.py +1 -0
  17. demo/outfit_generator/app.py +164 -0
  18. demo/outfit_generator/images/sample1.jpeg +0 -0
  19. demo/outfit_generator/images/sample2.jpeg +0 -0
  20. demo/outfit_generator/images/sample3.jpeg +0 -0
  21. demo/outfit_generator/images/sample4.jpeg +0 -0
  22. demo/outfit_generator/requirements.txt +10 -0
  23. environment.yml +179 -0
  24. main.py +44 -0
  25. requirements.txt +15 -0
  26. run_demo.py +18 -0
  27. run_ootd.py +37 -0
  28. scripts/install_conda.sh +10 -0
  29. scripts/install_sam2.sh +11 -0
  30. setup.py +31 -0
  31. tryon/README.md +34 -0
  32. tryon/__init__.py +0 -0
  33. tryon/models/__init__.py +0 -0
  34. tryon/models/ootdiffusion/setup.sh +30 -0
  35. tryon/preprocessing/__init__.py +3 -0
  36. tryon/preprocessing/captioning/__init__.py +2 -0
  37. tryon/preprocessing/captioning/generate_caption.py +108 -0
  38. tryon/preprocessing/extract_garment_new.py +91 -0
  39. tryon/preprocessing/preprocess_garment.py +107 -0
  40. tryon/preprocessing/preprocess_human.py +86 -0
  41. tryon/preprocessing/sam2/__init__.py +23 -0
  42. tryon/preprocessing/u2net/__init__.py +3 -0
  43. tryon/preprocessing/u2net/data_loader.py +277 -0
  44. tryon/preprocessing/u2net/load_u2net.py +47 -0
  45. tryon/preprocessing/u2net/u2net_cloth_segm.py +550 -0
  46. tryon/preprocessing/u2net/u2net_human_segm.py +520 -0
  47. tryon/preprocessing/u2net/utils.py +10 -0
  48. tryon/preprocessing/utils.py +91 -0
  49. tryondiffusion/__init__.py +0 -0
  50. tryondiffusion/diffusion.py +275 -0
.gitignore ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
161
+
162
+ u2net_cloth_segm.pth
163
+ u2net_segm.pth
CONTRIBUTING.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## How to contribute to tryondiffusion
2
+
3
+ ### 1. Open an issue
4
+ We recommend opening an issue (if one doesn't already exist) and discussing your intended changes before making any changes.
5
+ We'll be able to provide you feedback and confirm the planned modifications this way.
6
+
7
+ ### 2. Make changes in the code
8
+ Start with forking the repository, set up the environment, install the dependencies, and make changes in the code appropriately.
9
+
10
+ ### 3. Create pull request
11
+ Create a pull request to the main branch from your fork's branch.
12
+
13
+ ### 4. Merging pull request
14
+ Once the pull request is created, we will review the code changes and merge the pull request as soon as possible.
15
+
16
+
17
+ ### Writing documentation
18
+
19
+ If you are interested in writing the documentation, you can add it to README.md and create a pull request.
20
+ For now, we are maintaining our documentation in a single file and we will add more files as it grows.
README.md CHANGED
@@ -1,14 +1,38 @@
1
- ---
2
- title: Tryondiffusion
3
- emoji: 📈
4
- colorFrom: green
5
- colorTo: purple
6
- sdk: gradio
7
- sdk_version: 5.7.1
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- short_description: 'TryOnDiffusion: A Tale of Two UNets Implementation'
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Try On Diffusion: A Tale of Two UNets Implementation
2
+ ### [Paper Link](https://arxiv.org/abs/2306.08276)
3
+
4
+ ### [Click here](https://discord.gg/T5mPpZHxkY) to join our discord channel
5
+
6
+ ## Roadmap
7
+
8
+ 1. ~~Prepare initial implementation~~
9
+ 1. Test initial implementation with small dataset (VITON-HD)
10
+ 1. Gather sufficient data and compute resources
11
+ 1. Prepare and train final implementation
12
+ 1. Publicly release parameters
13
+
14
+ ## How to contribute to tryondiffusion
15
+
16
+ ### 1. Open an issue
17
+ We recommend opening an issue (if one doesn't already exist) and discussing your intended changes before making any changes.
18
+ We'll be able to provide you feedback and confirm the planned modifications this way.
19
+
20
+ ### 2. Make changes in the code
21
+ Start with forking the repository, set up the environment, install the dependencies, and make changes in the code appropriately.
22
+
23
+ ### 3. Create pull request
24
+ Create a pull request to the main branch from your fork's branch.
25
+
26
+ ### 4. Merging pull request
27
+ Once the pull request is created, we will review the code changes and merge the pull request as soon as possible.
28
+
29
+
30
+ ### Writing documentation
31
+
32
+ If you are interested in writing the documentation, you can add it to README.md and create a pull request.
33
+ For now, we are maintaining our documentation in a single file and we will add more files as it grows.
34
+
35
+
36
+ ## License
37
+
38
+ All material is made available under [Creative Commons BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/). You can **use** the material for **non-commercial purposes**, as long as you give appropriate credit by **citing our original [github repo](https://github.com/kailashahirwar/tryondiffusion)** and **indicate any changes** that you've made to the code.
caption_images.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import glob
2
+ import json
3
+ import os
4
+
5
+ from PIL import Image
6
+
7
+ from tryon.preprocessing.captioning import caption_image, create_llava_next_pipeline
8
+
9
+ INPUT_IMAGES_DIR = os.path.join("fashion_datatset", "*")
10
+ OUTPUT_CAPTIONS_DIR = "fashion_datatset_captions"
11
+
12
+ os.makedirs(OUTPUT_CAPTIONS_DIR, exist_ok=True)
13
+
14
+
15
+ def change_extension(filename, new_extension):
16
+ base_name, _ = os.path.splitext(filename)
17
+ return f"{base_name}.{new_extension}"
18
+
19
+
20
+ if __name__ == '__main__':
21
+ model, processor = create_llava_next_pipeline()
22
+
23
+ images_path = sorted(glob.glob(INPUT_IMAGES_DIR))
24
+
25
+ for index, image_path in enumerate(images_path):
26
+ print(f"index: {index}, total images: {len(images_path)}, {image_path}")
27
+ image = Image.open(image_path)
28
+
29
+ prompt = """
30
+ You're a fashion expert. The list of clothing properties includes [color, pattern, style, fit, type, hemline,
31
+ material, sleeve-length, fabric-elasticity, neckline, waistline]. Please provide the following information in
32
+ JSON format for the outfit shown in the image. Question: What are the color, pattern, style, fit, type,
33
+ hemline, material, sleeve length, fabric elasticity, neckline, and waistline of the outfit in the image?
34
+ Answer:
35
+ """
36
+
37
+ json_file_path = os.path.join(OUTPUT_CAPTIONS_DIR,
38
+ change_extension(os.path.basename(image_path), "json"))
39
+ caption_file_path = os.path.join(OUTPUT_CAPTIONS_DIR,
40
+ change_extension(os.path.basename(image_path), "txt"))
41
+
42
+ if os.path.exists(caption_file_path) and os.path.exists(json_file_path):
43
+ print(f"caption already exists for {image_path}")
44
+ continue
45
+
46
+ json_data, generated_caption = caption_image(image, prompt, model, processor, json_only=False)
47
+
48
+ with open(json_file_path, "w") as f:
49
+ json.dump(json_data, f)
50
+
51
+ with open(caption_file_path, "w") as f:
52
+ f.write(generated_caption)
demo/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .extract_garment import demo as extract_garment_demo
2
+ from .model_swap import demo as model_swap_demo
3
+ from .outfit_generator import demo as outfit_generator_demo
demo/extract_garment/README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Extract Garment AI
3
+ emoji: 📊
4
+ colorFrom: indigo
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 4.44.1
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ short_description: Gradio Demo of Extract Garment AI by TryOn Labs.
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
demo/extract_garment/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .app import demo
demo/extract_garment/app.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import glob
2
+ import os
3
+ from PIL import Image
4
+
5
+ import gradio as gr
6
+ from tryon import preprocessing
7
+
8
+
9
+ def extract_garment(input_img, cls):
10
+ print(input_img, type(input_img), cls)
11
+
12
+ input_dir = "input_image"
13
+ output_dir = "output_image"
14
+
15
+ os.makedirs(input_dir, exist_ok=True)
16
+ os.makedirs(output_dir, exist_ok=True)
17
+
18
+ for f in glob.glob(input_dir + "/*.*"):
19
+ os.remove(f)
20
+
21
+ for f in glob.glob(output_dir + "/*.*"):
22
+ os.remove(f)
23
+
24
+ for f in glob.glob("cloth-mask/*.*"):
25
+ os.remove(f)
26
+
27
+ input_img.save(os.path.join(input_dir, "img.jpg"))
28
+
29
+ preprocessing.extract_garment(inputs_dir=input_dir, outputs_dir=output_dir, cls=cls)
30
+
31
+ return Image.open(glob.glob(output_dir + "/*.*")[0])
32
+
33
+
34
+ css = """
35
+ #col-container {
36
+ margin: 0 auto;
37
+ max-width: 720px;
38
+ }
39
+ """
40
+
41
+ with gr.Blocks(css=css) as demo:
42
+ with gr.Column(elem_id="col-container"):
43
+ gr.Markdown(f"""
44
+ # Clothes Extraction using U2Net
45
+ Pull out clothes like tops, bottoms, and dresses from a photo. This implementation is based on the [U2Net](https://github.com/xuebinqin/U-2-Net) model.
46
+ """)
47
+
48
+ with gr.Row():
49
+ with gr.Column():
50
+ input_image = gr.Image(label="Input Image", type='pil', height="400px", show_label=True)
51
+ dropdown = gr.Dropdown(["upper", "lower", "dress"], value="upper", label="Extract garment",
52
+ info="Select the garment type you wish to extract!")
53
+
54
+ output_image = gr.Image(label="Extracted garment", type='pil', height="400px", show_label=True,
55
+ show_download_button=True)
56
+
57
+ with gr.Row():
58
+ submit_button = gr.Button("Submit", variant='primary', scale=1)
59
+ reset_button = gr.ClearButton(value="Reset", scale=1)
60
+
61
+ gr.on(
62
+ triggers=[submit_button.click],
63
+ fn=extract_garment,
64
+ inputs=[input_image, dropdown],
65
+ outputs=[output_image]
66
+ )
67
+
68
+ reset_button.click(
69
+ fn=lambda: (None, "upper", None),
70
+ inputs=[],
71
+ outputs=[input_image, dropdown, output_image],
72
+ concurrency_limit=1,
73
+ )
74
+
75
+ if __name__ == '__main__':
76
+ demo.launch()
demo/extract_garment/requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==4.44.1
2
+ pillow
3
+ tryondiffusion
demo/model_swap/.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .token
demo/model_swap/README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Model Swap AI
3
+ emoji: 📊
4
+ colorFrom: indigo
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 4.44.1
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ short_description: Gradio Demo of Model Swap AI by TryOn Labs.
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
demo/model_swap/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .app import demo
demo/model_swap/app.py ADDED
@@ -0,0 +1,321 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os.path
2
+
3
+ import gradio as gr
4
+ import json
5
+ import requests
6
+ import time
7
+ from gradio_modal import Modal
8
+ from io import BytesIO
9
+
10
+ TRYON_SERVER_HOST = "https://prod.server.tryonlabs.ai"
11
+ TRYON_SERVER_PORT = "80"
12
+ if TRYON_SERVER_PORT == "80":
13
+ TRYON_SERVER_URL = f"{TRYON_SERVER_HOST}"
14
+ else:
15
+ TRYON_SERVER_URL = f"{TRYON_SERVER_HOST}:{TRYON_SERVER_PORT}"
16
+
17
+ TRYON_SERVER_API_URL = f"{TRYON_SERVER_URL}/api/v1/"
18
+
19
+
20
+ def start_model_swap(input_image, prompt, cls, seed, guidance_scale, num_results, strength, inference_steps):
21
+ # make a request to TryOn Server
22
+ # 1. create an experiment image
23
+ print("inputs:", input_image, prompt, cls, seed, guidance_scale, num_results, strength, inference_steps)
24
+
25
+ if input_image is None:
26
+ raise gr.Error("Select an image!")
27
+
28
+ if prompt is None or prompt == "":
29
+ raise gr.Error("Enter a prompt!")
30
+
31
+ token = load_token()
32
+ if token is None or token == "":
33
+ raise gr.Error("You need to login first!")
34
+ else:
35
+ login(token)
36
+
37
+ byte_io = BytesIO()
38
+ input_image.save(byte_io, 'png')
39
+ byte_io.seek(0)
40
+
41
+ r = requests.post(f"{TRYON_SERVER_API_URL}experiment_image/",
42
+ files={"image": (
43
+ 'ei_image.png',
44
+ byte_io,
45
+ 'image/png'
46
+ )},
47
+ data={
48
+ "type": "model",
49
+ "preprocess": "false"},
50
+ headers={
51
+ "Authorization": f"Bearer {token}"
52
+ })
53
+ # print(r.json())
54
+ if r.status_code == 200 or r.status_code == 201:
55
+ print("Experiment image created successfully", r.json())
56
+ res = r.json()
57
+ # 2 create an experiment
58
+ r2 = requests.post(f"{TRYON_SERVER_API_URL}experiment/",
59
+ data={
60
+ "model_id": res['id'],
61
+ "action": "model_swap",
62
+ "params": json.dumps({"prompt": prompt,
63
+ "guidance_scale": guidance_scale,
64
+ "strength": strength,
65
+ "num_inference_steps": inference_steps,
66
+ "seed": seed,
67
+ "garment_class": f"{cls} garment",
68
+ "negative_prompt": "(hands:1.15), disfigured, ugly, bad, immature"
69
+ ", cartoon, anime, 3d, painting, b&w, (ugly),"
70
+ " (pixelated), watermark, glossy, smooth, "
71
+ "earrings, necklace",
72
+ "num_results": num_results})
73
+ },
74
+ headers={
75
+ "Authorization": f"Bearer {token}"
76
+ })
77
+ if r2.status_code == 200 or r2.status_code == 201:
78
+ # 3. keep checking the status of the experiment
79
+ res2 = r2.json()
80
+ print("Experiment created successfully", res2)
81
+ time.sleep(10)
82
+
83
+ experiment = res2['experiment']
84
+ status = fetch_experiment_status(experiment_id=experiment['id'], token=token)
85
+ status_status = status['status']
86
+ while status_status == "running":
87
+ time.sleep(10)
88
+ status = fetch_experiment_status(experiment_id=experiment['id'], token=token)
89
+ status_status = status['status']
90
+ print(f"Current status: {status_status}")
91
+
92
+ if status['status'] == "success":
93
+ print("Experiment successful")
94
+ print(f"Results:{status['result_images']}")
95
+ return status['result_images']
96
+ elif status['status'] == "failed":
97
+ print("Experiment failed")
98
+ raise gr.Error("Experiment failed")
99
+ else:
100
+ print(f"Error: {r2.text}")
101
+ raise gr.Error(f"Failure: {r2.text}")
102
+ else:
103
+ print(f"Error: {r.text}")
104
+ raise gr.Error(f"Failure: {r.text}")
105
+
106
+
107
+ def fetch_experiment_status(experiment_id, token):
108
+ print(f"experiment id:{experiment_id}")
109
+
110
+ r3 = requests.get(f"{TRYON_SERVER_API_URL}experiment/{experiment_id}/",
111
+ headers={
112
+ "Authorization": f"Bearer {token}"
113
+ })
114
+ if r3.status_code == 200:
115
+ res = r3.json()
116
+ if res['status'] == "running":
117
+ return {"status": "running"}
118
+ elif res['status'] == "success":
119
+ experiment = r3.json()['experiment']
120
+ result_images = [f"{TRYON_SERVER_URL}/{experiment['result']['image_url']}"]
121
+ if len(experiment['results']) > 0:
122
+ for result in experiment['results']:
123
+ result_images.append(f"{TRYON_SERVER_URL}/{result['image_url']}")
124
+ return {"status": "success", "result_images": result_images}
125
+ elif res['status'] == "failed":
126
+ return {"status": "failed"}
127
+ else:
128
+ print(f"Error: {r3.text}")
129
+ return {"status": "failed"}
130
+
131
+
132
+ def get_user_credits(token):
133
+ if token == "":
134
+ return None
135
+
136
+ r = requests.get(f"{TRYON_SERVER_API_URL}user/get/", headers={
137
+ "Authorization": f"Bearer {token}"
138
+ })
139
+ if r.status_code == 200:
140
+ res = r.json()
141
+ return res['credits']
142
+ else:
143
+ print(f"Error: {r.text}")
144
+ return None
145
+
146
+
147
+ def load_token():
148
+ if os.path.exists(".token"):
149
+ with open(".token", "r") as f:
150
+ return json.load(f)['token']
151
+ else:
152
+ return None
153
+
154
+
155
+ def save_token(access_token):
156
+ if access_token != "":
157
+ with open(".token", "w") as f:
158
+ json.dump({"token": access_token}, f)
159
+ else:
160
+ raise gr.Error("No token provided!")
161
+
162
+
163
+ def is_logged_in():
164
+ loaded_token = load_token()
165
+ if loaded_token is None or loaded_token == "":
166
+ return False
167
+ else:
168
+ return True
169
+
170
+
171
+ def login(token):
172
+ print("logging in...")
173
+ # validate token
174
+ r = requests.post(f"{TRYON_SERVER_URL}/api/token/verify/", data={"token": token})
175
+ if r.status_code == 200:
176
+ save_token(token)
177
+ return True
178
+ else:
179
+ raise gr.Error("Login failed")
180
+
181
+
182
+ def logout():
183
+ print("logged out")
184
+ with open(".token", "w") as f:
185
+ json.dump({"token": ""}, f)
186
+ return [False, ""]
187
+
188
+
189
+ css = """
190
+ #col-container {
191
+ margin: 0 auto;
192
+ max-width: 1024px;
193
+ }
194
+ #credits-col-container{
195
+ display:flex;
196
+ justify-content: right;
197
+ align-items: center;
198
+ font-size: 24px;
199
+ margin-right: 1rem;
200
+ }
201
+ #login-modal{
202
+ max-width: 728px;
203
+ margin: 0 auto;
204
+ margin-top: 1rem;
205
+ margin-bottom: 1rem;
206
+ }
207
+ #login-logout-btn{
208
+ display:inline;
209
+ max-width: 124px;
210
+ }
211
+ """
212
+
213
+ with gr.Blocks(css=css, theme=gr.themes.Default()) as demo:
214
+ print("is logged in:", is_logged_in())
215
+ logged_in = gr.State(is_logged_in())
216
+ if os.path.exists(".token"):
217
+ with open(".token", "r") as f:
218
+ user_token = gr.State(json.load(f)["token"])
219
+ else:
220
+ user_token = gr.State("")
221
+
222
+ with Modal(visible=False) as modal:
223
+ @gr.render(inputs=user_token)
224
+ def rerender1(user_token1):
225
+ with gr.Column(elem_id="login-modal"):
226
+ access_token = gr.Textbox(
227
+ label="Token",
228
+ lines=1,
229
+ value=user_token1,
230
+ type="password",
231
+ placeholder="Enter your access token here!",
232
+ info="Visit https://playground.tryonlabs.ai to retrieve your access token."
233
+ )
234
+
235
+ login_submit_btn = gr.Button("Login", scale=1, variant='primary')
236
+ login_submit_btn.click(
237
+ fn=lambda access_token: (login(access_token), Modal(visible=False), access_token),
238
+ inputs=[access_token], outputs=[logged_in, modal, user_token],
239
+ concurrency_limit=1)
240
+
241
+ with gr.Row(elem_id="col-container"):
242
+ with gr.Column():
243
+ gr.Markdown(f"""
244
+ # Model Swap AI
245
+ ## by TryOn Labs (https://www.tryonlabs.ai)
246
+ Swap a human model with a artificial model generated by Artificial Model while keeping the garment intact.
247
+ """)
248
+
249
+
250
+ @gr.render(inputs=logged_in)
251
+ def rerender(is_logged_in):
252
+ with gr.Column():
253
+ if not is_logged_in:
254
+ with gr.Row(elem_id="credits-col-container"):
255
+ login_btn = gr.Button(value="Login", variant='primary', elem_id="login-logout-btn", size="sm")
256
+ login_btn.click(lambda: Modal(visible=True), None, modal)
257
+ else:
258
+ user_credits = get_user_credits(load_token())
259
+ print("user_credits", user_credits)
260
+ gr.HTML(f"""<div><p id="credits-col-container">Your Credits:
261
+ {user_credits if user_credits is not None else "0"}</p>
262
+ <p style="text-align: right;">Visit <a href="https://playground.tryonlabs.ai">
263
+ TryOn AI Playground</a> to acquire more credits</p></div>""")
264
+ with gr.Row(elem_id="credits-col-container"):
265
+ logout_btn = gr.Button(value="Logout", scale=1, variant='primary', size="sm",
266
+ elem_id="login-logout-btn")
267
+ logout_btn.click(fn=logout, inputs=None, outputs=[logged_in, user_token], concurrency_limit=1)
268
+
269
+ with gr.Column(elem_id="col-container"):
270
+ with gr.Row():
271
+ with gr.Column():
272
+ input_image = gr.Image(label="Original image", type='pil', height="400px", show_label=True)
273
+ prompt = gr.Textbox(
274
+ label="Prompt",
275
+ lines=3,
276
+ placeholder="Enter your prompt here!",
277
+ )
278
+ dropdown = gr.Dropdown(["upper", "lower", "dress"], value="upper", label="Retain garment",
279
+ info="Select the garment type you want to retain in the generated image!")
280
+
281
+ gallery = gr.Gallery(
282
+ label="Generated images", show_label=True, elem_id="gallery"
283
+ , columns=[3], rows=[1], object_fit="contain", height="auto")
284
+
285
+ # output_image = gr.Image(label="Swapped model", type='pil', height="400px", show_label=True,
286
+ # show_download_button=True)
287
+
288
+ with gr.Accordion("Advanced Settings", open=False):
289
+ with gr.Row():
290
+ seed = gr.Number(label="Seed", value=-1, interactive=True, minimum=-1)
291
+ guidance_scale = gr.Number(label="Guidance Scale", value=7.5, interactive=True, minimum=0.0,
292
+ maximum=10.0,
293
+ step=0.1)
294
+ num_results = gr.Number(label="Number of results", value=2, minimum=1, maximum=5)
295
+
296
+ with gr.Row():
297
+ strength = gr.Slider(0.00, 1.00, value=0.99, label="Strength",
298
+ info="Choose between 0.00 and 1.00", step=0.01, interactive=True)
299
+ inference_steps = gr.Number(label="Inference Steps", value=20, interactive=True, minimum=1, step=1)
300
+
301
+ with gr.Row():
302
+ submit_button = gr.Button("Submit", variant='primary', scale=1)
303
+ reset_button = gr.ClearButton(value="Reset", scale=1)
304
+
305
+ gr.on(
306
+ triggers=[submit_button.click],
307
+ fn=start_model_swap,
308
+ inputs=[input_image, prompt, dropdown, seed, guidance_scale, num_results, strength, inference_steps],
309
+ outputs=[gallery]
310
+ )
311
+
312
+ reset_button.click(
313
+ fn=lambda: (None, None, "upper", None, -1, 7.5, 2, 0.99, 20),
314
+ inputs=[],
315
+ outputs=[input_image, prompt, dropdown, gallery, seed, guidance_scale,
316
+ num_results, strength, inference_steps],
317
+ concurrency_limit=1,
318
+ )
319
+
320
+ if __name__ == '__main__':
321
+ demo.launch()
demo/model_swap/requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio==4.44.1
2
+ gradio_modal==0.0.3
demo/outfit_generator/README.md ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FLUX.1-dev LoRA Outfit Generator Gradio Demo
2
+ ## by TryOn Labs (https://www.tryonlabs.ai)
3
+ Generate an outfit by describing the color, pattern, fit, style, material, type, etc.
4
+
5
+ ## Model description
6
+
7
+ FLUX.1-dev LoRA Outfit Generator can create an outfit by detailing the color, pattern, fit, style, material, and type.
8
+
9
+ ## Inference
10
+
11
+ ```
12
+ import random
13
+
14
+ from diffusers import FluxPipeline
15
+ import torch
16
+
17
+ seed=42
18
+ prompt = "denim dark blue 5-pocket ankle-length jeans in washed stretch denim slightly looser fit with a wide waist panel for best fit over the tummy and tapered legs with raw-edge frayed hems"
19
+ PRE_TRAINED_MODEL = "black-forest-labs/FLUX.1-dev"
20
+ FINE_TUNED_MODEL = "tryonlabs/FLUX.1-dev-LoRA-Outfit-Generator"
21
+
22
+ # Load Flux
23
+ pipe = FluxPipeline.from_pretrained(PRE_TRAINED_MODEL, torch_dtype=torch.float16).to("cuda")
24
+
25
+ # Load fine-tuned model
26
+ pipe.load_lora_weights(FINE_TUNED_MODEL, adapter_name="default", weight_name="outfit-generator.safetensors")
27
+
28
+ seed = random.randint(0, MAX_SEED)
29
+
30
+ generator = torch.Generator().manual_seed(seed)
31
+
32
+ image = pipe(prompt, height=1024, width=1024, num_images_per_prompt=1, generator=generator,
33
+ guidance_scale=4.5, num_inference_steps=40).images[0]
34
+
35
+ image.save("gen_image.jpg")
36
+ ```
37
+
38
+ ## Dataset used
39
+
40
+ H&M Fashion Captions Dataset - 20.5k samples
41
+ https://huggingface.co/datasets/tomytjandra/h-and-m-fashion-caption
42
+
43
+ ## Repository used
44
+
45
+ AI Toolkit by Ostris
46
+ https://github.com/ostris/ai-toolkit
47
+
48
+ ## Download model
49
+
50
+ Weights for this model are available in Safetensors format.
51
+
52
+ [Download](https://huggingface.co/tryonlabs/FLUX.1-dev-LoRA-Outfit-Generator/tree/main) them in the Files & versions tab.
53
+
54
+ ## Install dependencies
55
+
56
+ ```
57
+ git clone https://github.com/tryonlabs/FLUX.1-dev-LoRA-Outfit-Generator.git
58
+ cd FLUX.1-dev-LoRA-Outfit-Generator
59
+ conda create -n demo python=3.12
60
+ pip install -r requirements.txt
61
+ conda install pytorch pytorch-cuda=12.4 -c pytorch -c nvidia
62
+ ```
63
+
64
+ ## Run demo
65
+
66
+ ```
67
+ gradio app.py
68
+ ```
69
+
70
+ ## Generated images
71
+
72
+ ![alt](images/sample1.jpeg "sample1")
73
+ #### A dress with Color: Black, Department: Dresses, Detail: High Low,Fabric-Elasticity: No Sretch, Fit: Fitted, Hemline: Slit, Material: Gabardine, Neckline: Collared, Pattern: Solid, Sleeve-Length: Sleeveless, Style: Casual, Type: Tunic, Waistline: Regular
74
+ ***
75
+ ![alt](images/sample2.jpeg "sample2")
76
+ #### A dress with Color: Red, Department: Dresses, Detail: Belted, Fabric-Elasticity: High Stretch, Fit: Fitted, Hemline: Flared, Material: Gabardine, Neckline: Off The Shoulder, Pattern: Floral, Sleeve-Length: Sleeveless, Style: Elegant, Type: Fit and Flare, Waistline: High
77
+ ***
78
+ ![alt](images/sample3.jpeg "sample3")
79
+ #### A dress with Color: Multicolored, Department: Dresses, Detail: Split, Fabric-Elasticity: High Stretch, Fit: Fitted, Hemline: Slit, Material: Gabardine, Neckline: V Neck, Pattern: Leopard, Sleeve-Length: Sleeveless, Style: Casual, Type: T Shirt, Waistline: Regular
80
+ ***
81
+ ![alt](images/sample4.jpeg "sample4")
82
+ #### A dress with Color: Brown, Department: Dresses, Detail: Zipper, Fabric-Elasticity: No Sretch, Fit: Fitted, Hemline: Asymmetrical, Material: Satin, Neckline: Spaghetti Straps, Pattern: Floral, Sleeve-Length: Sleeveless, Style: Boho, Type: Cami Top, Waistline: High
83
+ ***
84
+
85
+ ## License
86
+ MIT [License](LICENSE)
demo/outfit_generator/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .app import demo
demo/outfit_generator/app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os.path
import random
import time

import gradio as gr
import numpy as np
import spaces
import torch
from diffusers import FluxPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
PRE_TRAINED_MODEL = "black-forest-labs/FLUX.1-dev"
FINE_TUNED_MODEL = "tryonlabs/FLUX.1-dev-LoRA-Outfit-Generator"
# Bug fix: os.makedirs does not expand "~", so the original literal "~/results"
# created a directory named "~" in the current working directory.
RESULTS_DIR = os.path.expanduser("~/results")
os.makedirs(RESULTS_DIR, exist_ok=True)

if torch.cuda.is_available():
    torch_dtype = torch.bfloat16
else:
    torch_dtype = torch.float32

# Load the base FLUX.1-dev pipeline.
# NOTE(review): torch.float16 is hardcoded and the `torch_dtype` selected above
# is unused; .to("cuda") will fail on CPU-only machines — confirm GPU-only deployment.
pipe = FluxPipeline.from_pretrained(PRE_TRAINED_MODEL, torch_dtype=torch.float16).to("cuda")

# Attach the fine-tuned outfit-generator LoRA weights on top of the base model.
pipe.load_lora_weights(FINE_TUNED_MODEL, adapter_name="default", weight_name="outfit-generator.safetensors")

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024
32
+
33
+ @spaces.GPU(duration=65)
34
+ def infer(
35
+ prompt,
36
+ seed=42,
37
+ randomize_seed=False,
38
+ width=1024,
39
+ height=1024,
40
+ guidance_scale=4.5,
41
+ num_inference_steps=40,
42
+ progress=gr.Progress(track_tqdm=True),
43
+ ):
44
+ if randomize_seed:
45
+ seed = random.randint(0, MAX_SEED)
46
+
47
+ generator = torch.Generator().manual_seed(seed)
48
+
49
+ image = pipe(prompt, height=width, width=height, num_images_per_prompt=1, generator=generator,
50
+ guidance_scale=guidance_scale,
51
+ num_inference_steps=num_inference_steps).images[0]
52
+
53
+ try:
54
+ # save image
55
+ current_time = int(time.time() * 1000)
56
+ image.save(os.path.join(RESULTS_DIR, f"gen_img_{current_time}.png"))
57
+ with open(os.path.join(RESULTS_DIR, f"gen_img_{current_time}.json"), "w") as f:
58
+ json.dump({"prompt": prompt, "height": height, "width": width, "guidance_scale": guidance_scale,
59
+ "num_inference_steps": num_inference_steps, "seed": seed}, f)
60
+ except Exception as e:
61
+ print(str(e))
62
+
63
+ return image, seed
64
+
65
+
66
# Example prompts shown beneath the input box; selecting one runs `infer`
# with lazily-cached results.
examples = [
    "stripe red striped jersey top in a soft cotton and modal blend with short sleeves a chest pocket and rounded hem",
    "A dress with Color: Orange, Department: Dresses, Detail: Split Thigh, Fabric-Elasticity: No Sretch, Fit: Fitted, Hemline: Slit, Material: Gabardine, Neckline: Gathered, Pattern: Tropical, Sleeve-Length: Sleeveless, Style: Boho, Type: A Line Skirt, Waistline: High",
    "treatment dark pink knee-length skirt in crocodile-patterned imitation leather high waist with belt loops and press-studs a zip fly diagonal side pockets and a slit at the front the polyester content of the skirt is partly recycled",
    "A dress with Color: Maroon, Department: Dresses, Detail: Ruched Bust, Fabric-Elasticity: Slight Stretch, Fit: Fitted, Hemline: Slit, Material: Gabardine, Neckline: Spaghetti Straps, Pattern: Floral, Sleeve-Length: Sleeveless, Style: Boho, Type: Cami Top, Waistline: Regular",
    "denim dark blue 5-pocket ankle-length jeans in washed stretch denim slightly looser fit with a wide waist panel for best fit over the tummy and tapered legs with raw-edge frayed hems"
]

# Center the main column and cap its width.
css = """
#col-container {
    margin: 0 auto;
    max-width: 768px;
}
"""

# Build the Gradio UI: prompt box + run button, result image, and an
# "Advanced Settings" accordion mirroring infer()'s parameters.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(f"""
# FLUX.1-dev LoRA Outfit Generator
## by TryOn Labs (https://www.tryonlabs.ai)
Generate an outfit by describing the color, pattern, fit, style, material, type, etc.
""")
        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )

            run_button = gr.Button("Run", scale=0, variant="primary")

        result = gr.Image(label="Result", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )

            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=512,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,
                )

                height = gr.Slider(
                    label="Height",
                    minimum=512,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,
                )

            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=7.5,
                    step=0.1,
                    value=4.5,
                )

                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=40,
                )

        # Clicking an example runs infer with default advanced settings.
        gr.Examples(examples=examples, inputs=[prompt], outputs=[result, seed], fn=infer, cache_examples=True,
                    cache_mode="lazy")
    # Both the Run button and pressing Enter in the prompt box trigger inference.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt,
            seed,
            randomize_seed,
            width,
            height,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[result, seed],
    )

if __name__ == "__main__":
    demo.launch(share=True)
demo/outfit_generator/images/sample1.jpeg ADDED
demo/outfit_generator/images/sample2.jpeg ADDED
demo/outfit_generator/images/sample3.jpeg ADDED
demo/outfit_generator/images/sample4.jpeg ADDED
demo/outfit_generator/requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ spaces
2
+ gradio
3
+ diffusers
4
+ torch
5
+ numpy
6
+ transformers
7
+ accelerate
8
+ protobuf
9
+ sentencepiece
10
+ peft==0.13.2
environment.yml ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: tryondiffusion
2
+ channels:
3
+ - defaults
4
+ dependencies:
5
+ - blas=1.0=mkl
6
+ - bottleneck=1.3.5=py310h4e76f89_0
7
+ - bzip2=1.0.8=h1de35cc_0
8
+ - ca-certificates=2023.08.22=hecd8cb5_0
9
+ - cffi=1.15.1=py310h6c40b1e_3
10
+ - gmp=6.2.1=he9d5cce_3
11
+ - gmpy2=2.1.2=py310hd5de756_0
12
+ - intel-openmp=2023.1.0=ha357a0b_43547
13
+ - jinja2=3.1.2=py310hecd8cb5_0
14
+ - libcxx=14.0.6=h9765a3e_0
15
+ - libffi=3.4.4=hecd8cb5_0
16
+ - libprotobuf=3.20.3=hfff2838_0
17
+ - libuv=1.44.2=h6c40b1e_0
18
+ - mkl=2023.1.0=h8e150cf_43559
19
+ - mkl-service=2.4.0=py310h6c40b1e_1
20
+ - mkl_fft=1.3.8=py310h6c40b1e_0
21
+ - mkl_random=1.2.4=py310ha357a0b_0
22
+ - mpc=1.1.0=h6ef4df4_1
23
+ - mpfr=4.0.2=h9066e36_1
24
+ - mpmath=1.3.0=py310hecd8cb5_0
25
+ - ncurses=6.4=hcec6c5f_0
26
+ - networkx=3.1=py310hecd8cb5_0
27
+ - ninja=1.10.2=hecd8cb5_5
28
+ - ninja-base=1.10.2=haf03e11_5
29
+ - numexpr=2.8.7=py310h827a554_0
30
+ - openssl=3.0.11=hca72f7f_2
31
+ - pandas=2.1.1=py310h3ea8b11_0
32
+ - pip=23.2.1=py310hecd8cb5_0
33
+ - pycparser=2.21=pyhd3eb1b0_0
34
+ - python=3.10.13=h5ee71fb_0
35
+ - python-dateutil=2.8.2=pyhd3eb1b0_0
36
+ - python-tzdata=2023.3=pyhd3eb1b0_0
37
+ - pytz=2023.3.post1=py310hecd8cb5_0
38
+ - readline=8.2=hca72f7f_0
39
+ - six=1.16.0=pyhd3eb1b0_1
40
+ - sqlite=3.41.2=h6c40b1e_0
41
+ - tbb=2021.8.0=ha357a0b_0
42
+ - tk=8.6.12=h5d9f67b_0
43
+ - tzdata=2023c=h04d1e81_0
44
+ - wheel=0.38.4=py310hecd8cb5_0
45
+ - xz=5.4.2=h6c40b1e_0
46
+ - zlib=1.2.13=h4dc903c_0
47
+ - pip:
48
+ - absl-py==2.0.0
49
+ - aiofiles==23.2.1
50
+ - annotated-types==0.6.0
51
+ - anyio==4.3.0
52
+ - appnope==0.1.3
53
+ - asttokens==2.4.0
54
+ - astunparse==1.6.3
55
+ - backcall==0.2.0
56
+ - cachetools==5.3.1
57
+ - carvekit==4.1.1
58
+ - certifi==2023.7.22
59
+ - charset-normalizer==3.2.0
60
+ - click==8.1.7
61
+ - comm==0.1.4
62
+ - contourpy==1.1.1
63
+ - cycler==0.11.0
64
+ - debugpy==1.8.0
65
+ - decorator==5.1.1
66
+ - diffusers==0.29.2
67
+ - einops==0.7.0
68
+ - exceptiongroup==1.1.3
69
+ - executing==1.2.0
70
+ - fastapi==0.108.0
71
+ - ffmpy==0.3.3
72
+ - filelock==3.12.4
73
+ - flatbuffers==23.5.26
74
+ - fonttools==4.42.1
75
+ - fsspec==2024.3.1
76
+ - gast==0.5.4
77
+ - google-auth==2.23.3
78
+ - google-auth-oauthlib==1.0.0
79
+ - google-pasta==0.2.0
80
+ - gradio==4.39.0
81
+ - gradio-client==1.1.1
82
+ - gradio-modal==0.0.3
83
+ - grpcio==1.59.0
84
+ - h11==0.14.0
85
+ - h5py==3.10.0
86
+ - httpcore==1.0.5
87
+ - httpx==0.27.0
88
+ - huggingface-hub==0.23.4
89
+ - idna==3.4
90
+ - imageio==2.34.0
91
+ - importlib-metadata==8.0.0
92
+ - importlib-resources==6.4.0
93
+ - ipykernel==6.25.2
94
+ - ipython==8.15.0
95
+ - jedi==0.19.0
96
+ - jupyter-client==8.3.1
97
+ - jupyter-core==5.3.1
98
+ - keras==2.14.0
99
+ - kiwisolver==1.4.5
100
+ - lazy-loader==0.3
101
+ - libclang==16.0.6
102
+ - loguru==0.7.2
103
+ - markdown==3.5
104
+ - markdown-it-py==3.0.0
105
+ - markupsafe==2.1.3
106
+ - matplotlib==3.8.0
107
+ - matplotlib-inline==0.1.6
108
+ - mdurl==0.1.2
109
+ - ml-dtypes==0.2.0
110
+ - nest-asyncio==1.5.8
111
+ - numpy==1.26.4
112
+ - oauthlib==3.2.2
113
+ - opencv-python==4.8.1.78
114
+ - opt-einsum==3.3.0
115
+ - orjson==3.10.6
116
+ - packaging==23.1
117
+ - parso==0.8.3
118
+ - pexpect==4.8.0
119
+ - pickleshare==0.7.5
120
+ - pillow==10.1.0
121
+ - platformdirs==3.10.0
122
+ - prompt-toolkit==3.0.39
123
+ - protobuf==4.24.4
124
+ - psutil==5.9.5
125
+ - ptyprocess==0.7.0
126
+ - pure-eval==0.2.2
127
+ - pyasn1==0.5.0
128
+ - pyasn1-modules==0.3.0
129
+ - pydantic==2.5.3
130
+ - pydantic-core==2.14.6
131
+ - pydub==0.25.1
132
+ - pygments==2.16.1
133
+ - pyparsing==3.1.1
134
+ - python-dotenv==1.0.1
135
+ - python-multipart==0.0.9
136
+ - pyyaml==6.0.1
137
+ - pyzmq==25.1.1
138
+ - regex==2024.5.15
139
+ - requests==2.31.0
140
+ - requests-oauthlib==1.3.1
141
+ - rich==13.7.1
142
+ - rsa==4.9
143
+ - ruff==0.5.5
144
+ - safetensors==0.4.3
145
+ - scikit-image==0.22.0
146
+ - scipy==1.11.4
147
+ - semantic-version==2.10.0
148
+ - setuptools==69.0.3
149
+ - shellingham==1.5.4
150
+ - sniffio==1.3.1
151
+ - stack-data==0.6.2
152
+ - starlette==0.32.0.post1
153
+ - sympy==1.12
154
+ - tensorboard==2.14.1
155
+ - tensorboard-data-server==0.7.1
156
+ - tensorflow==2.14.0
157
+ - tensorflow-estimator==2.14.0
158
+ - tensorflow-io-gcs-filesystem==0.34.0
159
+ - termcolor==2.3.0
160
+ - tifffile==2024.2.12
161
+ - tokenizers==0.19.1
162
+ - tomlkit==0.12.0
163
+ - torch==2.1.2
164
+ - torchvision==0.16.2
165
+ - tornado==6.3.3
166
+ - tqdm==4.66.1
167
+ - traitlets==5.10.0
168
+ - transformers==4.42.4
169
+ - typer==0.12.3
170
+ - typing==3.7.4.3
171
+ - typing-extensions==4.8.0
172
+ - urllib3==2.0.5
173
+ - uvicorn==0.25.0
174
+ - wcwidth==0.2.6
175
+ - websockets==11.0.3
176
+ - werkzeug==3.0.0
177
+ - wrapt==1.14.1
178
+ - zipp==3.19.2
179
+ prefix: /Users/apple/miniconda3/envs/tryondiffusion
main.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from dotenv import load_dotenv

# Load environment variables (e.g. the U2NET checkpoint path) before importing
# the tryon modules that read them.
load_dotenv()

import time
import os
import argparse

from tryon.preprocessing import segment_human, segment_garment, extract_garment

if __name__ == '__main__':
    # CLI entry point dispatching the three preprocessing actions.
    argp = argparse.ArgumentParser(description="Tryon preprocessing")
    argp.add_argument('-d',
                      '--dataset',
                      type=str, default="data", help='Path of the dataset dir')
    argp.add_argument('-a',
                      '--action',
                      type=str, default="", help='Ex. segment_garment, extract_garment, segment_human')
    argp.add_argument('-c',
                      '--cls',
                      type=str, default="upper", help='Ex. upper, lower, all')
    args = argp.parse_args()

    if args.action == "segment_garment":
        # 1. segment garment: write per-class garment masks
        print('Start time:', int(time.time()))
        segment_garment(inputs_dir=os.path.join(args.dataset, "original_cloth"),
                        outputs_dir=os.path.join(args.dataset, "garment_segmented"), cls=args.cls)
        print("End time:", int(time.time()))

    elif args.action == "extract_garment":
        # 2. extract garment: cut garments out and composite them on a white canvas
        print('Start time:', int(time.time()))
        extract_garment(inputs_dir=os.path.join(args.dataset, "original_cloth"),
                        outputs_dir=os.path.join(args.dataset, "cloth"), cls=args.cls, resize_to_width=400)
        print("End time:", int(time.time()))

    elif args.action == "segment_human":
        # 3. segment human: person mask for a single model image
        print('Start time:', int(time.time()))
        image_path = os.path.join(args.dataset, "original_human", "model.jpg")
        output_dir = os.path.join(args.dataset, "human_segmented")
        segment_human(image_path=image_path, output_dir=output_dir)
        print("End time:", int(time.time()))
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ numpy
3
+ opencv-python
4
+ pillow
5
+ matplotlib
6
+ tqdm
7
+ torchvision
8
+ einops
9
+ python-dotenv
10
+ scikit-image
11
+ diffusers
12
+ transformers
13
+ gradio==4.44.1
14
+ gradio_modal==0.0.3
15
+ python-dotenv
run_demo.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import argparse

if __name__ == '__main__':
    # Select and launch one of the bundled Gradio demos by name.
    cli = argparse.ArgumentParser(description="Gradio demo")
    cli.add_argument('-n',
                     '--name',
                     type=str, default="data", help='Name of the gradio demo to launch')
    options = cli.parse_args()

    selected = None
    if options.name == "extract_garment":
        from demo import extract_garment_demo as selected
    elif options.name == "model_swap":
        from demo import model_swap_demo as selected
    elif options.name == "outfit_generator":
        from demo import outfit_generator_demo as selected

    # Unknown names fall through silently, matching the original behaviour.
    if selected is not None:
        selected.launch()
run_ootd.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import argparse
import subprocess
import os
import pathlib

# CLI wrapper that runs OOTDiffusion's run.py inside its own conda environment.
parser = argparse.ArgumentParser(description='run ootd')
parser.add_argument('--gpu_id', '-g', type=int, default=0, required=False)
parser.add_argument('--model_path', type=str, default="", required=True)
parser.add_argument('--cloth_path', type=str, default="", required=True)
parser.add_argument('--output_path', type=str, default="", required=True)
parser.add_argument('--model_type', type=str, default="hd", required=False)
parser.add_argument('--category', '-c', type=int, default=0, required=False)
parser.add_argument('--scale', type=float, default=2.0, required=False)
parser.add_argument('--step', type=int, default=20, required=False)
parser.add_argument('--sample', type=int, default=4, required=False)
parser.add_argument('--seed', type=int, default=-1, required=False)
args = parser.parse_args()

print(args)

if __name__ == '__main__':
    # Consistency fix: derive the OOTDiffusion checkout location from the current
    # user's home directory (the interpreter path below already does), instead of
    # hardcoding /home/ubuntu.
    ootdiffusion_dir = os.path.join(str(pathlib.Path.home()), "ootdiffusion")

    command = (f"{os.path.join(str(pathlib.Path.home()), 'miniconda3/envs/ootdiffusion/bin/python')} "
               f"run.py --model_path {args.model_path} --cloth_path {args.cloth_path} "
               f"--output_path {args.output_path} --model_type {args.model_type} --category {args.category} "
               f"--image_scale {args.scale} --gpu_id {args.gpu_id} --n_samples {args.sample} --seed {args.seed} "
               f"--n_steps {args.step}")

    print("command:", command, command.split(" "))

    # Run inside the OOTDiffusion checkout so run.py resolves its relative paths.
    p = subprocess.Popen(command.split(" "), stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         cwd=ootdiffusion_dir)
    out, err = p.communicate()
    print(out, err)
36
+
37
+
scripts/install_conda.sh ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Setup Ubuntu
2
+ sudo apt update --yes
3
+ sudo apt upgrade --yes
4
+
5
+ # Get Miniconda and make it the main Python interpreter
6
+ wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
7
+ bash ~/miniconda.sh -b -p ~/miniconda
8
+ rm ~/miniconda.sh
9
+
10
+ export PATH=~/miniconda/bin:$PATH
scripts/install_sam2.sh ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
ENV_NAME="sam2"
# System packages needed to build SAM2's video/codec dependencies.
sudo apt-get -y update && sudo apt-get install -y --no-install-recommends ffmpeg libavutil-dev libavcodec-dev libavformat-dev libswscale-dev pkg-config build-essential libffi-dev
git clone https://github.com/facebookresearch/sam2.git ~/$ENV_NAME
# Fix: pass -y so conda create does not stall on an interactive prompt
# (the conda install line below already used -y).
conda create -y -n $ENV_NAME python=3.10
conda install -y -n $ENV_NAME pytorch torchvision torchaudio pytorch-cuda=12.4 -c pytorch -c nvidia
# Fix: pip has no -y option; the original "pip install -y -e" failed with
# "no such option: -y".
~/miniconda3/envs/$ENV_NAME/bin/pip install -e ~/$ENV_NAME
sh ~/$ENV_NAME/checkpoints/download_ckpts.sh
# Move the downloaded checkpoints into the repo's checkpoints folder
# (presumably download_ckpts.sh writes into the current directory — confirm).
mv sam2.1_hiera_base_plus.pt ~/$ENV_NAME/checkpoints/
mv sam2.1_hiera_large.pt ~/$ENV_NAME/checkpoints/
mv sam2.1_hiera_small.pt ~/$ENV_NAME/checkpoints/
mv sam2.1_hiera_tiny.pt ~/$ENV_NAME/checkpoints/
setup.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pathlib import Path

from setuptools import setup, find_packages

# Use the repository README as the long description.
this_directory = Path(__file__).parent
long_description = (this_directory / "README.md").read_text()

setup(
    name="tryondiffusion",
    version="0.1.0",
    license='Creative Commons BY-NC 4.0',
    packages=find_packages(),
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/kailashahirwar/tryondiffusion',
    # NOTE(review): `keywords` holds a sentence-long description — presumably
    # intended for the `description` field; confirm before publishing.
    keywords='Unofficial implementation of TryOnDiffusion: A Tale Of Two UNets',
    install_requires=[
        "torch",
        "numpy",
        "opencv-python",
        "pillow",
        "matplotlib",
        "tqdm",
        "torchvision",
        "einops",
        "scipy",
        "scikit-image",
        "gradio==4.44.1",
        "gradio_modal==0.0.3"
    ]
)
tryon/README.md ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Try-On Preprocessing
2
+
3
+ Before you start, make a .env file in your project's main folder. Put these environment variables inside it.
4
+ ```
5
+ U2NET_CLOTH_SEGM_CHECKPOINT_PATH=cloth_segm.pth
6
+ ```
7
+
8
+ #### Remember to load environment variables before you start running scripts.
9
+
10
+ ```
11
+ from dotenv import load_dotenv
12
+
13
+ load_dotenv()
14
+ ```
15
+
16
+ ### segment garment
17
+
18
+ ```
19
+ from tryon.preprocessing import segment_garment
20
+
21
+ segment_garment(inputs_dir=<inputs_dir>,
22
+ outputs_dir=<outputs_dir>, cls=<cls>)
23
+ ```
24
+
25
+ possible values for cls: lower, upper, all
26
+
27
+ ### extract garment
28
+
29
+ ```
30
+ from tryon.preprocessing import extract_garment
31
+
32
+ extract_garment(inputs_dir=<inputs_dir>,
33
+ outputs_dir=<outputs_dir>, cls=<cls>)
34
+ ```
tryon/__init__.py ADDED
File without changes
tryon/models/__init__.py ADDED
File without changes
tryon/models/ootdiffusion/setup.sh ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
ENV_NAME="ootdiffusion"
PROJECT_DIR="/home/ubuntu/ootdiffusion"

# Create the conda environment only if it does not exist yet.
if [ ! -d ~/miniconda3/envs/$ENV_NAME ]; then
echo "creating conda environment"
conda create -y -n $ENV_NAME python==3.10
fi

# clone repository
if [ ! -d $PROJECT_DIR ]; then
echo "cloning OOTDiffusion repository"
git clone https://github.com/tryonlabs/OOTDiffusion.git $PROJECT_DIR
fi

# Install dependencies with the environment's own pip (no activation needed).
~/miniconda3/envs/$ENV_NAME/bin/pip install -r $PROJECT_DIR/requirements.txt

# Fetch model checkpoints only when missing.
if [ ! -d $PROJECT_DIR/checkpoints/ootd ]; then
echo "downloading checkpoints"

# download checkpoints
git clone https://huggingface.co/levihsu/OOTDiffusion ~/ootd-checkpoints
git clone https://huggingface.co/openai/clip-vit-large-patch14 ~/clip-vit-large-patch14

mv ~/ootd-checkpoints/checkpoints/* $PROJECT_DIR/checkpoints/
rm -rf ~/ootd-checkpoints

mv ~/clip-vit-large-patch14 $PROJECT_DIR/checkpoints/
# NOTE(review): the directory was just moved away, so this rm targets a path
# that no longer exists — presumably a harmless leftover; confirm.
rm -rf ~/clip-vit-large-patch14

fi
tryon/preprocessing/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .preprocess_garment import segment_garment, extract_garment
2
+ from .utils import convert_to_jpg
3
+ from .preprocess_human import segment_human
tryon/preprocessing/captioning/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .generate_caption import (caption_image, create_llava_next_pipeline,
2
+ create_phi35mini_pipeline, convert_outfit_json_to_caption)
tryon/preprocessing/captioning/generate_caption.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
+ import torch
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
5
+ from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
6
+
7
+
8
def caption_image(image, question, model=None, processor=None, json_only=False):
    """
    Extract outfit details from an image using an image-to-text model.

    :param image: input image (PIL)
    :param question: question/prompt to ask about the image
    :param model: optional preloaded LLaVA-NeXT model
    :param processor: optional matching processor
    :param json_only: when True, skip the natural-language caption step
    :return: (json data, generated caption or None)
    """
    # Bug fix: the original tested `model is None and processor is None`, so
    # passing only one of the pair left the other as None and crashed below.
    if model is None or processor is None:
        model, processor = create_llava_next_pipeline()

    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": question},
            ],
        },
    ]

    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
    # Use the device the model actually lives on instead of hardcoding "cuda:0",
    # matching the device auto-detection in create_llava_next_pipeline.
    inputs = processor(image, prompt, return_tensors="pt").to(model.device)

    output = model.generate(**inputs, max_new_tokens=300)
    # Keep only the assistant reply that follows the final [/INST] marker.
    output = processor.decode(output[0], skip_special_tokens=True).split("[/INST]")[-1]
    # The model is expected to answer with a fenced ```json block; strip the fences.
    json_data = json.loads(output.replace("```json", "").replace("```", "").strip())

    if not json_only:
        generated_caption = convert_outfit_json_to_caption(json_data)
    else:
        generated_caption = None

    return json_data, generated_caption
44
+
45
+
46
def create_phi35mini_pipeline():
    """
    Create a text-generation pipeline backed by microsoft/Phi-3.5-mini-instruct.

    :return: transformers text-generation pipeline
    """
    # Fixed seed for reproducible generations.
    torch.random.manual_seed(0)

    model = AutoModelForCausalLM.from_pretrained(
        "microsoft/Phi-3.5-mini-instruct",
        device_map="cuda",  # NOTE(review): assumes a CUDA device is present — confirm GPU-only deployment
        torch_dtype="auto",
        trust_remote_code=True,
        attn_implementation="flash_attention_2"  # NOTE(review): requires the flash-attn package and a supported GPU
    )
    tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct")

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )
    return pipe
68
+
69
+
70
def create_llava_next_pipeline():
    """
    Load the LLaVA-NeXT (llava-v1.6-mistral-7b) model and its processor,
    placing the model on GPU when available, otherwise CPU.

    :return: (model, processor) tuple
    """
    model_id = "llava-hf/llava-v1.6-mistral-7b-hf"
    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    processor = LlavaNextProcessor.from_pretrained(model_id)
    model = LlavaNextForConditionalGeneration.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
    )
    model.to(target_device)

    return model, processor
83
+
84
+
85
def convert_outfit_json_to_caption(json_data, pipe=None):
    """
    Convert JSON data of an outfit into a natural language caption.

    :param json_data: dict of outfit attributes (JSON-serializable)
    :param pipe: optional preloaded text-generation pipeline; when None a
        Phi-3.5-mini-instruct pipeline is created on demand
    :return: generated caption string
    """
    if pipe is None:
        pipe = create_phi35mini_pipeline()

    # Deterministic decoding: sampling disabled, bounded output length.
    generation_args = {
        "max_new_tokens": 300,
        "return_full_text": False,
        "temperature": 0.0,
        "do_sample": False,
    }

    messages = [{"role": "user",
                 "content": f'Convert the {json.dumps(json_data)} JSON data into a natural '
                            f'language paragraph beginning with "An outfit with"'}]

    output = pipe(messages, **generation_args)[0]['generated_text'].strip()
    print(f"Output: {output}")
    return output
tryon/preprocessing/extract_garment_new.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn.functional as F
6
+ from PIL import Image
7
+ from torchvision import transforms
8
+
9
+ from .u2net import load_cloth_segm_model
10
+ from .utils import NormalizeImage, naive_cutout, resize_by_bigger_index, image_resize
11
+
12
+
13
def extract_garment(image, cls="all", resize_to_width=None, net=None, device=None):
    """
    Extract garments from the given image using the U2Net cloth-segmentation model.

    :param image: input PIL image (RGB)
    :param cls: garment class to extract: "upper", "lower", "dress" or "all"
    :param resize_to_width: optional output width; aspect ratio is preserved
    :param net: optional preloaded segmentation network
    :param device: torch device the network runs on
    :return: dict mapping class name ("upper"/"lower"/"dress") to a PIL image
    """
    if net is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        net = load_cloth_segm_model(device, os.environ.get("U2NET_CLOTH_SEGM_CHECKPOINT_PATH"), in_ch=3, out_ch=4)
    elif device is None:
        # Bug fix: when a preloaded net was passed without a device, the original
        # left device=None; fall back to the same auto-detection used above.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    transform_fn = transforms.Compose(
        [transforms.ToTensor(),
         NormalizeImage(0.5, 0.5)]
    )

    img_size = image.size
    # The segmentation model expects a fixed 768x768 input; masks are resized back later.
    img = image.resize((768, 768), Image.BICUBIC)
    image_tensor = transform_fn(img)
    image_tensor = torch.unsqueeze(image_tensor, 0)

    with torch.no_grad():
        output_tensor = net(image_tensor.to(device))
        output_tensor = F.log_softmax(output_tensor[0], dim=1)
        # Per-pixel argmax over the 4 classes (0=background, 1=upper, 2=lower, 3=dress).
        output_tensor = torch.max(output_tensor, dim=1, keepdim=True)[1]
        output_tensor = torch.squeeze(output_tensor, dim=0)
        output_arr = output_tensor.cpu().numpy()

    classes = {1: "upper", 2: "lower", 3: "dress"}

    if cls == "all":
        # Keep every non-background class present in the prediction. (The loop
        # variable is renamed: the original shadowed the `cls` parameter.)
        classes_to_save = [label for label in range(1, 4) if np.any(output_arr == label)]
    elif cls == "upper":
        classes_to_save = [1]
    elif cls == "lower":
        classes_to_save = [2]
    elif cls == "dress":
        classes_to_save = [3]
    else:
        raise ValueError(f"Unknown cls: {cls}")

    garments = dict()

    for cls1 in classes_to_save:
        alpha_mask = (output_arr == cls1).astype(np.uint8) * 255
        alpha_mask = alpha_mask[0]  # Selecting the first channel to make it 2D
        alpha_mask_img = Image.fromarray(alpha_mask, mode='L')
        alpha_mask_img = alpha_mask_img.resize(img_size, Image.BICUBIC)

        cutout = np.array(naive_cutout(image, alpha_mask_img))
        cutout = resize_by_bigger_index(cutout)

        # Paste the cutout centered on a white 1024x768 canvas.
        canvas = np.ones((1024, 768, 3), np.uint8) * 255
        y1, y2 = (canvas.shape[0] - cutout.shape[0]) // 2, (canvas.shape[0] + cutout.shape[0]) // 2
        x1, x2 = (canvas.shape[1] - cutout.shape[1]) // 2, (canvas.shape[1] + cutout.shape[1]) // 2

        # Alpha-blend the RGBA cutout onto the white canvas.
        alpha_s = cutout[:, :, 3] / 255.0
        alpha_l = 1.0 - alpha_s

        for c in range(0, 3):
            canvas[y1:y2, x1:x2, c] = (alpha_s * cutout[:, :, c] +
                                       alpha_l * canvas[y1:y2, x1:x2, c])

        # resize image before saving
        if resize_to_width:
            canvas = image_resize(canvas, width=resize_to_width)

        canvas = Image.fromarray(canvas)

        garments[classes[cls1]] = canvas

    return garments
tryon/preprocessing/preprocess_garment.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import glob
2
+ import os
3
+ from pathlib import Path
4
+
5
+ import numpy as np
6
+ import torch
7
+ import torch.nn.functional as F
8
+ from PIL import Image
9
+ from torchvision import transforms
10
+ from tqdm import tqdm
11
+
12
+ from .u2net import load_cloth_segm_model
13
+ from .utils import NormalizeImage, naive_cutout, resize_by_bigger_index, image_resize
14
+
15
+
16
def segment_garment(inputs_dir, outputs_dir, cls="all"):
    """
    Segment garments in every image under inputs_dir and write one grayscale
    mask per detected class to outputs_dir, named "<image stem>_<class id>.jpg"
    (class ids: 1=upper, 2=lower, 3=dress).

    :param inputs_dir: directory of input RGB images
    :param outputs_dir: directory for the masks (created if missing)
    :param cls: "upper", "lower", "dress" or "all"
    """
    os.makedirs(outputs_dir, exist_ok=True)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    transform_fn = transforms.Compose(
        [transforms.ToTensor(),
         NormalizeImage(0.5, 0.5)]
    )

    # U2Net cloth-segmentation model: 3 input channels, 4 output classes.
    net = load_cloth_segm_model(device, os.environ.get("U2NET_CLOTH_SEGM_CHECKPOINT_PATH"), in_ch=3, out_ch=4)

    images_list = sorted(os.listdir(inputs_dir))
    pbar = tqdm(total=len(images_list))

    for image_name in images_list:
        img = Image.open(os.path.join(inputs_dir, image_name)).convert('RGB')
        img_size = img.size
        # The model expects a fixed 768x768 input; masks are resized back below.
        img = img.resize((768, 768), Image.BICUBIC)
        image_tensor = transform_fn(img)
        image_tensor = torch.unsqueeze(image_tensor, 0)

        with torch.no_grad():
            output_tensor = net(image_tensor.to(device))
            output_tensor = F.log_softmax(output_tensor[0], dim=1)
            # Per-pixel argmax over the 4 classes (0 = background).
            output_tensor = torch.max(output_tensor, dim=1, keepdim=True)[1]
            output_tensor = torch.squeeze(output_tensor, dim=0)
            output_arr = output_tensor.cpu().numpy()

        if cls == "all":
            # Bug fix: the original reused `cls` as the loop variable here
            # ("for cls in range(1, 4)"), so after the first image `cls` was an
            # int and every subsequent image raised ValueError("Unknown cls: 3").
            # Keep only the non-background classes actually present.
            classes_to_save = [label for label in range(1, 4) if np.any(output_arr == label)]
        elif cls == "upper":
            classes_to_save = [1]
        elif cls == "lower":
            classes_to_save = [2]
        elif cls == "dress":
            classes_to_save = [3]
        else:
            raise ValueError(f"Unknown cls: {cls}")

        for cls1 in classes_to_save:
            alpha_mask = (output_arr == cls1).astype(np.uint8) * 255
            alpha_mask = alpha_mask[0]  # Selecting the first channel to make it 2D
            alpha_mask_img = Image.fromarray(alpha_mask, mode='L')
            alpha_mask_img = alpha_mask_img.resize(img_size, Image.BICUBIC)
            alpha_mask_img.save(os.path.join(outputs_dir, f'{image_name.split(".")[0]}_{cls1}.jpg'))

        pbar.update(1)

    pbar.close()
71
+
72
+
73
def extract_garment(inputs_dir, outputs_dir, cls="all", resize_to_width=None):
    """
    Cut segmented garments out of the source images and paste each one centered
    on a white 1024x768 canvas saved to outputs_dir.

    :param inputs_dir: directory of original garment photos
    :param outputs_dir: directory for the composited outputs (created if missing)
    :param cls: garment class forwarded to segment_garment
    :param resize_to_width: optional output width; aspect ratio is preserved
    """
    os.makedirs(outputs_dir, exist_ok=True)
    cloth_mask_dir = os.path.join(Path(outputs_dir).parent.absolute(), "cloth-mask")
    os.makedirs(cloth_mask_dir, exist_ok=True)

    segment_garment(inputs_dir, cloth_mask_dir, cls=cls)

    images_path = sorted(glob.glob(os.path.join(inputs_dir, "*")))
    masks_path = sorted(glob.glob(os.path.join(cloth_mask_dir, "*")))

    # Bug fix: the original opened only images_path[0] and cut every mask out of
    # that single image. Masks are named "<image stem>_<class id>", so map each
    # mask back to the image it was segmented from.
    images_by_stem = {os.path.basename(p).split(".")[0]: p for p in images_path}

    for mask_path in masks_path:
        mask = Image.open(mask_path)

        mask_stem = os.path.basename(mask_path).split(".")[0]
        image_stem = mask_stem.rsplit("_", 1)[0]
        # Fall back to the first image (the old behaviour) if a mask does not
        # match any source image.
        img = Image.open(images_by_stem.get(image_stem, images_path[0]))

        cutout = np.array(naive_cutout(img, mask))
        cutout = resize_by_bigger_index(cutout)

        # Paste the cutout centered on a white 1024x768 canvas.
        canvas = np.ones((1024, 768, 3), np.uint8) * 255
        y1, y2 = (canvas.shape[0] - cutout.shape[0]) // 2, (canvas.shape[0] + cutout.shape[0]) // 2
        x1, x2 = (canvas.shape[1] - cutout.shape[1]) // 2, (canvas.shape[1] + cutout.shape[1]) // 2

        # Alpha-blend the RGBA cutout onto the white canvas.
        alpha_s = cutout[:, :, 3] / 255.0
        alpha_l = 1.0 - alpha_s

        for c in range(0, 3):
            canvas[y1:y2, x1:x2, c] = (alpha_s * cutout[:, :, c] +
                                       alpha_l * canvas[y1:y2, x1:x2, c])

        # resize image before saving
        if resize_to_width:
            canvas = image_resize(canvas, width=resize_to_width)

        canvas = Image.fromarray(canvas)

        canvas.save(os.path.join(outputs_dir, f"{os.path.basename(mask_path).split('.')[0]}.jpg"), format='JPEG')
tryon/preprocessing/preprocess_human.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import cv2
4
+ import numpy as np
5
+ import torch
6
+ from PIL import Image
7
+ from skimage import io
8
+ from torch.autograd import Variable
9
+ from torch.utils.data import DataLoader
10
+ from torchvision import transforms
11
+
12
+ from .u2net import RescaleT, ToTensorLab, SalObjDataset, normPRED, load_human_segm_model
13
+
14
+
15
def pred_to_image(predictions, image_path):
    """Turn a prediction tensor into an RGB PIL image resized to the
    dimensions of the image stored at ``image_path``."""
    pred_np = predictions.squeeze().cpu().data.numpy()
    mask = Image.fromarray(pred_np * 255).convert('RGB')
    src = io.imread(image_path)
    # PIL's resize takes (width, height), hence the swapped shape indices.
    return mask.resize((src.shape[1], src.shape[0]), resample=Image.BILINEAR)
20
+
21
+
22
def segment_human(image_path, output_dir):
    """
    Segment human using U-2-Net and save the person composited over a
    light-grey (231, 231, 231) background as a PNG in ``output_dir``.

    :param image_path: image path
    :param output_dir: output directory
    """
    model_name = "u2net"
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    images = [image_path]

    # 1. dataloader
    test_salobj_dataset = SalObjDataset(img_name_list=images,
                                        lbl_name_list=[],
                                        transform=transforms.Compose([RescaleT(320),
                                                                      ToTensorLab(flag=0)])
                                        )
    test_salobj_dataloader = DataLoader(test_salobj_dataset,
                                        batch_size=1,
                                        shuffle=False,
                                        num_workers=1)

    net = load_human_segm_model(device, model_name)

    # 2. inference
    for i_test, data_test in enumerate(test_salobj_dataloader):
        print("inferencing:", images[i_test].split(os.sep)[-1])

        inputs_test = data_test['image'].type(torch.FloatTensor).to(device)

        # torch.autograd.Variable is a deprecated no-op wrapper; no_grad()
        # additionally avoids building the autograd graph during inference.
        with torch.no_grad():
            d1, d2, d3, d4, d5, d6, d7 = net(inputs_test)

        # normalization of the first (fused) side output
        pred = normPRED(d1[:, 0, :, :])

        mask = pred_to_image(pred, images[i_test])
        mask_cv2 = cv2.cvtColor(np.array(mask), cv2.COLOR_RGB2BGR)

        subimage = cv2.subtract(mask_cv2, cv2.imread(images[i_test]))
        original = Image.open(images[i_test])
        subimage = Image.fromarray(cv2.cvtColor(subimage, cv2.COLOR_BGR2RGB))

        subimage = subimage.convert("RGBA")
        original = original.convert("RGBA")

        # Vectorised replacement of the original per-pixel Python loop:
        # pixels that are pure black in the subtracted image become the
        # grey background, every other pixel keeps the original value.
        sub_arr = np.array(subimage)
        og_arr = np.array(original)
        background = np.all(sub_arr[:, :, :3] == 0, axis=-1)
        out_arr = og_arr.copy()
        out_arr[background] = (231, 231, 231, 231)
        result = Image.fromarray(out_arr, mode="RGBA")

        result.save(os.path.join(output_dir, f"{images[i_test].split(os.sep)[-1].split('.')[0]}.png"))

        del d1, d2, d3, d4, d5, d6, d7
tryon/preprocessing/sam2/__init__.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from PIL import Image
from pathlib import Path
import numpy as np
import torch
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor

# Location of the SAM2 checkout / checkpoints (see scripts/install_sam2.sh).
SAM2_DIR = os.path.join(str(Path.home()), 'sam2')

checkpoint = os.path.join(SAM2_DIR, "checkpoints/sam2.1_hiera_large.pt")
model_cfg = "configs/sam2.1/sam2.1_hiera_l.yaml"
predictor = SAM2ImagePredictor(build_sam2(model_cfg, checkpoint))

if __name__ == "__main__":
    # Demo: segment a sample image with a single positive point prompt.
    # Previously this ran unconditionally at import time, which made any
    # `import tryon.preprocessing.sam2` fail when img000.webp was absent.
    with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
        predictor.set_image(Image.open("img000.webp"))
        input_point = np.array([[500, 375]])
        input_label = np.array([1])
        masks, _, _ = predictor.predict(
            point_coords=input_point,
            point_labels=input_label,
            multimask_output=True)
        print(masks)
tryon/preprocessing/u2net/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .load_u2net import load_cloth_segm_model, load_human_segm_model
2
+ from .data_loader import SalObjDataset, RescaleT, ToTensorLab, ToTensor
3
+ from .utils import normPRED
tryon/preprocessing/u2net/data_loader.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import print_function, division
2
+
3
+ import random
4
+
5
+ import numpy as np
6
+ import torch
7
+ from skimage import io, transform, color
8
+ from torch.utils.data import Dataset
9
+
10
+
11
class RescaleT(object):
    """Resize a sample's image and label to a fixed ``output_size``.

    An int produces a square (size, size) output; a tuple is used as the
    (h, w) target directly.  The label is resized with nearest-neighbour
    interpolation (order=0) so class values are not blended; the image is
    converted from [0, 255] to [0, 1] by ``transform.resize``.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, sample):
        imidx, image, label = sample['imidx'], sample['image'], sample['label']

        # The original computed an aspect-preserving (new_h, new_w) and then
        # ignored it (dead code), and passed a nested tuple to resize for
        # tuple output sizes, which raises.  Build the real target instead.
        if isinstance(self.output_size, int):
            target = (self.output_size, self.output_size)
        else:
            target = self.output_size

        img = transform.resize(image, target, mode='constant')
        lbl = transform.resize(label, target, mode='constant', order=0,
                               preserve_range=True)

        return {'imidx': imidx, 'image': img, 'label': lbl}
41
+
42
+
43
class Rescale(object):
    """Randomly flip the sample along its first axis, then rescale it.

    An int ``output_size`` rescales the shorter side to that size while
    keeping the aspect ratio; a tuple is used as the (h, w) target directly.
    The label is resized with nearest-neighbour interpolation (order=0).
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, sample):
        imidx, image, label = sample['imidx'], sample['image'], sample['label']

        # 50% chance of a flip along the first (vertical) axis.
        if random.random() >= 0.5:
            image, label = image[::-1], label[::-1]

        h, w = image.shape[:2]

        if isinstance(self.output_size, int):
            if h > w:
                new_h, new_w = self.output_size * h / w, self.output_size
            else:
                new_h, new_w = self.output_size, self.output_size * w / h
        else:
            new_h, new_w = self.output_size

        target = (int(new_h), int(new_w))

        # resize converts the image from range [0, 255] to [0, 1]
        img = transform.resize(image, target, mode='constant')
        lbl = transform.resize(label, target, mode='constant', order=0,
                               preserve_range=True)

        return {'imidx': imidx, 'image': img, 'label': lbl}
73
+
74
+
75
class RandomCrop(object):
    """Randomly flip the sample along its first axis, then crop a random
    ``output_size`` window from it (same window for image and label)."""

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            assert len(output_size) == 2
            self.output_size = output_size

    def __call__(self, sample):
        imidx, image, label = sample['imidx'], sample['image'], sample['label']

        if random.random() >= 0.5:
            image = image[::-1]
            label = label[::-1]

        h, w = image.shape[:2]
        new_h, new_w = self.output_size

        # np.random.randint(0, 0) raises ValueError, so use offset 0 when
        # an image side already equals the crop size.
        top = 0 if h == new_h else np.random.randint(0, h - new_h)
        left = 0 if w == new_w else np.random.randint(0, w - new_w)

        image = image[top: top + new_h, left: left + new_w]
        label = label[top: top + new_h, left: left + new_w]

        return {'imidx': imidx, 'image': image, 'label': label}
102
+
103
+
104
class ToTensor(object):
    """Convert ndarrays in sample to Tensors.

    The image is scaled by its max, normalised with ImageNet mean/std, and
    transposed to CHW; the label is scaled by its max (unless all-zero) and
    transposed to CHW.
    """

    def __call__(self, sample):

        imidx, image, label = sample['imidx'], sample['image'], sample['label']

        tmpImg = np.zeros((image.shape[0], image.shape[1], 3))

        image = image / np.max(image)
        # Leave an all-zero label untouched to avoid dividing by zero.
        if np.max(label) >= 1e-6:
            label = label / np.max(label)

        if image.shape[2] == 1:
            # Greyscale: replicate the single channel before normalising.
            tmpImg[:, :, 0] = (image[:, :, 0] - 0.485) / 0.229
            tmpImg[:, :, 1] = (image[:, :, 0] - 0.485) / 0.229
            tmpImg[:, :, 2] = (image[:, :, 0] - 0.485) / 0.229
        else:
            # ImageNet per-channel mean/std.
            tmpImg[:, :, 0] = (image[:, :, 0] - 0.485) / 0.229
            tmpImg[:, :, 1] = (image[:, :, 1] - 0.456) / 0.224
            tmpImg[:, :, 2] = (image[:, :, 2] - 0.406) / 0.225

        # The original also filled a tmpLbl buffer that was then discarded
        # by the transpose below; that dead code was removed.
        tmpImg = tmpImg.transpose((2, 0, 1))
        tmpLbl = label.transpose((2, 0, 1))

        return {'imidx': torch.from_numpy(imidx), 'image': torch.from_numpy(tmpImg), 'label': torch.from_numpy(tmpLbl)}
135
+
136
+
137
class ToTensorLab(object):
    """Convert ndarrays in sample to Tensors.

    ``flag`` selects the image colour representation:
      0 -- RGB normalised with ImageNet mean/std (3 channels, default),
      1 -- Lab, min/max scaled then standardised per channel (3 channels),
      2 -- RGB + Lab concatenated, same scaling per channel (6 channels).
    The label is scaled by its max (unless all-zero); both outputs are CHW.
    """

    def __init__(self, flag=0):
        self.flag = flag

    def __call__(self, sample):

        imidx, image, label = sample['imidx'], sample['image'], sample['label']

        # Leave an all-zero label untouched to avoid dividing by zero.
        if np.max(label) >= 1e-6:
            label = label / np.max(label)

        # change the color space
        if self.flag == 2:  # with rgb and Lab colors
            tmpImg = np.zeros((image.shape[0], image.shape[1], 6))
            tmpImgt = np.zeros((image.shape[0], image.shape[1], 3))
            if image.shape[2] == 1:
                tmpImgt[:, :, 0] = image[:, :, 0]
                tmpImgt[:, :, 1] = image[:, :, 0]
                tmpImgt[:, :, 2] = image[:, :, 0]
            else:
                tmpImgt = image
            tmpImgtl = color.rgb2lab(tmpImgt)

            # normalize image to range [0,1]
            # NOTE(review): constant-valued channels make these divisions
            # 0/0 -- confirm inputs always have per-channel variation.
            tmpImg[:, :, 0] = (tmpImgt[:, :, 0] - np.min(tmpImgt[:, :, 0])) / (
                    np.max(tmpImgt[:, :, 0]) - np.min(tmpImgt[:, :, 0]))
            tmpImg[:, :, 1] = (tmpImgt[:, :, 1] - np.min(tmpImgt[:, :, 1])) / (
                    np.max(tmpImgt[:, :, 1]) - np.min(tmpImgt[:, :, 1]))
            tmpImg[:, :, 2] = (tmpImgt[:, :, 2] - np.min(tmpImgt[:, :, 2])) / (
                    np.max(tmpImgt[:, :, 2]) - np.min(tmpImgt[:, :, 2]))
            tmpImg[:, :, 3] = (tmpImgtl[:, :, 0] - np.min(tmpImgtl[:, :, 0])) / (
                    np.max(tmpImgtl[:, :, 0]) - np.min(tmpImgtl[:, :, 0]))
            tmpImg[:, :, 4] = (tmpImgtl[:, :, 1] - np.min(tmpImgtl[:, :, 1])) / (
                    np.max(tmpImgtl[:, :, 1]) - np.min(tmpImgtl[:, :, 1]))
            tmpImg[:, :, 5] = (tmpImgtl[:, :, 2] - np.min(tmpImgtl[:, :, 2])) / (
                    np.max(tmpImgtl[:, :, 2]) - np.min(tmpImgtl[:, :, 2]))

            # standardise each channel
            tmpImg[:, :, 0] = (tmpImg[:, :, 0] - np.mean(tmpImg[:, :, 0])) / np.std(tmpImg[:, :, 0])
            tmpImg[:, :, 1] = (tmpImg[:, :, 1] - np.mean(tmpImg[:, :, 1])) / np.std(tmpImg[:, :, 1])
            tmpImg[:, :, 2] = (tmpImg[:, :, 2] - np.mean(tmpImg[:, :, 2])) / np.std(tmpImg[:, :, 2])
            tmpImg[:, :, 3] = (tmpImg[:, :, 3] - np.mean(tmpImg[:, :, 3])) / np.std(tmpImg[:, :, 3])
            tmpImg[:, :, 4] = (tmpImg[:, :, 4] - np.mean(tmpImg[:, :, 4])) / np.std(tmpImg[:, :, 4])
            tmpImg[:, :, 5] = (tmpImg[:, :, 5] - np.mean(tmpImg[:, :, 5])) / np.std(tmpImg[:, :, 5])

        elif self.flag == 1:  # with Lab color
            tmpImg = np.zeros((image.shape[0], image.shape[1], 3))

            if image.shape[2] == 1:
                tmpImg[:, :, 0] = image[:, :, 0]
                tmpImg[:, :, 1] = image[:, :, 0]
                tmpImg[:, :, 2] = image[:, :, 0]
            else:
                tmpImg = image

            tmpImg = color.rgb2lab(tmpImg)

            tmpImg[:, :, 0] = (tmpImg[:, :, 0] - np.min(tmpImg[:, :, 0])) / (
                    np.max(tmpImg[:, :, 0]) - np.min(tmpImg[:, :, 0]))
            tmpImg[:, :, 1] = (tmpImg[:, :, 1] - np.min(tmpImg[:, :, 1])) / (
                    np.max(tmpImg[:, :, 1]) - np.min(tmpImg[:, :, 1]))
            tmpImg[:, :, 2] = (tmpImg[:, :, 2] - np.min(tmpImg[:, :, 2])) / (
                    np.max(tmpImg[:, :, 2]) - np.min(tmpImg[:, :, 2]))

            tmpImg[:, :, 0] = (tmpImg[:, :, 0] - np.mean(tmpImg[:, :, 0])) / np.std(tmpImg[:, :, 0])
            tmpImg[:, :, 1] = (tmpImg[:, :, 1] - np.mean(tmpImg[:, :, 1])) / np.std(tmpImg[:, :, 1])
            tmpImg[:, :, 2] = (tmpImg[:, :, 2] - np.mean(tmpImg[:, :, 2])) / np.std(tmpImg[:, :, 2])

        else:  # with rgb color
            tmpImg = np.zeros((image.shape[0], image.shape[1], 3))
            image = image / np.max(image)
            if image.shape[2] == 1:
                tmpImg[:, :, 0] = (image[:, :, 0] - 0.485) / 0.229
                tmpImg[:, :, 1] = (image[:, :, 0] - 0.485) / 0.229
                tmpImg[:, :, 2] = (image[:, :, 0] - 0.485) / 0.229
            else:
                tmpImg[:, :, 0] = (image[:, :, 0] - 0.485) / 0.229
                tmpImg[:, :, 1] = (image[:, :, 1] - 0.456) / 0.224
                tmpImg[:, :, 2] = (image[:, :, 2] - 0.406) / 0.225

        # The original filled a separate tmpLbl buffer and then overwrote it
        # with the transpose below; that dead code was removed.
        tmpImg = tmpImg.transpose((2, 0, 1))
        tmpLbl = label.transpose((2, 0, 1))

        return {'imidx': torch.from_numpy(imidx), 'image': torch.from_numpy(tmpImg), 'label': torch.from_numpy(tmpLbl)}
232
+
233
+
234
class SalObjDataset(Dataset):
    """Salient-object dataset: reads images (and optional label maps) from
    explicit file lists and applies an optional transform to each sample."""

    def __init__(self, img_name_list, lbl_name_list, transform=None):
        self.image_name_list = img_name_list
        self.label_name_list = lbl_name_list
        self.transform = transform

    def __len__(self):
        return len(self.image_name_list)

    def __getitem__(self, idx):
        image = io.imread(self.image_name_list[idx])
        imidx = np.array([idx])

        # Without labels, fall back to an all-zero map of the image's shape.
        if len(self.label_name_list) == 0:
            label_3 = np.zeros(image.shape)
        else:
            label_3 = io.imread(self.label_name_list[idx])

        # Reduce a colour label to its first channel.
        label = np.zeros(label_3.shape[0:2])
        if label_3.ndim == 3:
            label = label_3[:, :, 0]
        elif label_3.ndim == 2:
            label = label_3

        # Ensure both arrays are HxWxC.
        if label.ndim == 2:
            label = label[:, :, np.newaxis]
            if image.ndim == 2:
                image = image[:, :, np.newaxis]

        sample = {'imidx': imidx, 'image': image, 'label': label}
        return self.transform(sample) if self.transform else sample
tryon/preprocessing/u2net/load_u2net.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from collections import OrderedDict
3
+
4
+ import torch
5
+
6
+ from tryon.preprocessing.u2net import u2net_cloth_segm, u2net_human_segm
7
+
8
+
9
def load_cloth_segm_model(device, checkpoint_path, in_ch=3, out_ch=1):
    """Load the U2NET cloth-segmentation checkpoint onto ``device``.

    :param device: torch device the model is moved to
    :param checkpoint_path: path to the state-dict file
    :param in_ch: model input channels
    :param out_ch: model output channels
    :return: the loaded model, or ``None`` when ``checkpoint_path`` does not
             exist (kept for backward compatibility with existing callers)
    """
    if not os.path.exists(checkpoint_path):
        print("Invalid path")
        return

    model = u2net_cloth_segm.U2NET(in_ch=in_ch, out_ch=out_ch)

    model_state_dict = torch.load(checkpoint_path, map_location=device)
    new_state_dict = OrderedDict()
    for k, v in model_state_dict.items():
        # Checkpoints saved from nn.DataParallel prefix keys with "module.";
        # strip it only when present so plain checkpoints also load.
        name = k[7:] if k.startswith("module.") else k
        new_state_dict[name] = v

    model.load_state_dict(new_state_dict)
    model = model.to(device=device)

    print("Checkpoints loaded from path: {}".format(checkpoint_path))

    return model
28
+
29
+
30
def load_human_segm_model(device, model_name):
    """Load the U2NET/U2NETP human-segmentation model in eval mode.

    The checkpoint location is read from the ``U2NET_SEGM_CHECKPOINT_PATH``
    environment variable.

    :param device: torch device used as ``map_location`` on CPU-only hosts
    :param model_name: 'u2net' (173.6 MB) or 'u2netp' (4.7 MB)
    :raises ValueError: if ``model_name`` is unknown or the checkpoint
                        environment variable is not set
    """
    if model_name == 'u2net':
        print("loading U2NET(173.6 MB)...")
        net = u2net_human_segm.U2NET(3, 1)
    elif model_name == 'u2netp':
        print("loading U2NEP(4.7 MB)...")
        net = u2net_human_segm.U2NETP(3, 1)
    else:
        # Previously fell through with net=None and crashed below with an
        # opaque AttributeError.
        raise ValueError(f"Unknown model name: {model_name!r}")

    checkpoint_path = os.environ.get("U2NET_SEGM_CHECKPOINT_PATH")
    if not checkpoint_path:
        raise ValueError("U2NET_SEGM_CHECKPOINT_PATH environment variable is not set")

    if torch.cuda.is_available():
        net.load_state_dict(torch.load(checkpoint_path))
        net.cuda()
    else:
        net.load_state_dict(torch.load(checkpoint_path, map_location=device))
    # Always switch to eval mode, regardless of the device branch taken.
    net.eval()

    return net
tryon/preprocessing/u2net/u2net_cloth_segm.py ADDED
@@ -0,0 +1,550 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+
6
class REBNCONV(nn.Module):
    """3x3 convolution (optionally dilated) -> BatchNorm -> ReLU block.

    ``dirate`` sets both the dilation and the padding, so the spatial size
    of the input is preserved.
    """

    def __init__(self, in_ch=3, out_ch=3, dirate=1):
        super(REBNCONV, self).__init__()

        self.conv_s1 = nn.Conv2d(
            in_ch, out_ch, 3, padding=1 * dirate, dilation=1 * dirate
        )
        self.bn_s1 = nn.BatchNorm2d(out_ch)
        self.relu_s1 = nn.ReLU(inplace=True)

    def forward(self, x):
        out = self.conv_s1(x)
        out = self.bn_s1(out)
        return self.relu_s1(out)
21
+
22
+
23
+ ## upsample tensor 'src' to have the same spatial size with tensor 'tar'
24
+ def _upsample_like(src, tar):
25
+ src = F.upsample(src, size=tar.shape[2:], mode="bilinear")
26
+
27
+ return src
28
+
29
+
30
+ ### RSU-7 ###
31
class RSU7(nn.Module):  # UNet07DRES(nn.Module):
    """Residual U-block of depth 7: a five-pooling encoder/decoder with a
    dilated bottleneck.  Output has ``out_ch`` channels at the input's
    spatial size, plus a residual connection from the stem convolution."""

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU7, self).__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=1)

        self.rebnconv7 = REBNCONV(mid_ch, mid_ch, dirate=2)

        self.rebnconv6d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hx = x
        hxin = self.rebnconvin(hx)

        # encoder
        hx1 = self.rebnconv1(hxin)
        hx = self.pool1(hx1)

        hx2 = self.rebnconv2(hx)
        hx = self.pool2(hx2)

        hx3 = self.rebnconv3(hx)
        hx = self.pool3(hx3)

        hx4 = self.rebnconv4(hx)
        hx = self.pool4(hx4)

        hx5 = self.rebnconv5(hx)
        hx = self.pool5(hx5)

        hx6 = self.rebnconv6(hx)

        # dilated bottleneck
        hx7 = self.rebnconv7(hx6)

        # decoder with encoder skip connections
        hx6d = self.rebnconv6d(torch.cat((hx7, hx6), 1))
        hx6dup = _upsample_like(hx6d, hx5)

        hx5d = self.rebnconv5d(torch.cat((hx6dup, hx5), 1))
        hx5dup = _upsample_like(hx5d, hx4)

        hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
        hx4dup = _upsample_like(hx4d, hx3)

        hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))

        # (A dead triple-quoted string listing `del` statements was removed.)
        return hx1d + hxin
110
+
111
+
112
+ ### RSU-6 ###
113
class RSU6(nn.Module):  # UNet06DRES(nn.Module):
    """Residual U-block of depth 6 (four poolings + dilated bottleneck);
    same contract as RSU7 with one fewer encoder/decoder level."""

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU6, self).__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)

        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=2)

        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hx = x

        hxin = self.rebnconvin(hx)

        # encoder
        hx1 = self.rebnconv1(hxin)
        hx = self.pool1(hx1)

        hx2 = self.rebnconv2(hx)
        hx = self.pool2(hx2)

        hx3 = self.rebnconv3(hx)
        hx = self.pool3(hx3)

        hx4 = self.rebnconv4(hx)
        hx = self.pool4(hx4)

        hx5 = self.rebnconv5(hx)

        # dilated bottleneck
        hx6 = self.rebnconv6(hx5)

        # decoder with encoder skip connections
        hx5d = self.rebnconv5d(torch.cat((hx6, hx5), 1))
        hx5dup = _upsample_like(hx5d, hx4)

        hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
        hx4dup = _upsample_like(hx4d, hx3)

        hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))

        # (A dead triple-quoted string listing `del` statements was removed.)
        return hx1d + hxin
183
+
184
+
185
+ ### RSU-5 ###
186
class RSU5(nn.Module):  # UNet05DRES(nn.Module):
    """Residual U-block of depth 5 (three poolings + dilated bottleneck);
    same contract as RSU7 with two fewer encoder/decoder levels."""

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU5, self).__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)

        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=2)

        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hx = x

        hxin = self.rebnconvin(hx)

        # encoder
        hx1 = self.rebnconv1(hxin)
        hx = self.pool1(hx1)

        hx2 = self.rebnconv2(hx)
        hx = self.pool2(hx2)

        hx3 = self.rebnconv3(hx)
        hx = self.pool3(hx3)

        hx4 = self.rebnconv4(hx)

        # dilated bottleneck
        hx5 = self.rebnconv5(hx4)

        # decoder with encoder skip connections
        hx4d = self.rebnconv4d(torch.cat((hx5, hx4), 1))
        hx4dup = _upsample_like(hx4d, hx3)

        hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))

        # (A dead triple-quoted string listing `del` statements was removed.)
        return hx1d + hxin
246
+
247
+
248
+ ### RSU-4 ###
249
class RSU4(nn.Module):  # UNet04DRES(nn.Module):
    """Residual U-block of depth 4 (two poolings + dilated bottleneck);
    same contract as RSU7 with three fewer encoder/decoder levels."""

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU4, self).__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=2)

        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hx = x

        hxin = self.rebnconvin(hx)

        # encoder
        hx1 = self.rebnconv1(hxin)
        hx = self.pool1(hx1)

        hx2 = self.rebnconv2(hx)
        hx = self.pool2(hx2)

        hx3 = self.rebnconv3(hx)

        # dilated bottleneck
        hx4 = self.rebnconv4(hx3)

        # decoder with encoder skip connections
        hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))

        # (A dead triple-quoted string listing `del` statements was removed.)
        return hx1d + hxin
299
+
300
+
301
+ ### RSU-4F ###
302
class RSU4F(nn.Module):  # UNet04FRES(nn.Module):
    """Dilation-only variant of RSU4: no pooling; receptive field grows via
    dilation rates 1/2/4/8, so all features stay at the input resolution."""

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU4F, self).__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=2)
        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=4)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=8)

        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=4)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=2)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hx = x

        hxin = self.rebnconvin(hx)

        # encoder (increasing dilation)
        hx1 = self.rebnconv1(hxin)
        hx2 = self.rebnconv2(hx1)
        hx3 = self.rebnconv3(hx2)

        hx4 = self.rebnconv4(hx3)

        # decoder (decreasing dilation, with skip connections)
        hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
        hx2d = self.rebnconv2d(torch.cat((hx3d, hx2), 1))
        hx1d = self.rebnconv1d(torch.cat((hx2d, hx1), 1))

        # (A dead triple-quoted string listing `del` statements was removed.)
        return hx1d + hxin
339
+
340
+
341
+ ##### U^2-Net ####
342
class U2NET(nn.Module):
    """Full U^2-Net: six RSU encoder stages, five RSU decoder stages, and
    six side outputs fused by a 1x1 convolution.

    forward returns (d0, d1..d6): the fused saliency map followed by the
    per-stage side outputs, all upsampled to the input's resolution.
    """

    def __init__(self, in_ch=3, out_ch=1):
        super(U2NET, self).__init__()

        # encoder
        self.stage1 = RSU7(in_ch, 32, 64)
        self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage2 = RSU6(64, 32, 128)
        self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage3 = RSU5(128, 64, 256)
        self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage4 = RSU4(256, 128, 512)
        self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage5 = RSU4F(512, 256, 512)
        self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage6 = RSU4F(512, 256, 512)

        # decoder
        self.stage5d = RSU4F(1024, 256, 512)
        self.stage4d = RSU4(1024, 128, 256)
        self.stage3d = RSU5(512, 64, 128)
        self.stage2d = RSU6(256, 32, 64)
        self.stage1d = RSU7(128, 16, 64)

        # per-stage side-output heads
        self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side2 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side3 = nn.Conv2d(128, out_ch, 3, padding=1)
        self.side4 = nn.Conv2d(256, out_ch, 3, padding=1)
        self.side5 = nn.Conv2d(512, out_ch, 3, padding=1)
        self.side6 = nn.Conv2d(512, out_ch, 3, padding=1)

        self.outconv = nn.Conv2d(6 * out_ch, out_ch, 1)

    def forward(self, x):
        hx = x

        # stage 1
        hx1 = self.stage1(hx)
        hx = self.pool12(hx1)

        # stage 2
        hx2 = self.stage2(hx)
        hx = self.pool23(hx2)

        # stage 3
        hx3 = self.stage3(hx)
        hx = self.pool34(hx3)

        # stage 4
        hx4 = self.stage4(hx)
        hx = self.pool45(hx4)

        # stage 5
        hx5 = self.stage5(hx)
        hx = self.pool56(hx5)

        # stage 6
        hx6 = self.stage6(hx)
        hx6up = _upsample_like(hx6, hx5)

        # -------------------- decoder --------------------
        hx5d = self.stage5d(torch.cat((hx6up, hx5), 1))
        hx5dup = _upsample_like(hx5d, hx4)

        hx4d = self.stage4d(torch.cat((hx5dup, hx4), 1))
        hx4dup = _upsample_like(hx4d, hx3)

        hx3d = self.stage3d(torch.cat((hx4dup, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.stage2d(torch.cat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.stage1d(torch.cat((hx2dup, hx1), 1))

        # side output
        d1 = self.side1(hx1d)

        d2 = self.side2(hx2d)
        d2 = _upsample_like(d2, d1)

        d3 = self.side3(hx3d)
        d3 = _upsample_like(d3, d1)

        d4 = self.side4(hx4d)
        d4 = _upsample_like(d4, d1)

        d5 = self.side5(hx5d)
        d5 = _upsample_like(d5, d1)

        d6 = self.side6(hx6)
        d6 = _upsample_like(d6, d1)

        d0 = self.outconv(torch.cat((d1, d2, d3, d4, d5, d6), 1))

        # (A dead triple-quoted string listing `del` statements was removed.)
        return d0, d1, d2, d3, d4, d5, d6
448
+
449
+
450
+ ### U^2-Net small ###
451
class U2NETP(nn.Module):
    """Lightweight U^2-Net (~4.7 MB): six RSU encoder stages and five RSU
    decoder stages, every stage with 64 output channels (16 mid channels),
    plus six side outputs fused by a 1x1 convolution.

    forward returns (d0, d1..d6): the fused saliency map followed by the
    per-stage side outputs, all upsampled to the input's resolution.
    """

    def __init__(self, in_ch=3, out_ch=1):
        super(U2NETP, self).__init__()

        # encoder
        self.stage1 = RSU7(in_ch, 16, 64)
        self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage2 = RSU6(64, 16, 64)
        self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage3 = RSU5(64, 16, 64)
        self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage4 = RSU4(64, 16, 64)
        self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage5 = RSU4F(64, 16, 64)
        self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage6 = RSU4F(64, 16, 64)

        # decoder
        self.stage5d = RSU4F(128, 16, 64)
        self.stage4d = RSU4(128, 16, 64)
        self.stage3d = RSU5(128, 16, 64)
        self.stage2d = RSU6(128, 16, 64)
        self.stage1d = RSU7(128, 16, 64)

        # per-stage side-output heads
        self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side2 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side3 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side4 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side5 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side6 = nn.Conv2d(64, out_ch, 3, padding=1)

        # fuses the six side outputs into the final map d0
        self.outconv = nn.Conv2d(6 * out_ch, out_ch, 1)

    def forward(self, x):
        hx = x

        # stage 1
        hx1 = self.stage1(hx)
        hx = self.pool12(hx1)

        # stage 2
        hx2 = self.stage2(hx)
        hx = self.pool23(hx2)

        # stage 3
        hx3 = self.stage3(hx)
        hx = self.pool34(hx3)

        # stage 4
        hx4 = self.stage4(hx)
        hx = self.pool45(hx4)

        # stage 5
        hx5 = self.stage5(hx)
        hx = self.pool56(hx5)

        # stage 6
        hx6 = self.stage6(hx)
        hx6up = _upsample_like(hx6, hx5)

        # decoder: each stage consumes the upsampled deeper feature
        # concatenated with the matching encoder feature (skip connection)
        hx5d = self.stage5d(torch.cat((hx6up, hx5), 1))
        hx5dup = _upsample_like(hx5d, hx4)

        hx4d = self.stage4d(torch.cat((hx5dup, hx4), 1))
        hx4dup = _upsample_like(hx4d, hx3)

        hx3d = self.stage3d(torch.cat((hx4dup, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.stage2d(torch.cat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.stage1d(torch.cat((hx2dup, hx1), 1))

        # side output (each upsampled to the d1/input resolution)
        d1 = self.side1(hx1d)

        d2 = self.side2(hx2d)
        d2 = _upsample_like(d2, d1)

        d3 = self.side3(hx3d)
        d3 = _upsample_like(d3, d1)

        d4 = self.side4(hx4d)
        d4 = _upsample_like(d4, d1)

        d5 = self.side5(hx5d)
        d5 = _upsample_like(d5, d1)

        d6 = self.side6(hx6)
        d6 = _upsample_like(d6, d1)

        d0 = self.outconv(torch.cat((d1, d2, d3, d4, d5, d6), 1))

        return d0, d1, d2, d3, d4, d5, d6
tryon/preprocessing/u2net/u2net_human_segm.py ADDED
@@ -0,0 +1,520 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+
6
class REBNCONV(nn.Module):
    """Conv2d(3x3, dilated) -> BatchNorm2d -> ReLU building block of U^2-Net.

    Padding equals the dilation rate, so a 3x3 kernel preserves spatial size.
    """

    def __init__(self, in_ch=3, out_ch=3, dirate=1):
        super(REBNCONV, self).__init__()

        # Attribute names are kept for state_dict compatibility.
        self.conv_s1 = nn.Conv2d(in_ch, out_ch, 3, padding=1 * dirate, dilation=1 * dirate)
        self.bn_s1 = nn.BatchNorm2d(out_ch)
        self.relu_s1 = nn.ReLU(inplace=True)

    def forward(self, x):
        return self.relu_s1(self.bn_s1(self.conv_s1(x)))
19
+
20
+
21
+ ## upsample tensor 'src' to have the same spatial size with tensor 'tar'
22
+ def _upsample_like(src, tar):
23
+ src = F.upsample(src, size=tar.shape[2:], mode='bilinear')
24
+
25
+ return src
26
+
27
+
28
+ ### RSU-7 ###
29
class RSU7(nn.Module):  # UNet07DRES(nn.Module):
    """Residual U-block of depth 7 (RSU-7).

    A small encoder-decoder whose output is summed with the input
    projection: forward(x) = decoder(encoder(x)) + rebnconvin(x).
    Spatial size is preserved; channels go in_ch -> out_ch with mid_ch
    used internally.
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU7, self).__init__()

        # input projection, reused as the residual branch
        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        # encoder: six conv+pool levels plus one dilated bottom conv
        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=1)

        # dilated bottleneck (no pooling at this depth)
        self.rebnconv7 = REBNCONV(mid_ch, mid_ch, dirate=2)

        # decoder: each level takes the concatenation of the upsampled
        # deeper feature and the matching encoder feature (hence 2*mid_ch)
        self.rebnconv6d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hx = x
        hxin = self.rebnconvin(hx)

        # encoder path
        hx1 = self.rebnconv1(hxin)
        hx = self.pool1(hx1)

        hx2 = self.rebnconv2(hx)
        hx = self.pool2(hx2)

        hx3 = self.rebnconv3(hx)
        hx = self.pool3(hx3)

        hx4 = self.rebnconv4(hx)
        hx = self.pool4(hx4)

        hx5 = self.rebnconv5(hx)
        hx = self.pool5(hx5)

        hx6 = self.rebnconv6(hx)

        hx7 = self.rebnconv7(hx6)

        # decoder path with skip connections
        hx6d = self.rebnconv6d(torch.cat((hx7, hx6), 1))
        hx6dup = _upsample_like(hx6d, hx5)

        hx5d = self.rebnconv5d(torch.cat((hx6dup, hx5), 1))
        hx5dup = _upsample_like(hx5d, hx4)

        hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
        hx4dup = _upsample_like(hx4d, hx3)

        hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))

        # residual connection
        return hx1d + hxin
103
+
104
+
105
+ ### RSU-6 ###
106
class RSU6(nn.Module):  # UNet06DRES(nn.Module):
    """Residual U-block of depth 6 (RSU-6); see RSU7 for the pattern."""

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU6, self).__init__()

        # input projection, reused as the residual branch
        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        # encoder: five conv levels (four pooled) plus a dilated bottleneck
        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)

        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=2)

        # decoder levels consume concatenated skip features (2*mid_ch)
        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hx = x

        hxin = self.rebnconvin(hx)

        # encoder path
        hx1 = self.rebnconv1(hxin)
        hx = self.pool1(hx1)

        hx2 = self.rebnconv2(hx)
        hx = self.pool2(hx2)

        hx3 = self.rebnconv3(hx)
        hx = self.pool3(hx3)

        hx4 = self.rebnconv4(hx)
        hx = self.pool4(hx4)

        hx5 = self.rebnconv5(hx)

        hx6 = self.rebnconv6(hx5)

        # decoder path with skip connections
        hx5d = self.rebnconv5d(torch.cat((hx6, hx5), 1))
        hx5dup = _upsample_like(hx5d, hx4)

        hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
        hx4dup = _upsample_like(hx4d, hx3)

        hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))

        # residual connection
        return hx1d + hxin
171
+
172
+
173
+ ### RSU-5 ###
174
class RSU5(nn.Module):  # UNet05DRES(nn.Module):
    """Residual U-block of depth 5 (RSU-5); see RSU7 for the pattern."""

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU5, self).__init__()

        # input projection, reused as the residual branch
        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        # encoder: four conv levels (three pooled) plus a dilated bottleneck
        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)

        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=2)

        # decoder levels consume concatenated skip features (2*mid_ch)
        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hx = x

        hxin = self.rebnconvin(hx)

        # encoder path
        hx1 = self.rebnconv1(hxin)
        hx = self.pool1(hx1)

        hx2 = self.rebnconv2(hx)
        hx = self.pool2(hx2)

        hx3 = self.rebnconv3(hx)
        hx = self.pool3(hx3)

        hx4 = self.rebnconv4(hx)

        hx5 = self.rebnconv5(hx4)

        # decoder path with skip connections
        hx4d = self.rebnconv4d(torch.cat((hx5, hx4), 1))
        hx4dup = _upsample_like(hx4d, hx3)

        hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))

        # residual connection
        return hx1d + hxin
229
+
230
+
231
+ ### RSU-4 ###
232
class RSU4(nn.Module):  # UNet04DRES(nn.Module):
    """Residual U-block of depth 4 (RSU-4); see RSU7 for the pattern."""

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU4, self).__init__()

        # input projection, reused as the residual branch
        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        # encoder: three conv levels (two pooled) plus a dilated bottleneck
        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=2)

        # decoder levels consume concatenated skip features (2*mid_ch)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hx = x

        hxin = self.rebnconvin(hx)

        # encoder path
        hx1 = self.rebnconv1(hxin)
        hx = self.pool1(hx1)

        hx2 = self.rebnconv2(hx)
        hx = self.pool2(hx2)

        hx3 = self.rebnconv3(hx)

        hx4 = self.rebnconv4(hx3)

        # decoder path with skip connections
        hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))

        # residual connection
        return hx1d + hxin
277
+
278
+
279
+ ### RSU-4F ###
280
class RSU4F(nn.Module):  # UNet04FRES(nn.Module):
    """Dilation-only residual U-block (RSU-4F).

    Unlike RSU4, no pooling/upsampling is used: the receptive field grows
    via dilation rates 1/2/4/8 while spatial size stays constant, so skip
    features can be concatenated directly.
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU4F, self).__init__()

        # input projection, reused as the residual branch
        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        # encoder: increasing dilation instead of pooling
        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=2)
        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=4)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=8)

        # decoder mirrors the dilation rates
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=4)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=2)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hx = x

        hxin = self.rebnconvin(hx)

        hx1 = self.rebnconv1(hxin)
        hx2 = self.rebnconv2(hx1)
        hx3 = self.rebnconv3(hx2)

        hx4 = self.rebnconv4(hx3)

        # decoder with direct (same-resolution) skip concatenations
        hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
        hx2d = self.rebnconv2d(torch.cat((hx3d, hx2), 1))
        hx1d = self.rebnconv1d(torch.cat((hx2d, hx1), 1))

        # residual connection
        return hx1d + hxin
313
+
314
+
315
+ ##### U^2-Net ####
316
class U2NET(nn.Module):
    """Full-size U^2-Net for human segmentation.

    Six RSU encoder stages with max-pooling between them, a symmetric RSU
    decoder, and one 3x3 side head per decoder level. Returns seven
    sigmoid-activated maps: the fused output d0 followed by the side
    outputs d1..d6, all at input resolution.

    Fix: uses torch.sigmoid instead of the deprecated F.sigmoid.
    """

    def __init__(self, in_ch=3, out_ch=1):
        super(U2NET, self).__init__()

        # ---- encoder ----
        self.stage1 = RSU7(in_ch, 32, 64)
        self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage2 = RSU6(64, 32, 128)
        self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage3 = RSU5(128, 64, 256)
        self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage4 = RSU4(256, 128, 512)
        self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage5 = RSU4F(512, 256, 512)
        self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage6 = RSU4F(512, 256, 512)

        # ---- decoder (inputs are concatenated skip features) ----
        self.stage5d = RSU4F(1024, 256, 512)
        self.stage4d = RSU4(1024, 128, 256)
        self.stage3d = RSU5(512, 64, 128)
        self.stage2d = RSU6(256, 32, 64)
        self.stage1d = RSU7(128, 16, 64)

        # one side head per decoder level; in-channels match stage outputs
        self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side2 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side3 = nn.Conv2d(128, out_ch, 3, padding=1)
        self.side4 = nn.Conv2d(256, out_ch, 3, padding=1)
        self.side5 = nn.Conv2d(512, out_ch, 3, padding=1)
        self.side6 = nn.Conv2d(512, out_ch, 3, padding=1)

        # fuses the six side maps into the final prediction
        self.outconv = nn.Conv2d(6 * out_ch, out_ch, 1)

    def forward(self, x):
        hx = x

        # stage 1
        hx1 = self.stage1(hx)
        hx = self.pool12(hx1)

        # stage 2
        hx2 = self.stage2(hx)
        hx = self.pool23(hx2)

        # stage 3
        hx3 = self.stage3(hx)
        hx = self.pool34(hx3)

        # stage 4
        hx4 = self.stage4(hx)
        hx = self.pool45(hx4)

        # stage 5
        hx5 = self.stage5(hx)
        hx = self.pool56(hx5)

        # stage 6 (bottleneck)
        hx6 = self.stage6(hx)
        hx6up = _upsample_like(hx6, hx5)

        # -------------------- decoder --------------------
        hx5d = self.stage5d(torch.cat((hx6up, hx5), 1))
        hx5dup = _upsample_like(hx5d, hx4)

        hx4d = self.stage4d(torch.cat((hx5dup, hx4), 1))
        hx4dup = _upsample_like(hx4d, hx3)

        hx3d = self.stage3d(torch.cat((hx4dup, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.stage2d(torch.cat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.stage1d(torch.cat((hx2dup, hx1), 1))

        # side outputs, each resized to the finest (d1) resolution
        d1 = self.side1(hx1d)

        d2 = self.side2(hx2d)
        d2 = _upsample_like(d2, d1)

        d3 = self.side3(hx3d)
        d3 = _upsample_like(d3, d1)

        d4 = self.side4(hx4d)
        d4 = _upsample_like(d4, d1)

        d5 = self.side5(hx5d)
        d5 = _upsample_like(d5, d1)

        d6 = self.side6(hx6)
        d6 = _upsample_like(d6, d1)

        d0 = self.outconv(torch.cat((d1, d2, d3, d4, d5, d6), 1))

        return (torch.sigmoid(d0), torch.sigmoid(d1), torch.sigmoid(d2),
                torch.sigmoid(d3), torch.sigmoid(d4), torch.sigmoid(d5),
                torch.sigmoid(d6))
417
+
418
+
419
+ ### U^2-Net small ###
420
class U2NETP(nn.Module):
    """Lightweight U^2-Net (U2NETP): every stage uses 64 output channels
    and 16 mid channels, so all side heads take 64-channel input.

    Returns seven sigmoid-activated maps (fused d0 plus side outputs
    d1..d6) at input resolution.

    Fix: uses torch.sigmoid instead of the deprecated F.sigmoid.
    """

    def __init__(self, in_ch=3, out_ch=1):
        super(U2NETP, self).__init__()

        # ---- encoder ----
        self.stage1 = RSU7(in_ch, 16, 64)
        self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage2 = RSU6(64, 16, 64)
        self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage3 = RSU5(64, 16, 64)
        self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage4 = RSU4(64, 16, 64)
        self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage5 = RSU4F(64, 16, 64)
        self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage6 = RSU4F(64, 16, 64)

        # ---- decoder (inputs are concatenated 64+64 skip features) ----
        self.stage5d = RSU4F(128, 16, 64)
        self.stage4d = RSU4(128, 16, 64)
        self.stage3d = RSU5(128, 16, 64)
        self.stage2d = RSU6(128, 16, 64)
        self.stage1d = RSU7(128, 16, 64)

        self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side2 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side3 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side4 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side5 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side6 = nn.Conv2d(64, out_ch, 3, padding=1)

        # fuses the six side maps into the final prediction
        self.outconv = nn.Conv2d(6 * out_ch, out_ch, 1)

    def forward(self, x):
        hx = x

        # stage 1
        hx1 = self.stage1(hx)
        hx = self.pool12(hx1)

        # stage 2
        hx2 = self.stage2(hx)
        hx = self.pool23(hx2)

        # stage 3
        hx3 = self.stage3(hx)
        hx = self.pool34(hx3)

        # stage 4
        hx4 = self.stage4(hx)
        hx = self.pool45(hx4)

        # stage 5
        hx5 = self.stage5(hx)
        hx = self.pool56(hx5)

        # stage 6 (bottleneck)
        hx6 = self.stage6(hx)
        hx6up = _upsample_like(hx6, hx5)

        # decoder
        hx5d = self.stage5d(torch.cat((hx6up, hx5), 1))
        hx5dup = _upsample_like(hx5d, hx4)

        hx4d = self.stage4d(torch.cat((hx5dup, hx4), 1))
        hx4dup = _upsample_like(hx4d, hx3)

        hx3d = self.stage3d(torch.cat((hx4dup, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.stage2d(torch.cat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.stage1d(torch.cat((hx2dup, hx1), 1))

        # side outputs, each resized to the finest (d1) resolution
        d1 = self.side1(hx1d)

        d2 = self.side2(hx2d)
        d2 = _upsample_like(d2, d1)

        d3 = self.side3(hx3d)
        d3 = _upsample_like(d3, d1)

        d4 = self.side4(hx4d)
        d4 = _upsample_like(d4, d1)

        d5 = self.side5(hx5d)
        d5 = _upsample_like(d5, d1)

        d6 = self.side6(hx6)
        d6 = _upsample_like(d6, d1)

        d0 = self.outconv(torch.cat((d1, d2, d3, d4, d5, d6), 1))

        return (torch.sigmoid(d0), torch.sigmoid(d1), torch.sigmoid(d2),
                torch.sigmoid(d3), torch.sigmoid(d4), torch.sigmoid(d5),
                torch.sigmoid(d6))
tryon/preprocessing/u2net/utils.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+
4
def normPRED(d):
    """Min-max normalize a prediction map to [0, 1].

    :param d: tensor of arbitrary shape.
    :return: (d - min) / (max - min); an all-zero tensor when the input is
        constant (the previous version divided by zero in that case).
    """
    ma = torch.max(d)
    mi = torch.min(d)

    denom = ma - mi
    if denom == 0:
        # Constant input: any value maps to 0 rather than NaN/inf.
        return torch.zeros_like(d)

    return (d - mi) / denom
tryon/preprocessing/utils.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+
4
+ import cv2
5
+ from PIL import Image
6
+ from torchvision import transforms
7
+
8
+
9
class NormalizeImage(object):
    """Normalize a tensor with a single scalar mean/std, for 1-, 3- or
    18-channel input (channel-first layout assumed — TODO confirm).

    Args:
        mean (float): mean to subtract from every channel.
        std (float): std to divide every channel by.

    Raises:
        TypeError: if mean or std is not a float.
        ValueError: on an unsupported channel count (the previous version
            had a no-op ``assert "<message>"`` there and silently returned
            None).
    """

    def __init__(self, mean, std):
        # The original only asserted on `mean`; validate both explicitly.
        if not isinstance(mean, float) or not isinstance(std, float):
            raise TypeError("mean and std must be floats")
        self.mean = mean
        self.std = std

        # Pre-build one Normalize transform per supported channel count.
        self.normalize_1 = transforms.Normalize(self.mean, self.std)
        self.normalize_3 = transforms.Normalize([self.mean] * 3, [self.std] * 3)
        self.normalize_18 = transforms.Normalize([self.mean] * 18, [self.std] * 18)

    def __call__(self, image_tensor):
        channels = image_tensor.shape[0]
        if channels == 1:
            return self.normalize_1(image_tensor)
        if channels == 3:
            return self.normalize_3(image_tensor)
        if channels == 18:
            return self.normalize_18(image_tensor)
        raise ValueError(
            "Please set proper channels! Normalization implemented only for 1, 3 and 18")
41
+
42
+
43
def naive_cutout(img, mask):
    """Composite *img* onto a fully transparent canvas using *mask* as the
    alpha matte; the mask is resized to the image size with Lanczos."""
    transparent_bg = Image.new("RGBA", img.size, 0)
    scaled_mask = mask.resize(img.size, Image.LANCZOS)
    return Image.composite(img, transparent_bg, scaled_mask)
47
+
48
+
49
def resize_by_bigger_index(crop):
    """Resize *crop* (an HxWxC array) while preserving its aspect ratio.

    Crops with height/width <= 1.33 are pinned to width 768; taller crops
    are pinned to height 1024.
    """
    height, width = crop.shape[0], crop.shape[1]
    if height / width <= 1.33:
        return image_resize(crop, width=768)
    return image_resize(crop, height=1024)
57
+
58
+
59
def image_resize(image, width=None, height=None):
    """Resize an image, deriving the missing dimension from the aspect ratio.

    :param image: H x W (x C) numpy array.
    :param width: target width in pixels, or None.
    :param height: target height in pixels, or None.
    :return: the resized image. With both targets given, the image is
        resized to exactly (width, height); with one target, the other is
        computed from the aspect ratio; with neither, the input is
        returned unchanged.
    """
    (h, w) = image.shape[:2]

    if width is None and height is None:
        return image

    if width is not None and height is not None:
        # Bug fix: honour both targets exactly. The previous version
        # silently ignored `height` here, breaking callers (e.g.
        # convert_to_jpg) that pass an explicit (w, h) size.
        dim = (width, height)
    elif width is None:
        r = height / float(h)
        dim = (int(w * r), height)
    else:
        r = width / float(w)
        dim = (width, int(h * r))

    return cv2.resize(image, dim)
77
+
78
+
79
def convert_to_jpg(image_path, output_dir, size=None):
    """
    Convert an image to jpg format, optionally resizing it first.

    :param image_path: image path
    :param output_dir: output directory
    :param size: desired size of the image (w, h)
    :raises FileNotFoundError: if the image cannot be read
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising on unreadable paths;
        # fail loudly rather than crashing later inside cv2.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    if size is not None:
        img = image_resize(img, width=size[0], height=size[1])

    # Path.stem keeps dotted filenames intact ("a.b.png" -> "a.b"),
    # unlike the previous filename.split(".")[0].
    stem = Path(image_path).stem
    cv2.imwrite(os.path.join(output_dir, stem + ".jpg"), img)
tryondiffusion/__init__.py ADDED
File without changes
tryondiffusion/diffusion.py ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ import logging
3
+ import os
4
+
5
+ import torch
6
+ from torch.utils.data import DataLoader
7
+ from torch import optim
8
+ import torch.nn as nn
9
+ from torch.nn import functional as F
10
+ import cv2
11
+ import numpy as np
12
+
13
+ from .network import UNet64, UNet128
14
+ from .utils import mk_folders, GaussianSmoothing, UNetDataset
15
+ from .ema import EMA
16
+
17
+
18
def smoothen_image(img, sigma):
    """Gaussian-blur noise augmentation for 3-channel inputs.

    As suggested in
    https://jmlr.csail.mit.edu/papers/volume23/21-0635/21-0635.pdf Section 4.4.
    """
    blur = GaussianSmoothing(channels=3,
                             kernel_size=3,
                             sigma=sigma,
                             conv_dim=2)

    # Reflect-pad one pixel per side so the 3x3 kernel preserves the
    # spatial size.
    padded = F.pad(img, (1, 1, 1, 1), mode='reflect')
    return blur(padded)
31
+
32
+
33
def schedule_lr(total_steps, start_lr=0.0, stop_lr=0.0001, pct_increasing_lr=0.02):
    """Build a per-step learning-rate list: linear warmup, then constant.

    The first round(total_steps * pct_increasing_lr) steps ramp linearly
    from start_lr to stop_lr; every remaining step stays at stop_lr.
    """
    warmup_steps = round(total_steps * pct_increasing_lr)
    warmup = list(np.linspace(start_lr, stop_lr, warmup_steps))
    plateau = [stop_lr] * (total_steps - warmup_steps)
    return warmup + plateau
40
+
41
+
42
class Diffusion:
    """DDPM-style diffusion wrapper around the try-on UNet.

    Owns the linear beta schedule, forward noising, reverse-process
    sampling (with an EMA copy of the network), and the training loop.
    """

    def __init__(self,
                 device,
                 pose_embed_dim,
                 time_steps=256,
                 beta_start=1e-4,
                 beta_end=0.02,
                 unet_dim=64,
                 noise_input_channel=3,
                 beta_ema=0.995):
        self.time_steps = time_steps
        self.beta_start = beta_start
        self.beta_end = beta_end

        # Linear beta schedule and the derived alpha / cumulative products.
        self.beta = self.linear_beta_scheduler().to(device)
        self.alpha = 1 - self.beta
        self.alpha_cumprod = torch.cumprod(self.alpha, dim=0)

        self.noise_input_channel = noise_input_channel
        self.unet_dim = unet_dim
        if unet_dim == 128:
            self.net = UNet128(pose_embed_dim, device, time_steps).to(device)
        elif unet_dim == 64:
            self.net = UNet64(pose_embed_dim, device, time_steps).to(device)

        # Frozen EMA copy of the network, used for evaluation-time sampling.
        self.ema_net = copy.deepcopy(self.net).eval().requires_grad_(False)
        self.beta_ema = beta_ema

        self.device = device

    def linear_beta_scheduler(self):
        """Linearly spaced betas over the diffusion timesteps."""
        return torch.linspace(self.beta_start, self.beta_end, self.time_steps)

    def sample_time_steps(self, batch_size):
        """Draw one random timestep in [1, time_steps) per batch element."""
        return torch.randint(low=1, high=self.time_steps, size=(batch_size,))

    def add_noise_to_img(self, img, t):
        """Forward-diffuse `img` to timestep `t`.

        Returns (noisy_img, epsilon) with
        noisy_img = sqrt(alpha_bar_t) * img + sqrt(1 - alpha_bar_t) * epsilon.
        """
        sqrt_alpha_timestep = torch.sqrt(self.alpha_cumprod[t])[:, None, None, None]
        sqrt_one_minus_alpha_timestep = torch.sqrt(1 - self.alpha_cumprod[t])[:, None, None, None]
        epsilon = torch.randn_like(img)
        # Bug fix: scale the clean image, not the noise. The previous version
        # returned (sqrt_a * epsilon) + (sqrt_1ma * epsilon) and never used
        # `img`, so the model was trained on pure noise.
        return (sqrt_alpha_timestep * img) + (sqrt_one_minus_alpha_timestep * epsilon), epsilon

    @torch.inference_mode()
    def sample(self, use_ema, conditional_inputs):
        """Run the full reverse process and return uint8 images in [0, 255].

        conditional_inputs is (ic, jp, jg, ia): garment image, person pose,
        garment pose and clothing-agnostic image (presumed from the dataset
        code — confirm naming against UNetDataset).
        """
        model = self.ema_net if use_ema else self.net
        ic, jp, jg, ia = conditional_inputs
        ic = ic.to(self.device)
        jp = jp.to(self.device)
        jg = jg.to(self.device)
        ia = ia.to(self.device)
        batch_size = len(ic)
        logging.info(f"Running inference for {batch_size} images")

        model.eval()
        with torch.inference_mode():

            # noise augmentation during testing as suggested in paper
            sigma = float(torch.FloatTensor(1).uniform_(0.4, 0.6))
            ia = smoothen_image(ia, sigma)
            ic = smoothen_image(ic, sigma)

            inp_network_noise = torch.randn(batch_size, self.noise_input_channel, self.unet_dim, self.unet_dim).to(self.device)

            # paper says to add noise augmentation to input noise during inference
            inp_network_noise = smoothen_image(inp_network_noise, sigma)

            for i in reversed(range(1, self.time_steps)):
                t = (torch.ones(batch_size) * i).long().to(self.device)

                # Bug fix: re-concatenate the *current* denoised estimate with
                # the agnostic image every step. Previously the concatenation
                # was built once before the loop, so the model saw the initial
                # noise at every timestep.
                x = torch.cat((inp_network_noise, ia), dim=1)
                predicted_noise = model(x, ic, jp, jg, t, sigma)
                # ToDo: Add Classifier-Free Guidance with guidance weight 2
                alpha = self.alpha[t][:, None, None, None]
                alpha_cumprod = self.alpha_cumprod[t][:, None, None, None]
                beta = self.beta[t][:, None, None, None]
                if i > 1:
                    noise = torch.randn_like(inp_network_noise)
                else:
                    # No extra noise on the final denoising step.
                    noise = torch.zeros_like(inp_network_noise)

                inp_network_noise = 1 / torch.sqrt(alpha) * (inp_network_noise - ((1 - alpha) / (torch.sqrt(1 - alpha_cumprod))) * predicted_noise) + torch.sqrt(beta) * noise

            # Map from [-1, 1] to uint8 [0, 255].
            inp_network_noise = (inp_network_noise.clamp(-1, 1) + 1) / 2
            inp_network_noise = (inp_network_noise * 255).type(torch.uint8)
        return inp_network_noise

    def prepare(self, args):
        """Build dataloaders, optimizer, LR schedule, loss, EMA and scaler."""
        mk_folders(args.run_name)
        train_dataset = UNetDataset(ip_dir=args.train_ip_folder,
                                    jp_dir=args.train_jp_folder,
                                    jg_dir=args.train_jg_folder,
                                    ia_dir=args.train_ia_folder,
                                    ic_dir=args.train_ic_folder,
                                    unet_size=self.unet_dim)

        validation_dataset = UNetDataset(ip_dir=args.validation_ip_folder,
                                         jp_dir=args.validation_jp_folder,
                                         jg_dir=args.validation_jg_folder,
                                         ia_dir=args.validation_ia_folder,
                                         ic_dir=args.validation_ic_folder,
                                         unet_size=self.unet_dim)

        self.train_dataloader = DataLoader(train_dataset, args.batch_size_train, shuffle=True)
        # give args.batch_size_validation 1 while training
        self.val_dataloader = DataLoader(validation_dataset, args.batch_size_validation, shuffle=True)

        self.optimizer = optim.AdamW(self.net.parameters(), lr=args.lr, eps=1e-4)
        self.scheduler = schedule_lr(total_steps=args.total_steps, start_lr=args.start_lr,
                                     stop_lr=args.stop_lr, pct_increasing_lr=args.pct_increasing_lr)
        self.mse = nn.MSELoss()
        self.ema = EMA(self.beta_ema)
        self.scaler = torch.cuda.amp.GradScaler()

    def train_step(self, loss, running_step):
        """One optimizer step with AMP scaling, EMA update and LR schedule."""
        self.optimizer.zero_grad()
        self.scaler.scale(loss).backward()
        self.scaler.step(self.optimizer)
        self.scaler.update()
        self.ema.step_ema(self.ema_net, self.net)
        # Apply the per-step scheduled learning rate.
        for g in self.optimizer.param_groups:
            g['lr'] = self.scheduler[running_step]

    def single_epoch(self, train=True):
        """Run one pass over the data and return the mean MSE loss.

        NOTE(review): both the train and eval paths iterate
        `self.train_dataloader`; the validation loader is only used by
        `logging_images`. Confirm whether eval should use `val_dataloader`.
        """
        total_loss = 0.0
        num_batches = 0
        if train:
            self.net.train()
        else:
            self.net.eval()

        for ip, jp, jg, ia, ic in self.train_dataloader:

            # noise augmentation
            sigma = float(torch.FloatTensor(1).uniform_(0.4, 0.6))
            ia = smoothen_image(ia, sigma)
            ic = smoothen_image(ic, sigma)

            # Bug fix: `with A and B:` only enters B (the result of the
            # `and`), so autocast was silently skipped before. Enter both
            # context managers.
            with torch.autocast(self.device), \
                    (torch.inference_mode() if not train else torch.enable_grad()):
                ip = ip.to(self.device)
                jp = jp.to(self.device)
                jg = jg.to(self.device)
                ia = ia.to(self.device)
                ic = ic.to(self.device)
                t = self.sample_time_steps(ip.shape[0]).to(self.device)

                # corrupt -> concatenate -> predict
                zt, noise_epsilon = self.add_noise_to_img(ip, t)

                zt = torch.cat((zt, ia), dim=1)

                # ToDO: Make conditional inputs null, at 10% of the training time,
                # ToDo: for classifier-free guidance(GitHub Issue #21), with guidance weight 2.

                predicted_noise = self.net(zt, ic, jp, jg, t, sigma)
                loss = self.mse(noise_epsilon, predicted_noise)
                total_loss += loss.item()
                num_batches += 1

            if train:
                self.train_step(loss, self.running_train_steps)
                # ToDo: Add logs to tensorboard as well
                logging.info(
                    f"train_mse_loss: {loss.item():2.3f}, learning_rate: {self.scheduler[self.running_train_steps]}")
                self.running_train_steps += 1

        # Bug fix: the previous version returned `.mean()` of a 0-dim summed
        # tensor — i.e. the *sum* of batch losses — despite being logged as
        # an average. Return the actual mean (0.0 for an empty loader).
        return total_loss / max(num_batches, 1)

    def logging_images(self, epoch, run_name):
        """Sample with both nets on the validation set and save PNGs to
        results/<run_name>/images/<idx>_E<epoch>/."""

        for idx, (ip, jp, jg, ia, ic) in enumerate(self.val_dataloader):
            # sampled image
            sampled_image = self.sample(use_ema=False, conditional_inputs=(ic, jp, jg, ia))
            sampled_image = sampled_image[0].permute(1, 2, 0).squeeze().cpu().numpy()

            # ema sampled image
            ema_sampled_image = self.sample(use_ema=True, conditional_inputs=(ic, jp, jg, ia))
            ema_sampled_image = ema_sampled_image[0].permute(1, 2, 0).squeeze().cpu().numpy()

            # base images
            ip_np = ip[0].permute(1, 2, 0).squeeze().cpu().numpy()
            ic_np = ic[0].permute(1, 2, 0).squeeze().cpu().numpy()
            ia_np = ia[0].permute(1, 2, 0).squeeze().cpu().numpy()

            # make the output folder
            os.makedirs(os.path.join("results", run_name, "images", f"{idx}_E{epoch}"), exist_ok=True)

            # define folder paths
            images_folder = os.path.join("results", run_name, "images", f"{idx}_E{epoch}")

            # save base images
            cv2.imwrite(os.path.join(images_folder, "ground_truth.png"), ip_np)
            cv2.imwrite(os.path.join(images_folder, "segmented_garment.png"), ic_np)
            cv2.imwrite(os.path.join(images_folder, "cloth_agnostic_rgb.png"), ia_np)

            # save sampled image
            cv2.imwrite(os.path.join(images_folder, "sampled_image.png"), sampled_image)

            # save ema sampled image
            cv2.imwrite(os.path.join(images_folder, "ema_sampled_image.png"), ema_sampled_image)

    def save_models(self, run_name, epoch=-1):
        """Checkpoint the network, its EMA copy and the optimizer state."""

        torch.save(self.net.state_dict(), os.path.join("models", run_name, f"ckpt_{epoch}.pt"))
        torch.save(self.ema_net.state_dict(), os.path.join("models", run_name, f"ema_ckpt_{epoch}.pt"))
        torch.save(self.optimizer.state_dict(), os.path.join("models", run_name, f"optim_{epoch}.pt"))

    def fit(self, args):
        """Full training loop with periodic eval, image logging and saving."""

        logging.info(f"Starting training")

        data_len = len(self.train_dataloader)

        epochs = round((args.total_steps * args.batch_size_train) / data_len)

        # Bug fix: the old guard was `epochs < 0`, which can never trigger
        # for positive inputs; guard against a rounded value of 0 so at
        # least one epoch runs.
        if epochs < 1:
            epochs = 1

        self.running_train_steps = 0

        for epoch in range(epochs):
            logging.info(f"Starting Epoch: {epoch + 1}")
            _ = self.single_epoch(train=True)

            if (epoch + 1) % args.calculate_loss_frequency == 0:
                avg_loss = self.single_epoch(train=False)
                logging.info(f"Average Loss: {avg_loss}")

            if (epoch + 1) % args.image_logging_frequency == 0:
                self.logging_images(epoch, args.run_name)

            if (epoch + 1) % args.model_saving_frequency == 0:
                self.save_models(args.run_name, epoch)

        logging.info(f"Training Done Successfully! Yayyy! Now let's hope for good results")
+ logging.info(f"Training Done Successfully! Yayyy! Now let's hope for good results")