osmr committed on
Commit
91a06e4
·
1 Parent(s): b06f841

Add controlnet support

Browse files
Files changed (2) hide show
  1. .gitignore +186 -0
  2. app.py +127 -41
.gitignore ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ yolov8n.pt
2
+ lora_ghibli/
3
+
4
+ # PyCharm ###
5
+ .idea/
6
+
7
+ # Visual Studio ###
8
+ Release/
9
+ Debug/
10
+ .vs/
11
+ *.VC.db
12
+ *.sdf
13
+ *.suo
14
+ *.opendb
15
+ *.psess
16
+ *.vsp
17
+ *.vspx
18
+ *.sln
19
+ *.pyproj
20
+ x64
21
+
22
+ # R ###
23
+ .Rhistory
24
+
25
+ # Byte-compiled / optimized / DLL files
26
+ __pycache__/
27
+ *.py[cod]
28
+ *$py.class
29
+
30
+ # C extensions
31
+ *.so
32
+
33
+ # Distribution / packaging
34
+ .Python
35
+ build/
36
+ develop-eggs/
37
+ dist/
38
+ downloads/
39
+ eggs/
40
+ .eggs/
41
+ lib/
42
+ lib64/
43
+ parts/
44
+ sdist/
45
+ var/
46
+ wheels/
47
+ share/python-wheels/
48
+ *.egg-info/
49
+ .installed.cfg
50
+ *.egg
51
+ MANIFEST
52
+
53
+ # PyInstaller
54
+ # Usually these files are written by a python script from a template
55
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
56
+ *.manifest
57
+ *.spec
58
+
59
+ # Installer logs
60
+ pip-log.txt
61
+ pip-delete-this-directory.txt
62
+
63
+ # Unit test / coverage reports
64
+ htmlcov/
65
+ .tox/
66
+ .nox/
67
+ .coverage
68
+ .coverage.*
69
+ .cache
70
+ nosetests.xml
71
+ coverage.xml
72
+ *.cover
73
+ *.py,cover
74
+ .hypothesis/
75
+ .pytest_cache/
76
+ cover/
77
+
78
+ # Translations
79
+ *.mo
80
+ *.pot
81
+
82
+ # Django stuff:
83
+ *.log
84
+ local_settings.py
85
+ db.sqlite3
86
+ db.sqlite3-journal
87
+
88
+ # Flask stuff:
89
+ instance/
90
+ .webassets-cache
91
+
92
+ # Scrapy stuff:
93
+ .scrapy
94
+
95
+ # Sphinx documentation
96
+ docs/_build/
97
+
98
+ # PyBuilder
99
+ .pybuilder/
100
+ target/
101
+
102
+ # Jupyter Notebook
103
+ .ipynb_checkpoints
104
+
105
+ # IPython
106
+ profile_default/
107
+ ipython_config.py
108
+
109
+ # pyenv
110
+ # For a library or package, you might want to ignore these files since the code is
111
+ # intended to run in multiple environments; otherwise, check them in:
112
+ # .python-version
113
+
114
+ # pipenv
115
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
116
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
117
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
118
+ # install all needed dependencies.
119
+ #Pipfile.lock
120
+
121
+ # poetry
122
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
123
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
124
+ # commonly ignored for libraries.
125
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
126
+ #poetry.lock
127
+
128
+ # pdm
129
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
130
+ #pdm.lock
131
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
132
+ # in version control.
133
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
134
+ .pdm.toml
135
+ .pdm-python
136
+ .pdm-build/
137
+
138
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
139
+ __pypackages__/
140
+
141
+ # Celery stuff
142
+ celerybeat-schedule
143
+ celerybeat.pid
144
+
145
+ # SageMath parsed files
146
+ *.sage.py
147
+
148
+ # Environments
149
+ .env
150
+ .venv
151
+ env/
152
+ venv/
153
+ ENV/
154
+ env.bak/
155
+ venv.bak/
156
+
157
+ # Spyder project settings
158
+ .spyderproject
159
+ .spyproject
160
+
161
+ # Rope project settings
162
+ .ropeproject
163
+
164
+ # mkdocs documentation
165
+ /site
166
+
167
+ # mypy
168
+ .mypy_cache/
169
+ .dmypy.json
170
+ dmypy.json
171
+
172
+ # Pyre type checker
173
+ .pyre/
174
+
175
+ # pytype static type analyzer
176
+ .pytype/
177
+
178
+ # Cython debug symbols
179
+ cython_debug/
180
+
181
+ # PyCharm
182
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
183
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
184
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
185
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
186
+ #.idea/
app.py CHANGED
@@ -4,7 +4,7 @@ import random
4
  from typing import Optional
5
 
6
  # import spaces #[uncomment to use ZeroGPU]
7
- from diffusers import StableDiffusionPipeline
8
  import torch
9
 
10
 
@@ -23,6 +23,8 @@ DEFAULT_HEIGHT = 512
23
  DEFAULT_GS = 7.5
24
  DEFAULT_LS = 1.0
25
  DEFAULT_NUM_INF_STEPS = 50
 
 
26
 
27
 
28
  # @spaces.GPU #[uncomment to use ZeroGPU]
@@ -36,58 +38,100 @@ def infer(lora_model_id: Optional[str] = "osmr/stable-diffusion-v1-4-lora-iv-ghi
36
  guidance_scale: Optional[float] = DEFAULT_GS,
37
  lora_scale: Optional[float] = DEFAULT_LS,
38
  num_inference_steps: Optional[int] = DEFAULT_NUM_INF_STEPS,
 
 
 
 
 
 
 
39
  progress = gr.Progress(track_tqdm=True)):
40
- if lora_model_id == "osmr/stable-diffusion-v1-4-lora-iv-ghibli":
41
- model_id = "CompVis/stable-diffusion-v1-4"
42
- elif lora_model_id == "osmr/stable-diffusion-v1-4-lora-db-ghibli":
43
- model_id = "CompVis/stable-diffusion-v1-4"
44
- elif lora_model_id == "osmr/stable-diffusion-v1-5-lora-iv-ghibli":
45
- model_id = "runwayml/stable-diffusion-v1-5"
46
- elif lora_model_id == "osmr/stable-diffusion-v1-5-lora-db-ghibli":
47
- model_id = "runwayml/stable-diffusion-v1-5"
48
- elif lora_model_id == "CompVis/stable-diffusion-v1-4":
49
- model_id = "CompVis/stable-diffusion-v1-4"
50
- lora_model_id = None
51
- elif lora_model_id == "runwayml/stable-diffusion-v1-5":
52
- model_id = "runwayml/stable-diffusion-v1-5"
53
  lora_model_id = None
54
  else:
55
- model_id = "CompVis/stable-diffusion-v1-4"
56
- lora_model_id = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  if randomize_seed:
59
  seed = random.randint(0, MAX_SEED)
60
 
61
  generator = torch.Generator().manual_seed(seed)
62
 
63
- pipe = StableDiffusionPipeline.from_pretrained(
64
- pretrained_model_name_or_path=model_id,
65
- torch_dtype=torch_dtype)
66
- if lora_model_id:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  pipe.load_lora_weights(lora_model_id)
 
 
 
 
68
  pipe = pipe.to(device)
69
 
70
- if lora_model_id:
71
- image = pipe(
72
- prompt=prompt,
73
- negative_prompt=negative_prompt,
74
- guidance_scale=guidance_scale,
75
- num_inference_steps=num_inference_steps,
76
- width=width,
77
- height=height,
78
- generator=generator,
79
- cross_attention_kwargs={"scale": lora_scale}
80
- ).images[0]
81
- else:
82
- image = pipe(
83
- prompt=prompt,
84
- negative_prompt=negative_prompt,
85
- guidance_scale=guidance_scale,
86
- num_inference_steps=num_inference_steps,
87
- width=width,
88
- height=height,
89
- generator=generator,
90
- ).images[0]
91
 
92
  return image, seed
93
 
@@ -96,6 +140,7 @@ examples = [
96
  "GBL, a man and a woman sitting at a table with glasses of wine in front of them",
97
  "a man and a woman sitting at a table with glasses of wine in front of them",
98
  "GBL, a man sitting at a desk in a library with a book open in front of him",
 
99
  ]
100
 
101
  css = """
@@ -199,6 +244,42 @@ with gr.Blocks(css=css) as demo:
199
  value=DEFAULT_LS,
200
  )
201
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  gr.Examples(examples=examples, inputs=[prompt])
203
  gr.on(
204
  triggers=[run_button.click, prompt.submit],
@@ -214,6 +295,11 @@ with gr.Blocks(css=css) as demo:
214
  guidance_scale,
215
  lora_scale,
216
  num_inference_steps,
 
 
 
 
 
217
  ],
218
  outputs=[result, seed],
219
  )
 
4
  from typing import Optional
5
 
6
  # import spaces #[uncomment to use ZeroGPU]
7
+ from diffusers import StableDiffusionPipeline, StableDiffusionControlNetPipeline, ControlNetModel
8
  import torch
9
 
10
 
 
23
  DEFAULT_GS = 7.5
24
  DEFAULT_LS = 1.0
25
  DEFAULT_NUM_INF_STEPS = 50
26
+ DEFAULT_CN_COND_SCALE = 1.0
27
+ DEFAULT_IPA_SCALE = 0.5
28
 
29
 
30
  # @spaces.GPU #[uncomment to use ZeroGPU]
 
38
  guidance_scale: Optional[float] = DEFAULT_GS,
39
  lora_scale: Optional[float] = DEFAULT_LS,
40
  num_inference_steps: Optional[int] = DEFAULT_NUM_INF_STEPS,
41
+
42
+ controlnet_type: str = "Edge-Detection",
43
+ controlnet_cond_scale: float = DEFAULT_CN_COND_SCALE,
44
+ controlnet_image: object = None,
45
+ ipadapter_scale: float = DEFAULT_IPA_SCALE,
46
+ ipadapter_image: object = None,
47
+
48
  progress = gr.Progress(track_tqdm=True)):
49
+
50
+ use_lora = (lora_model_id in [
51
+ "osmr/stable-diffusion-v1-4-lora-iv-ghibli",
52
+ "osmr/stable-diffusion-v1-4-lora-db-ghibli",
53
+ "osmr/stable-diffusion-v1-5-lora-iv-ghibli",
54
+ "osmr/stable-diffusion-v1-5-lora-db-ghibli",
55
+ ])
56
+ if not use_lora:
57
+ model_id = lora_model_id
 
 
 
 
58
  lora_model_id = None
59
  else:
60
+ if lora_model_id == "osmr/stable-diffusion-v1-4-lora-iv-ghibli":
61
+ model_id = "CompVis/stable-diffusion-v1-4"
62
+ elif lora_model_id == "osmr/stable-diffusion-v1-4-lora-db-ghibli":
63
+ model_id = "CompVis/stable-diffusion-v1-4"
64
+ elif lora_model_id == "osmr/stable-diffusion-v1-5-lora-iv-ghibli":
65
+ model_id = "runwayml/stable-diffusion-v1-5"
66
+ elif lora_model_id == "osmr/stable-diffusion-v1-5-lora-db-ghibli":
67
+ model_id = "runwayml/stable-diffusion-v1-5"
68
+ else:
69
+ model_id = lora_model_id
70
+ lora_model_id = None
71
+
72
+ sd_version = "1.5" if (model_id == "runwayml/stable-diffusion-v1-5") else "1.4"
73
+
74
+ use_controlnet = (controlnet_image is not None)
75
+ use_ipadapter = (ipadapter_image is not None)
76
 
77
  if randomize_seed:
78
  seed = random.randint(0, MAX_SEED)
79
 
80
  generator = torch.Generator().manual_seed(seed)
81
 
82
+ if use_controlnet:
83
+ if sd_version == "1.4":
84
+ if controlnet_type == "Edge-Detection":
85
+ controlnet_id = "lllyasviel/sd-controlnet-canny"
86
+ else:
87
+ controlnet_id = "lllyasviel/sd-controlnet-openpose"
88
+ else:
89
+ if controlnet_type == "Edge-Detection":
90
+ controlnet_id = "lllyasviel/control_v11p_sd15_canny"
91
+ else:
92
+ controlnet_id = "lllyasviel/control_v11p_sd15_openpose"
93
+
94
+ controlnet = ControlNetModel.from_pretrained(
95
+ pretrained_model_name_or_path=controlnet_id,
96
+ torch_dtype=torch_dtype)
97
+
98
+ pipe = StableDiffusionControlNetPipeline.from_pretrained(
99
+ pretrained_model_name_or_path=model_id,
100
+ controlnet=controlnet,
101
+ torch_dtype=torch_dtype)
102
+ else:
103
+ pipe = StableDiffusionPipeline.from_pretrained(
104
+ pretrained_model_name_or_path=model_id,
105
+ torch_dtype=torch_dtype)
106
+
107
+ if use_ipadapter:
108
+ pipe.load_ip_adapter(
109
+ "h94/IP-Adapter",
110
+ subfolder="models",
111
+ weight_name="ip-adapter_sd15.bin")
112
+ pipe.set_ip_adapter_scale(ipadapter_scale)
113
+
114
+ if use_lora:
115
  pipe.load_lora_weights(lora_model_id)
116
+ cross_attention_kwargs = {"scale": lora_scale}
117
+ else:
118
+ cross_attention_kwargs = None
119
+
120
  pipe = pipe.to(device)
121
 
122
+ image = pipe(
123
+ prompt=prompt,
124
+ negative_prompt=negative_prompt,
125
+ guidance_scale=guidance_scale,
126
+ num_inference_steps=num_inference_steps,
127
+ width=width,
128
+ height=height,
129
+ generator=generator,
130
+ cross_attention_kwargs=cross_attention_kwargs,
131
+ image=controlnet_image,
132
+ controlnet_conditioning_scale=(float(controlnet_cond_scale) if use_controlnet else None),
133
+ ip_adapter_image=ipadapter_image
134
+ ).images[0]
 
 
 
 
 
 
 
 
135
 
136
  return image, seed
137
 
 
140
  "GBL, a man and a woman sitting at a table with glasses of wine in front of them",
141
  "a man and a woman sitting at a table with glasses of wine in front of them",
142
  "GBL, a man sitting at a desk in a library with a book open in front of him",
143
+ "GBL, a cartoon woman is standing in front of a wall",
144
  ]
145
 
146
  css = """
 
244
  value=DEFAULT_LS,
245
  )
246
 
247
+ with gr.Accordion("ControlNet Settings", open=False):
248
+ controlnet_type = gr.Dropdown(
249
+ choices=[
250
+ "Edge-Detection",
251
+ "Pose-Estimation"],
252
+ interactive=True,
253
+ label="ControlNet Type",
254
+ )
255
+
256
+ controlnet_cond_scale = gr.Slider(
257
+ label="ControlNet Conditioning Scale",
258
+ minimum=0.0,
259
+ maximum=2.0,
260
+ step=0.1,
261
+ value=DEFAULT_CN_COND_SCALE
262
+ )
263
+
264
+ controlnet_image = gr.Image(
265
+ label="Control Image",
266
+ type="pil",
267
+ show_label=True)
268
+
269
+ with gr.Accordion("IP-adapter Settings", open=False):
270
+ ipadapter_scale = gr.Slider(
271
+ label="IP-adapter Scale",
272
+ minimum=0.0,
273
+ maximum=1.0,
274
+ step=0.1,
275
+ value=DEFAULT_IPA_SCALE
276
+ )
277
+
278
+ ipadapter_image = gr.Image(
279
+ label="IP-adapter Image",
280
+ type="pil",
281
+ show_label=True)
282
+
283
  gr.Examples(examples=examples, inputs=[prompt])
284
  gr.on(
285
  triggers=[run_button.click, prompt.submit],
 
295
  guidance_scale,
296
  lora_scale,
297
  num_inference_steps,
298
+ controlnet_type,
299
+ controlnet_cond_scale,
300
+ controlnet_image,
301
+ ipadapter_scale,
302
+ ipadapter_image,
303
  ],
304
  outputs=[result, seed],
305
  )