Files changed (4) hide show
  1. .gitattributes +2 -0
  2. app.py +43 -47
  3. assets/boy.png +3 -0
  4. assets/girl.png +3 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/boy.png filter=lfs diff=lfs merge=lfs -text
37
+ assets/girl.png filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -17,8 +17,15 @@ from pipeline import InstantCharacterFluxPipeline
17
 
18
  # global variable
19
  MAX_SEED = np.iinfo(np.int32).max
20
- device = "cuda" if torch.cuda.is_available() else "cpu"
21
- dtype = torch.float16 if str(device).__contains__("cuda") else torch.float32
 
 
 
 
 
 
 
22
 
23
  # pre-trained weights
24
  ip_adapter_path = hf_hub_download(repo_id="tencent/InstantCharacter", filename="instantcharacter_ip-adapter.bin")
@@ -42,7 +49,7 @@ pipe.init_adapter(
42
 
43
  # load matting model
44
  birefnet = AutoModelForImageSegmentation.from_pretrained(birefnet_path, trust_remote_code=True)
45
- birefnet.to('cuda')
46
  birefnet.eval()
47
  birefnet_transform_image = transforms.Compose([
48
  transforms.Resize((1024, 1024)),
@@ -54,7 +61,7 @@ birefnet_transform_image = transforms.Compose([
54
  def remove_bkg(subject_image):
55
 
56
  def infer_matting(img_pil):
57
- input_images = birefnet_transform_image(img_pil).unsqueeze(0).to('cuda')
58
 
59
  with torch.no_grad():
60
  preds = birefnet(input_images)[-1].sigmoid().cpu()
@@ -158,8 +165,8 @@ def get_example():
158
  ]
159
  return case
160
 
 
161
  def run_for_examples(source_image, prompt, scale, style_mode):
162
-
163
  return create_image(
164
  input_image=source_image,
165
  prompt=prompt,
@@ -218,11 +225,11 @@ def create_image(input_image,
218
 
219
  # Description
220
  title = r"""
221
- <h1 align="center">InstantCharacter : Personalize Any Characters with a Scalable Diffusion Transformer Framework</h1>
222
  """
223
 
224
  description = r"""
225
- <b>Official πŸ€— Gradio demo</b> for <a href='https://instantcharacter.github.io/' target='_blank'><b>InstantCharacter : Personalize Any Characters with a Scalable Diffusion Transformer Framework</b></a>.<br>
226
  How to use:<br>
227
  1. Upload a character image, removing background would be preferred.
228
  2. Enter a text prompt to describe what you hope the character does.
@@ -234,14 +241,7 @@ article = r"""
234
  ---
235
  πŸ“ **Citation**
236
  <br>
237
- If our work is helpful for your research or applications, please cite us via:
238
- ```bibtex
239
- @article{tao2025instantcharacter,
240
- title={InstantCharacter: Personalize Any Characters with a Scalable Diffusion Transformer Framework},
241
- author={Tao, Jiale and Zhang, Yanbing and Wang, Qixun and Cheng, Yiji and Wang, Haofan and Bai, Xu and Zhou, Zhengguang and Li, Ruihuang and Wang, Linqing and Wang, Chunyu and others},
242
- journal={arXiv preprint arXiv:2504.12395},
243
- year={2025}
244
- }
245
  ```
246
  πŸ“§ **Contact**
247
  <br>
@@ -269,42 +269,38 @@ with block:
269
  style_mode = gr.Dropdown(label='Style', choices=[None, 'Makoto Shinkai style', 'Ghibli style'], value='Makoto Shinkai style')
270
 
271
  with gr.Accordion(open=False, label="Advanced Options"):
272
- guidance_scale = gr.Slider(minimum=1,maximum=7.0, step=0.01,value=3.5, label="guidance scale")
273
- num_inference_steps = gr.Slider(minimum=5,maximum=50.0, step=1.0,value=28, label="num inference steps")
274
- seed = gr.Slider(minimum=-1000000, maximum=1000000, value=123456, step=1, label="Seed Value")
275
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
276
-
277
- generate_button = gr.Button("Generate Image")
278
 
 
 
279
  with gr.Column():
280
- generated_image = gr.Gallery(label="Generated Image")
281
-
282
- generate_button.click(
283
- fn=randomize_seed_fn,
284
- inputs=[seed, randomize_seed],
285
- outputs=seed,
286
- queue=False,
287
- api_name=False,
288
- ).then(
289
- fn=create_image,
290
- inputs=[image_pil,
291
- prompt,
292
- scale,
293
- guidance_scale,
294
- num_inference_steps,
295
- seed,
296
- style_mode,
297
- ],
298
- outputs=[generated_image])
299
 
300
- gr.Examples(
301
- examples=get_example(),
302
- inputs=[image_pil, prompt, scale, style_mode],
303
- fn=run_for_examples,
304
- outputs=[generated_image],
305
- cache_examples=True,
 
 
 
 
 
306
  )
307
 
308
  gr.Markdown(article)
309
 
310
- block.launch()
 
 
17
 
18
  # global variable
19
  MAX_SEED = np.iinfo(np.int32).max
20
+ if torch.cuda.is_available():
21
+ device = "cuda"
22
+ dtype = torch.float16
23
+ elif torch.backends.mps.is_available():
24
+ device = "mps"
25
+ dtype = torch.float16
26
+ else:
27
+ device = "cpu"
28
+ dtype = torch.float32
29
 
30
  # pre-trained weights
31
  ip_adapter_path = hf_hub_download(repo_id="tencent/InstantCharacter", filename="instantcharacter_ip-adapter.bin")
 
49
 
50
  # load matting model
51
  birefnet = AutoModelForImageSegmentation.from_pretrained(birefnet_path, trust_remote_code=True)
52
+ birefnet.to(device)
53
  birefnet.eval()
54
  birefnet_transform_image = transforms.Compose([
55
  transforms.Resize((1024, 1024)),
 
61
  def remove_bkg(subject_image):
62
 
63
  def infer_matting(img_pil):
64
+ input_images = birefnet_transform_image(img_pil).unsqueeze(0).to(device)
65
 
66
  with torch.no_grad():
67
  preds = birefnet(input_images)[-1].sigmoid().cpu()
 
165
  ]
166
  return case
167
 
168
+ @spaces.GPU
169
  def run_for_examples(source_image, prompt, scale, style_mode):
 
170
  return create_image(
171
  input_image=source_image,
172
  prompt=prompt,
 
225
 
226
  # Description
227
  title = r"""
228
+ <h1 align="center">ConstCharacter : Personalize Any Characters with a Scalable Diffusion Transformer Framework</h1>
229
  """
230
 
231
  description = r"""
232
+ <b>Official πŸ€— Gradio demo</b> for <a href='https://mastersubhajit.com/' target='_blank'><b>InstantCharacter : Personalize Any Characters with a Scalable Diffusion Transformer Framework</b></a>.<br>
233
  How to use:<br>
234
  1. Upload a character image, removing background would be preferred.
235
  2. Enter a text prompt to describe what you hope the character does.
 
241
  ---
242
  πŸ“ **Citation**
243
  <br>
244
+ If our work is helpful for your research or applications, please cite us when we publish the paper:
 
 
 
 
 
 
 
245
  ```
246
  πŸ“§ **Contact**
247
  <br>
 
269
  style_mode = gr.Dropdown(label='Style', choices=[None, 'Makoto Shinkai style', 'Ghibli style'], value='Makoto Shinkai style')
270
 
271
  with gr.Accordion(open=False, label="Advanced Options"):
272
+ guidance_scale = gr.Slider(minimum=1,maximum=7.0, step=0.01,value=3.5, label="Guidance Scale")
273
+ num_inference_steps = gr.Slider(minimum=10, maximum=50, step=1, value=28, label="Number of Inference Steps")
274
+ seed = gr.Slider(minimum=-1, maximum=2147483647, step=1, value=123456, label="Seed")
275
+ randomize_seed = gr.Checkbox(False, label="Randomize seed")
 
 
276
 
277
+ submit = gr.Button("Submit", variant="primary")
278
+
279
  with gr.Column():
280
+ result_gallery = gr.Gallery(label='Output', show_label=False, elem_id="gallery", columns=1, rows=1, height=512)
281
+
282
+ gr.Examples(
283
+ examples=get_example(),
284
+ inputs=[image_pil, prompt, scale, style_mode],
285
+ outputs=[result_gallery],
286
+ fn=run_for_examples,
287
+ cache_examples=True,
288
+ )
 
 
 
 
 
 
 
 
 
 
289
 
290
+ submit.click(
291
+ fn=randomize_seed_fn,
292
+ inputs=[seed, randomize_seed],
293
+ outputs=seed,
294
+ queue=False,
295
+ api_name=False,
296
+ ).then(
297
+ fn=create_image,
298
+ inputs=[image_pil, prompt, scale, guidance_scale, num_inference_steps, seed, style_mode],
299
+ outputs=[result_gallery],
300
+ api_name="run",
301
  )
302
 
303
  gr.Markdown(article)
304
 
305
+ if __name__ == "__main__":
306
+ block.launch()
assets/boy.png ADDED

Git LFS Details

  • SHA256: 3dd64a54d40029638b093d4897ba7f79e1bbe9b66fbd314f001a24bd08c2a018
  • Pointer size: 132 Bytes
  • Size of remote file: 2.86 MB
assets/girl.png ADDED

Git LFS Details

  • SHA256: e57dd54865a599d3a613df21e099d85b445b8a3b53efea54aaf019fbe2bda924
  • Pointer size: 132 Bytes
  • Size of remote file: 1.24 MB