Files changed (4) hide show
  1. .gitattributes +2 -0
  2. app.py +43 -47
  3. assets/boy.png +3 -0
  4. assets/girl.png +3 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/boy.png filter=lfs diff=lfs merge=lfs -text
37
+ assets/girl.png filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -17,8 +17,15 @@ from pipeline import InstantCharacterFluxPipeline
17
 
18
  # global variable
19
  MAX_SEED = np.iinfo(np.int32).max
20
- device = "cuda" if torch.cuda.is_available() else "cpu"
21
- dtype = torch.float16 if str(device).__contains__("cuda") else torch.float32
 
 
 
 
 
 
 
22
 
23
  # pre-trained weights
24
  ip_adapter_path = hf_hub_download(repo_id="tencent/InstantCharacter", filename="instantcharacter_ip-adapter.bin")
@@ -42,7 +49,7 @@ pipe.init_adapter(
42
 
43
  # load matting model
44
  birefnet = AutoModelForImageSegmentation.from_pretrained(birefnet_path, trust_remote_code=True)
45
- birefnet.to('cuda')
46
  birefnet.eval()
47
  birefnet_transform_image = transforms.Compose([
48
  transforms.Resize((1024, 1024)),
@@ -54,7 +61,7 @@ birefnet_transform_image = transforms.Compose([
54
  def remove_bkg(subject_image):
55
 
56
  def infer_matting(img_pil):
57
- input_images = birefnet_transform_image(img_pil).unsqueeze(0).to('cuda')
58
 
59
  with torch.no_grad():
60
  preds = birefnet(input_images)[-1].sigmoid().cpu()
@@ -158,8 +165,8 @@ def get_example():
158
  ]
159
  return case
160
 
 
161
  def run_for_examples(source_image, prompt, scale, style_mode):
162
-
163
  return create_image(
164
  input_image=source_image,
165
  prompt=prompt,
@@ -218,11 +225,11 @@ def create_image(input_image,
218
 
219
  # Description
220
  title = r"""
221
- <h1 align="center">InstantCharacter : Personalize Any Characters with a Scalable Diffusion Transformer Framework</h1>
222
  """
223
 
224
  description = r"""
225
- <b>Official πŸ€— Gradio demo</b> for <a href='https://instantcharacter.github.io/' target='_blank'><b>InstantCharacter : Personalize Any Characters with a Scalable Diffusion Transformer Framework</b></a>.<br>
226
  How to use:<br>
227
  1. Upload a character image, removing background would be preferred.
228
  2. Enter a text prompt to describe what you hope the character does.
@@ -234,14 +241,7 @@ article = r"""
234
  ---
235
  πŸ“ **Citation**
236
  <br>
237
- If our work is helpful for your research or applications, please cite us via:
238
- ```bibtex
239
- @article{tao2025instantcharacter,
240
- title={InstantCharacter: Personalize Any Characters with a Scalable Diffusion Transformer Framework},
241
- author={Tao, Jiale and Zhang, Yanbing and Wang, Qixun and Cheng, Yiji and Wang, Haofan and Bai, Xu and Zhou, Zhengguang and Li, Ruihuang and Wang, Linqing and Wang, Chunyu and others},
242
- journal={arXiv preprint arXiv:2504.12395},
243
- year={2025}
244
- }
245
  ```
246
  πŸ“§ **Contact**
247
  <br>
@@ -269,42 +269,38 @@ with block:
269
  style_mode = gr.Dropdown(label='Style', choices=[None, 'Makoto Shinkai style', 'Ghibli style'], value='Makoto Shinkai style')
270
 
271
  with gr.Accordion(open=False, label="Advanced Options"):
272
- guidance_scale = gr.Slider(minimum=1,maximum=7.0, step=0.01,value=3.5, label="guidance scale")
273
- num_inference_steps = gr.Slider(minimum=5,maximum=50.0, step=1.0,value=28, label="num inference steps")
274
- seed = gr.Slider(minimum=-1000000, maximum=1000000, value=123456, step=1, label="Seed Value")
275
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
276
-
277
- generate_button = gr.Button("Generate Image")
278
 
 
 
279
  with gr.Column():
280
- generated_image = gr.Gallery(label="Generated Image")
281
-
282
- generate_button.click(
283
- fn=randomize_seed_fn,
284
- inputs=[seed, randomize_seed],
285
- outputs=seed,
286
- queue=False,
287
- api_name=False,
288
- ).then(
289
- fn=create_image,
290
- inputs=[image_pil,
291
- prompt,
292
- scale,
293
- guidance_scale,
294
- num_inference_steps,
295
- seed,
296
- style_mode,
297
- ],
298
- outputs=[generated_image])
299
 
300
- gr.Examples(
301
- examples=get_example(),
302
- inputs=[image_pil, prompt, scale, style_mode],
303
- fn=run_for_examples,
304
- outputs=[generated_image],
305
- cache_examples=True,
 
 
 
 
 
306
  )
307
 
308
  gr.Markdown(article)
309
 
310
- block.launch()
 
 
17
 
18
  # global variable
19
  MAX_SEED = np.iinfo(np.int32).max
20
+ if torch.cuda.is_available():
21
+ device = "cuda"
22
+ dtype = torch.float16
23
+ elif torch.backends.mps.is_available():
24
+ device = "mps"
25
+ dtype = torch.float16
26
+ else:
27
+ device = "cpu"
28
+ dtype = torch.float32
29
 
30
  # pre-trained weights
31
  ip_adapter_path = hf_hub_download(repo_id="tencent/InstantCharacter", filename="instantcharacter_ip-adapter.bin")
 
49
 
50
  # load matting model
51
  birefnet = AutoModelForImageSegmentation.from_pretrained(birefnet_path, trust_remote_code=True)
52
+ birefnet.to(device)
53
  birefnet.eval()
54
  birefnet_transform_image = transforms.Compose([
55
  transforms.Resize((1024, 1024)),
 
61
  def remove_bkg(subject_image):
62
 
63
  def infer_matting(img_pil):
64
+ input_images = birefnet_transform_image(img_pil).unsqueeze(0).to(device)
65
 
66
  with torch.no_grad():
67
  preds = birefnet(input_images)[-1].sigmoid().cpu()
 
165
  ]
166
  return case
167
 
168
+ @spaces.GPU
169
  def run_for_examples(source_image, prompt, scale, style_mode):
 
170
  return create_image(
171
  input_image=source_image,
172
  prompt=prompt,
 
225
 
226
  # Description
227
  title = r"""
228
+ <h1 align="center">ConstCharacter : Personalize Any Characters with a Scalable Diffusion Transformer Framework</h1>
229
  """
230
 
231
  description = r"""
232
+ <b>Official πŸ€— Gradio demo</b> for <a href='https://mastersubhajit.com/' target='_blank'><b>InstantCharacter : Personalize Any Characters with a Scalable Diffusion Transformer Framework</b></a>.<br>
233
  How to use:<br>
234
  1. Upload a character image, removing background would be preferred.
235
  2. Enter a text prompt to describe what you hope the character does.
 
241
  ---
242
  πŸ“ **Citation**
243
  <br>
244
+ If our work is helpful for your research or applications, please cite us when we publish the paper:
 
 
 
 
 
 
 
245
  ```
246
  πŸ“§ **Contact**
247
  <br>
 
269
  style_mode = gr.Dropdown(label='Style', choices=[None, 'Makoto Shinkai style', 'Ghibli style'], value='Makoto Shinkai style')
270
 
271
  with gr.Accordion(open=False, label="Advanced Options"):
272
+ guidance_scale = gr.Slider(minimum=1,maximum=7.0, step=0.01,value=3.5, label="Guidance Scale")
273
+ num_inference_steps = gr.Slider(minimum=10, maximum=50, step=1, value=28, label="Number of Inference Steps")
274
+ seed = gr.Slider(minimum=-1, maximum=2147483647, step=1, value=123456, label="Seed")
275
+ randomize_seed = gr.Checkbox(False, label="Randomize seed")
 
 
276
 
277
+ submit = gr.Button("Submit", variant="primary")
278
+
279
  with gr.Column():
280
+ result_gallery = gr.Gallery(label='Output', show_label=False, elem_id="gallery", columns=1, rows=1, height=512)
281
+
282
+ gr.Examples(
283
+ examples=get_example(),
284
+ inputs=[image_pil, prompt, scale, style_mode],
285
+ outputs=[result_gallery],
286
+ fn=run_for_examples,
287
+ cache_examples=True,
288
+ )
 
 
 
 
 
 
 
 
 
 
289
 
290
+ submit.click(
291
+ fn=randomize_seed_fn,
292
+ inputs=[seed, randomize_seed],
293
+ outputs=seed,
294
+ queue=False,
295
+ api_name=False,
296
+ ).then(
297
+ fn=create_image,
298
+ inputs=[image_pil, prompt, scale, guidance_scale, num_inference_steps, seed, style_mode],
299
+ outputs=[result_gallery],
300
+ api_name="run",
301
  )
302
 
303
  gr.Markdown(article)
304
 
305
+ if __name__ == "__main__":
306
+ block.launch()
assets/boy.png ADDED

Git LFS Details

  • SHA256: 3dd64a54d40029638b093d4897ba7f79e1bbe9b66fbd314f001a24bd08c2a018
  • Pointer size: 132 Bytes
  • Size of remote file: 2.86 MB
assets/girl.png ADDED

Git LFS Details

  • SHA256: e57dd54865a599d3a613df21e099d85b445b8a3b53efea54aaf019fbe2bda924
  • Pointer size: 132 Bytes
  • Size of remote file: 1.24 MB