Muhammadidrees committed on
Commit
9c96817
·
verified ·
1 Parent(s): 0788c99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -9
app.py CHANGED
@@ -1,20 +1,22 @@
 
1
  import gradio as gr
2
  import torch
3
  from diffusers import DiffusionPipeline
4
 
5
  # ---------------------- MODEL INITIALIZATION ----------------------
6
- # Load Flux-Kontext and OmniAvatar pipelines from Hugging Face Hub
7
- # Both are large models; use torch_dtype and device_map for VRAM efficiency
 
8
  flux_model = DiffusionPipeline.from_pretrained(
9
  "black-forest-labs/FLUX.1-dev",
10
- torch_dtype=torch.float16,
11
- device_map="auto"
12
  )
13
 
14
  omni_model = DiffusionPipeline.from_pretrained(
15
  "tencent/OmniAvatar",
16
- torch_dtype=torch.float16,
17
- device_map="auto"
18
  )
19
 
20
  # ---------------------- MAIN GENERATION FUNCTION ----------------------
@@ -24,7 +26,7 @@ def generate_video(image, audio, prompt, style="claymation"):
24
  omni_model.to(device)
25
 
26
  try:
27
- # Step 1: Stylize character image using FLUX-Kontext diffusion model
28
  stylized_image = flux_model(
29
  prompt=prompt,
30
  image=image,
@@ -32,14 +34,14 @@ def generate_video(image, audio, prompt, style="claymation"):
32
  num_inference_steps=30
33
  ).images[0]
34
 
35
- # Step 2: Animate the stylized image with lip-sync using OmniAvatar
36
  result = omni_model(
37
  image=stylized_image,
38
  audio=audio,
39
  style=style,
40
  )
41
 
42
- # The model should return a dictionary with "video" or similar key
43
  if isinstance(result, dict) and "video" in result:
44
  return result["video"]
45
  elif hasattr(result, "videos"):
@@ -85,3 +87,5 @@ with gr.Blocks(title="🎭 Claymation Talking Avatar Generator") as demo:
85
 
86
  # ---------------------- LAUNCH ----------------------
87
  demo.queue().launch(debug=True, share=False)
 
 
 
1
+ ```python
2
  import gradio as gr
3
  import torch
4
  from diffusers import DiffusionPipeline
5
 
6
  # ---------------------- MODEL INITIALIZATION ----------------------
7
+ # Use 'balanced' for multi-device setups and CPU fallback for Spaces without GPU
8
+ device_map = "balanced" if torch.cuda.is_available() else "cpu"
9
+
10
  flux_model = DiffusionPipeline.from_pretrained(
11
  "black-forest-labs/FLUX.1-dev",
12
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
13
+ device_map=device_map
14
  )
15
 
16
  omni_model = DiffusionPipeline.from_pretrained(
17
  "tencent/OmniAvatar",
18
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
19
+ device_map=device_map
20
  )
21
 
22
  # ---------------------- MAIN GENERATION FUNCTION ----------------------
 
26
  omni_model.to(device)
27
 
28
  try:
29
+ # Step 1: Stylize input image using FLUX-Kontext
30
  stylized_image = flux_model(
31
  prompt=prompt,
32
  image=image,
 
34
  num_inference_steps=30
35
  ).images[0]
36
 
37
+ # Step 2: Animate the stylized image with OmniAvatar
38
  result = omni_model(
39
  image=stylized_image,
40
  audio=audio,
41
  style=style,
42
  )
43
 
44
+ # Return the generated video if available
45
  if isinstance(result, dict) and "video" in result:
46
  return result["video"]
47
  elif hasattr(result, "videos"):
 
87
 
88
  # ---------------------- LAUNCH ----------------------
89
  demo.queue().launch(debug=True, share=False)
90
+ ```
91
+