Muhammadidrees committed on
Commit
2998aed
·
verified ·
1 Parent(s): 5ec197c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -0
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from diffusers import DiffusionPipeline
4
+
5
# ---------------------- MODEL INITIALIZATION ----------------------
def _load_fp16_pipeline(repo_id: str):
    """Fetch a diffusion pipeline from the Hugging Face Hub in half precision.

    ``device_map="auto"`` lets accelerate decide weight placement, which keeps
    VRAM usage manageable for these large checkpoints.
    """
    return DiffusionPipeline.from_pretrained(
        repo_id,
        torch_dtype=torch.float16,
        device_map="auto",
    )


# Stylization model (FLUX) and talking-avatar model (OmniAvatar), loaded once
# at import time so the Gradio callback can reuse them across requests.
# NOTE(review): that "tencent/OmniAvatar" is loadable via DiffusionPipeline is
# assumed from the original code — confirm the repo ships diffusers-format weights.
flux_model = _load_fp16_pipeline("black-forest-labs/FLUX.1-dev")
omni_model = _load_fp16_pipeline("tencent/OmniAvatar")
19
+
20
# ---------------------- MAIN GENERATION FUNCTION ----------------------
def generate_video(image, audio, prompt, style="claymation"):
    """Stylize a character image and animate it into a lip-synced video.

    Args:
        image: Path to the input character image (Gradio ``type="filepath"``).
        audio: Path to the driving voice audio (Gradio ``type="filepath"``).
        prompt: Text prompt guiding the FLUX stylization pass.
        style: Animation style forwarded to OmniAvatar (default ``"claymation"``).

    Returns:
        The generated video (whatever the OmniAvatar pipeline emits — path,
        array, or frames) for display in a ``gr.Video`` component.

    Raises:
        gr.Error: On any generation failure or unrecognized pipeline output.
            The original code returned a plain error *string* here, but the
            output component is ``gr.Video``, which cannot render a string;
            ``gr.Error`` is how Gradio surfaces a message in the UI.
    """
    # BUG FIX: the original called flux_model.to(device) / omni_model.to(device)
    # on every request, but diffusers forbids .to() on pipelines loaded with
    # device_map="auto" (it raises ValueError). Placement is already handled by
    # accelerate at load time, so no manual device move is needed here.
    try:
        # Step 1: stylize the character image with the FLUX diffusion model.
        stylized_image = flux_model(
            prompt=prompt,
            image=image,
            guidance_scale=7.5,
            num_inference_steps=30,
        ).images[0]

        # Step 2: animate the stylized image with lip-sync via OmniAvatar.
        result = omni_model(
            image=stylized_image,
            audio=audio,
            style=style,
        )
    except Exception as exc:  # boundary catch: surface the failure in the UI
        raise gr.Error(f"⚠️ Error during generation: {exc}") from exc

    # The pipeline's output container isn't documented here; accept either a
    # dict with a "video" key or an object exposing a .videos list.
    if isinstance(result, dict) and "video" in result:
        return result["video"]
    if hasattr(result, "videos"):
        return result.videos[0]
    raise gr.Error(f"⚠️ Unexpected output format: {type(result)}")
51
+
52
# ---------------------- GRADIO UI ----------------------
# Component construction order is preserved exactly: in gr.Blocks, layout
# follows creation order inside each context manager.
with gr.Blocks(title="🎭 Claymation Talking Avatar Generator") as demo:
    gr.Markdown(
        """
# 🎬 Claymation Talking Avatar Generator
Generate claymation-style speaking avatars using **FLUX-Kontext** for stylization
and **OmniAvatar** for lip-synced animation.
"""
    )

    # Row 1: the two required inputs, both passed to the backend as file paths.
    with gr.Row():
        character_image = gr.Image(label="🧑 Upload Character Image", type="filepath")
        voice_audio = gr.Audio(label="🎤 Upload Voice Audio", type="filepath")

    # Free-text prompt for the stylization pass.
    prompt_box = gr.Textbox(
        label="📝 Prompt (Optional)",
        value="A claymation character speaking realistically",
        placeholder="Describe the style or mood...",
    )

    # Row 2: style selector alongside the trigger button.
    with gr.Row():
        style_choice = gr.Dropdown(
            choices=["claymation", "toon", "realistic"],
            value="claymation",
            label="🎨 Style",
        )
        run_btn = gr.Button("🚀 Generate Video")

    # Single output slot for the rendered clip.
    output_video = gr.Video(label="🎥 Generated Output")

    # Wire the button to the generation backend.
    run_btn.click(
        fn=generate_video,
        inputs=[character_image, voice_audio, prompt_box, style_choice],
        outputs=output_video,
    )

# ---------------------- LAUNCH ----------------------
demo.queue().launch(debug=True, share=False)