00Boobs00 committed on
Commit
d4a9e6e
·
verified ·
1 Parent(s): 8ab87e1

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +317 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from diffusers import AutoencoderKLWan, WanImageToVideoPipeline, UniPCMultistepScheduler
3
+ from diffusers.utils import export_to_video
4
+ from transformers import CLIPVisionModel
5
+ import gradio as gr
6
+ import tempfile
7
+ import spaces
8
+ from huggingface_hub import hf_hub_download, list_repo_files
9
+ import numpy as np
10
+ from PIL import Image
11
+ import random
12
+ import os
13

# ----------------------
#
# CONFIGURATION
#
# ----------------------

# Base image-to-video model and the repository the LoRA adapters come from.
MODEL_ID = "gaalos/Wan2.1-I2V-14B-720P-Diffusers-scaled"
LORA_REPO_ID = "hotdogs/wan_nsfw_lora"
MOD_VALUE = 32  # output height/width are snapped to multiples of this value
FIXED_FPS = 24  # frame rate used both for frame-count math and video export
MIN_FRAMES_MODEL = 8  # lower bound on frames passed to the pipeline
MAX_FRAMES_MODEL = 30 * FIXED_FPS # 30s max
MAX_SEED = np.iinfo(np.int32).max  # seed slider upper bound

# Defaults and limits for the height/width sliders in the UI.
DEFAULT_H_SLIDER_VALUE = 640
DEFAULT_W_SLIDER_VALUE = 1024
NEW_FORMULA_MAX_AREA = 640.0 * 1024.0  # pixel budget used when auto-sizing from an upload
SLIDER_MIN_H, SLIDER_MAX_H = 128, 1024
SLIDER_MIN_W, SLIDER_MAX_W = 128, 1024

# Prompt defaults shown in the UI.
default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
default_negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards, watermark, text, signature"

37
# ----------------------
# LOAD BASE MODEL
# ----------------------

print("Loading models...")
# Image encoder and VAE are loaded in float32; the rest of the pipeline runs
# in bfloat16 (dtypes taken from the original author's configuration).
image_encoder = CLIPVisionModel.from_pretrained(MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float32)
vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
pipe = WanImageToVideoPipeline.from_pretrained(
    MODEL_ID, vae=vae, image_encoder=image_encoder, torch_dtype=torch.bfloat16
)
# Swap in UniPC scheduling; flow_shift=8.0 is a tuning choice for this model
# family (NOTE(review): value inherited from the author — confirm against the
# Wan2.1 reference settings).
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)
pipe.to("cuda")
print("Models loaded successfully!")

51
# ----------------------
# LOAD ALL LORAS FROM "hotdogs/wan_nsfw_lora"
# ----------------------

print("Loading LoRAs...")
# Download every .safetensors file in the LoRA repo and register each one on
# the pipeline under its file stem; lora_paths maps adapter name -> local path
# and later feeds the UI's CheckboxGroup choices.
lora_paths = {}
for repo_file in list_repo_files(LORA_REPO_ID):
    if not repo_file.endswith(".safetensors"):
        continue
    downloaded = hf_hub_download(repo_id=LORA_REPO_ID, filename=repo_file)
    adapter = os.path.splitext(os.path.basename(repo_file))[0]
    lora_paths[adapter] = downloaded
    pipe.load_lora_weights(downloaded, adapter_name=adapter)
print(f"Loaded {len(lora_paths)} LoRA adapters: {list(lora_paths.keys())}")

65
# ----------------------
# DIMENSION HELPERS
# ----------------------

def _calculate_new_dimensions(pil_image):
    """Pick an output (height, width) matching the image's aspect ratio.

    The pair targets roughly NEW_FORMULA_MAX_AREA total pixels, is snapped
    down to a multiple of MOD_VALUE, and is clamped to the slider limits.
    Degenerate images fall back to the slider defaults.
    """
    src_w, src_h = pil_image.size
    if src_w <= 0 or src_h <= 0:
        return DEFAULT_H_SLIDER_VALUE, DEFAULT_W_SLIDER_VALUE

    ratio = src_h / src_w
    # Dimensions whose product is ~NEW_FORMULA_MAX_AREA at this aspect ratio.
    ideal_h = round(np.sqrt(NEW_FORMULA_MAX_AREA * ratio))
    ideal_w = round(np.sqrt(NEW_FORMULA_MAX_AREA / ratio))

    # Snap down to the model's required multiple, never below MOD_VALUE itself.
    snapped_h = max(MOD_VALUE, (ideal_h // MOD_VALUE) * MOD_VALUE)
    snapped_w = max(MOD_VALUE, (ideal_w // MOD_VALUE) * MOD_VALUE)

    # Upper bounds are the slider maxima, themselves snapped to MOD_VALUE.
    bound_h = (SLIDER_MAX_H // MOD_VALUE) * MOD_VALUE
    bound_w = (SLIDER_MAX_W // MOD_VALUE) * MOD_VALUE
    return (
        int(np.clip(snapped_h, SLIDER_MIN_H, bound_h)),
        int(np.clip(snapped_w, SLIDER_MIN_W, bound_w)),
    )

87
def handle_image_upload(image, current_h, current_w):
    """Sync the height/width sliders to a newly uploaded image.

    Returns a pair of gr.update objects for (height, width). When no image is
    present, or sizing fails, both sliders revert to their default values.
    """
    if image is None:
        return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE)
    try:
        best_h, best_w = _calculate_new_dimensions(image)
    except Exception as e:
        # Best-effort: a sizing failure should not break the upload flow.
        print(f"Error calculating dimensions: {e}")
        return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE)
    return gr.update(value=best_h), gr.update(value=best_w)

98
# ----------------------
# DURATION CONFIGURATION
# ----------------------

def get_duration(input_image, prompt, height, width, negative_prompt, duration_seconds,
                 guidance_scale, steps, seed, randomize_seed, lora_list, progress):
    """Estimate the GPU-seconds budget for the spaces.GPU decorator.

    Starts from a 60s base, adds 30s for each duration threshold (10s, 20s)
    the request exceeds, and 20s more when the step count is above 8. All
    other parameters are accepted only to mirror generate_video's signature.
    """
    estimate = 60
    for threshold in (10, 20):
        if duration_seconds > threshold:
            estimate += 30
    if steps > 8:
        estimate += 20
    return estimate

111
# ----------------------
# MAIN GENERATION FUNCTION
# ----------------------

@spaces.GPU(duration=get_duration)
def generate_video(input_image, prompt, height, width,
                   negative_prompt=default_negative_prompt, duration_seconds=2,
                   guidance_scale=1, steps=4,
                   seed=42, randomize_seed=False,
                   lora_list=None, progress=gr.Progress(track_tqdm=True)):
    """
    Generate a video from an image using the Wan2.1 I2V pipeline, optionally
    applying the selected LoRA adapters.

    Returns:
        (video_path, seed_used): path to the exported .mp4 and the seed that
        was actually used (relevant when randomize_seed is enabled).

    Raises:
        gr.Error: if no input image was provided.
    """
    if input_image is None:
        raise gr.Error("Please upload an image.")

    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    # Snap the requested size down to the model's required multiple of MOD_VALUE.
    target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
    target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
    num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
    resized_image = input_image.resize((target_w, target_h))

    selected_adapters = lora_list or []
    if selected_adapters:
        pipe.set_adapters(selected_adapters, adapter_weights=[0.95] * len(selected_adapters))
        # Fuse for faster inference. This MUST be undone after the run:
        # repeated fuse_lora() calls without unfuse_lora() accumulate LoRA
        # deltas into the base weights and corrupt every later generation
        # (the original code fused unconditionally and never unfused).
        pipe.fuse_lora()
    else:
        # No adapters selected: make sure none of the loaded LoRAs apply
        # (the most recently loaded adapter is otherwise still active).
        pipe.disable_lora()

    try:
        with torch.inference_mode():
            output_frames_list = pipe(
                image=resized_image, prompt=prompt, negative_prompt=negative_prompt,
                height=target_h, width=target_w, num_frames=num_frames,
                guidance_scale=float(guidance_scale), num_inference_steps=int(steps),
                generator=torch.Generator(device="cuda").manual_seed(current_seed)
            ).frames[0]
    finally:
        # Always restore the un-fused base weights, even if generation fails.
        if selected_adapters:
            pipe.unfuse_lora()

    # Reserve a temp path, close the handle, then let export_to_video write it.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        video_path = tmpfile.name
    export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
    return video_path, current_seed

150
# ----------------------
# GRADIO UI - GRADIO 6 SYNTAX
# ----------------------

with gr.Blocks() as demo:
    # Header with anycoder link
    gr.Markdown(
        """
        # Wan2.1 I2V + Multi-LoRA Generator (NSFW LoRA repo)
        Generate videos using LoRAs from `hotdogs/wan_nsfw_lora` with Wan2.1. Up to **30s** per video.
        [Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
        """
    )

    with gr.Row():
        # Left column: image input and generation settings.
        with gr.Column(scale=1):
            gr.Markdown("### Input & Settings")
            image_input = gr.Image(
                type="pil",
                label="Input Image",
                sources=["upload", "webcam", "clipboard"],
                height=300
            )
            prompt = gr.Textbox(
                label="Prompt",
                value=default_prompt_i2v,
                placeholder="Describe the motion you want to see...",
                lines=3
            )
            duration_slider = gr.Slider(
                # Minimum duration is derived from the model's frame floor.
                minimum=round(MIN_FRAMES_MODEL / FIXED_FPS, 1),
                maximum=30.0,
                step=0.1,
                value=2,
                label="Video Duration (seconds)"
            )

            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt_input = gr.Textbox(
                    label="Negative Prompt",
                    value=default_negative_prompt,
                    lines=3
                )
                seed_input = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=42
                )
                random_seed_checkbox = gr.Checkbox(
                    label="Randomize seed",
                    value=True
                )

                with gr.Row():
                    # Positional args: (minimum, maximum, step).
                    height_input = gr.Slider(
                        SLIDER_MIN_H,
                        SLIDER_MAX_H,
                        MOD_VALUE,
                        value=DEFAULT_H_SLIDER_VALUE,
                        label="Output Height"
                    )
                    width_input = gr.Slider(
                        SLIDER_MIN_W,
                        SLIDER_MAX_W,
                        MOD_VALUE,
                        value=DEFAULT_W_SLIDER_VALUE,
                        label="Output Width"
                    )

                steps_slider = gr.Slider(
                    1,
                    30,
                    step=1,
                    value=4,
                    label="Inference Steps"
                )
                # Hidden: guidance is fixed at 1.0 for this distilled setup.
                guidance_slider = gr.Slider(
                    0.0,
                    20.0,
                    step=0.5,
                    value=1.0,
                    label="Guidance Scale",
                    visible=False
                )
            # NOTE(review): source indentation was lost; the LoRA selector and
            # button are assumed to sit at column level, after the accordion.
            lora_selector = gr.CheckboxGroup(
                choices=list(lora_paths.keys()),
                label="Activate LoRA(s)",
                info="Select one or more LoRAs to apply"
            )

            generate_button = gr.Button(
                "Generate Video",
                variant="primary",
                size="lg"
            )

        # Right column: generated video and the seed that produced it.
        with gr.Column(scale=1):
            gr.Markdown("### Output")
            video_output = gr.Video(
                label="Generated Video",
                autoplay=True,
                height=400
            )
            seed_output = gr.Textbox(
                label="Seed used",
                interactive=False,
                info="Use this seed to reproduce the same video"
            )

    # Event handlers
    # NOTE(review): api_visibility= is assumed to be Gradio 6 syntax; older
    # releases use show_api/api_name — confirm against the pinned version.
    image_input.upload(
        fn=handle_image_upload,
        inputs=[image_input, height_input, width_input],
        outputs=[height_input, width_input],
        api_visibility="private"
    )
    image_input.clear(
        fn=handle_image_upload,
        inputs=[image_input, height_input, width_input],
        outputs=[height_input, width_input],
        api_visibility="private"
    )

    # Input order must match generate_video's parameter order.
    inputs = [
        image_input, prompt, height_input, width_input,
        negative_prompt_input, duration_slider, guidance_slider,
        steps_slider, seed_input, random_seed_checkbox, lora_selector
    ]

    generate_button.click(
        fn=generate_video,
        inputs=inputs,
        outputs=[video_output, seed_output],
        api_visibility="public",
        concurrency_limit=1
    )

289
# ----------------------
# LAUNCH - GRADIO 6 SYNTAX
# ----------------------

if __name__ == "__main__":
    # NOTE(review): theme= and footer_links= as launch() kwargs are assumed to
    # be Gradio 6 syntax; in Gradio <=5, theme belongs to gr.Blocks(theme=...)
    # and footer_links does not exist — confirm against the pinned version.
    demo.queue().launch(
        theme=gr.themes.Soft(
            primary_hue="blue",
            secondary_hue="indigo",
            neutral_hue="slate",
            font=gr.themes.GoogleFont("Inter"),
            text_size="lg",
            spacing_size="lg",
            radius_size="md"
        ).set(
            button_primary_background_fill="*primary_600",
            button_primary_background_fill_hover="*primary_700",
            block_title_text_weight="600",
            block_background_fill="*neutral_50"
        ),
        footer_links=[
            {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
            {"label": "Model", "url": f"https://huggingface.co/{MODEL_ID}"},
            {"label": "LoRAs", "url": f"https://huggingface.co/{LORA_REPO_ID}"}
        ],
        show_error=True,
        max_threads=40,  # server-side worker thread cap
        share=False
    )
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ Pillow
2
+ diffusers
3
+ gradio
4
+ huggingface_hub
5
+ numpy
6
+ spaces
7
+ torch
8
+ transformers