# NOTE: web-page residue that was fused onto the top of this file when it was
# scraped (Hugging Face Spaces status "Paused", "File size: 9,096 Bytes",
# per-line commit hashes and the 1-279 line-number gutter) is not source code
# and has been reduced to this comment.
import os
import gradio as gr
import numpy as np
import random
import spaces
import torch
from diffusers import Flux2KleinPipeline
from PIL import Image
# NEW: for reading first frame
import cv2
# ---------------------------------------------------------------------------
# Module-level configuration and model loading.
# Everything below runs once at import time, including the pipeline load.
# ---------------------------------------------------------------------------

# Precision passed to the pipeline as torch_dtype, and the device it is moved to.
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"
# Largest value of a signed 32-bit integer; used as the upper bound for seeds.
MAX_SEED = np.iinfo(np.int32).max
# Hub repository the base pipeline is loaded from.
REPO_ID_DISTILLED = "black-forest-labs/FLUX.2-klein-9B"
# LoRA location. NOTE(review): the code that loads this LoRA is commented out
# further down, so these two constants are only referenced from that disabled code.
LORA_REPO_ID = "Alissonerdx/BFS-Best-Face-Swap"
LORA_FILENAME = "bfs_head_v1_flux-klein_9b_step3750_rank64.safetensors"
# Fixed text prompt sent with every generation request. It refers to the input
# images as "Picture 1" (identity to insert) and "Picture 2" (base scene).
FACE_SWAP_PROMPT = "start with Picture 2 as the base image, keeping its lighting, environment, background, and setting. remove the person from Picture 2 completely and replace them with the person from Picture 1, strictly preserving the face, hair, skin tone, and physical features of Picture 1. match the body position, posture, pose, and head rotation of the original person in Picture 2. blend the subject naturally into the scene so that the lighting and shadows on the body reflect the environment of Picture 2, high quality, sharp details, 4k."
print("Loading FLUX.2 Klein 9B Distilled model...")
# Load the pipeline in the configured precision and move it to the chosen device.
pipe = Flux2KleinPipeline.from_pretrained(REPO_ID_DISTILLED, torch_dtype=dtype)
pipe.to(device)
# LoRA loading is currently disabled (kept for reference):
# print(f"Loading LoRA from {LORA_REPO_ID}...")
# pipe.load_lora_weights(LORA_REPO_ID, weight_name=LORA_FILENAME)
# print("LoRA loaded successfully!")
def first_frame_from_video(video_value) -> "Image.Image | None":
    """
    Convert a Gradio Video input into the first frame as a PIL Image.

    Gradio Video value is typically either:
    - a filepath string (or path-like object)
    - or a dict-like object with a path
    - or (older versions) a tuple
    We handle the common cases robustly.

    Args:
        video_value: The value delivered by the Video component, or None.

    Returns:
        The first frame converted from BGR to RGB as a PIL image, or None
        when ``video_value`` is None.

    Raises:
        gr.Error: If no existing file path can be derived from
            ``video_value``, or if no frame can be read from the file.
    """
    if video_value is None:
        return None

    # Common shapes across gradio versions:
    if isinstance(video_value, (str, os.PathLike)):
        video_path = video_value
    elif isinstance(video_value, dict) and "path" in video_value:
        video_path = video_value["path"]
    elif isinstance(video_value, (list, tuple)) and len(video_value) > 0:
        # sometimes it's (path, metadata) or [path, ...]
        video_path = video_value[0]
    else:
        # last attempt: attribute access
        video_path = getattr(video_value, "path", None)

    # Only genuine path values may reach os.path.exists(): it raises TypeError
    # for arbitrary objects (e.g. a nested dict) and treats ints as file
    # descriptors, neither of which is a readable upload.
    if not isinstance(video_path, (str, os.PathLike)):
        raise gr.Error("Could not read the uploaded video file.")
    video_path = os.fspath(video_path)
    if not video_path or not os.path.exists(video_path):
        raise gr.Error("Could not read the uploaded video file.")

    cap = cv2.VideoCapture(video_path)
    try:
        ok, frame_bgr = cap.read()
    finally:
        # Always release the capture handle, even if decoding raises.
        cap.release()

    if not ok or frame_bgr is None:
        raise gr.Error("Could not extract the first frame from the video.")

    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    return Image.fromarray(frame_rgb)
def update_dimensions_from_video(target_video):
    """
    Derive output width/height from the first frame of the uploaded video.

    The longer side is pinned to 1024 and the other side is scaled by the
    frame's aspect ratio. Both values are then rounded to a multiple of 8 and
    clamped to the range 256..1024.

    Returns:
        A ``(width, height)`` tuple; ``(1024, 1024)`` when no frame is available.
    """
    frame = first_frame_from_video(target_video)
    if frame is None:
        return 1024, 1024

    src_w, src_h = frame.size
    ratio = src_w / src_h

    if ratio < 1:
        # Portrait: height is the long side.
        raw_w, raw_h = int(1024 * ratio), 1024
    else:
        # Landscape or square: width is the long side.
        raw_w, raw_h = 1024, int(1024 / ratio)

    def _snap(side):
        # Round to the nearest multiple of 8, then clamp to [256, 1024].
        snapped = round(side / 8) * 8
        return max(256, min(1024, snapped))

    return _snap(raw_w), _snap(raw_h)
@spaces.GPU(duration=85)
def face_swap(
    reference_face: Image.Image,
    target_video,
    seed: int = 42,
    randomize_seed: bool = False,
    width: int = 1024,
    height: int = 1024,
    num_inference_steps: int = 4,
    guidance_scale: float = 1.0,
    progress=gr.Progress(track_tqdm=True)
):
    """
    Run the pipeline on the first frame of ``target_video`` and ``reference_face``.

    Args:
        reference_face: Image supplying the face/identity.
        target_video: Video input; only its first frame is used.
        seed: Seed for the torch generator (ignored when ``randomize_seed``).
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        width, height: Output size forwarded to the pipeline.
        num_inference_steps: Step count forwarded to the pipeline.
        guidance_scale: Guidance scale forwarded to the pipeline.
        progress: Gradio progress tracker.

    Returns:
        ``((first_frame, generated_image), seed)`` where ``seed`` is the seed
        actually used.

    Raises:
        gr.Error: If either input is missing (or the frame cannot be extracted).
    """
    inputs_missing = reference_face is None or target_video is None
    if inputs_missing:
        raise gr.Error("Please provide both a reference face and a target video!")

    # The first frame of the video stands in for the target image.
    target_image = first_frame_from_video(target_video)

    seed = random.randint(0, MAX_SEED) if randomize_seed else seed
    rng = torch.Generator(device=device)
    rng.manual_seed(seed)

    progress(0.2, desc="Swapping face...")

    # NOTE(review): images are passed as [target frame, reference face], while
    # FACE_SWAP_PROMPT calls the identity source "Picture 1" and the base scene
    # "Picture 2" - confirm this ordering is what the prompt expects.
    result = pipe(
        prompt=FACE_SWAP_PROMPT,
        image=[target_image, reference_face],
        height=height,
        width=width,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        generator=rng,
    )
    swapped = result.images[0]

    return (target_image, swapped), seed
# Custom stylesheet handed to gr.Blocks: centres the "col-container" element,
# caps its width, and makes images inside ".image-container" fit without cropping.
css = """
#col-container {
margin: 0 auto;
max-width: 1200px;
}
.image-container img {
object-fit: contain;
}
"""
# UI definition and event wiring.
# NOTE(review): the nesting below was reconstructed from a copy of the file
# whose indentation had been lost; in particular, confirm whether the output
# slider belongs beside the input column (as laid out here) or inside it.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""# Face Swap with FLUX.2 Klein 9B
Swap faces using Flux.2 Klein 9B [Alissonerdx/BFS-Best-Face-Swap](https://huggingface.co/Alissonerdx/BFS-Best-Face-Swap) LoRA
""")
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    reference_face = gr.Image(
                        label="Reference Face",
                        type="pil",
                        sources=["upload"],
                        elem_classes="image-container"
                    )
                    # The target is a video; only its first frame is used.
                    target_video = gr.Video(
                        label="Target Video (Body/Scene) - first frame will be used",
                        sources=["upload"]
                    )
                # Hidden until a first automatic swap has produced a result.
                run_button = gr.Button("Swap Face", visible=False)
                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=0,
                    )
                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                    with gr.Row():
                        width = gr.Slider(
                            label="Width",
                            minimum=256,
                            maximum=1024,
                            step=8,
                            value=1024,
                        )
                        height = gr.Slider(
                            label="Height",
                            minimum=256,
                            maximum=1024,
                            step=8,
                            value=1024,
                        )
                    with gr.Row():
                        num_inference_steps = gr.Slider(
                            label="Inference Steps",
                            minimum=1,
                            maximum=20,
                            step=1,
                            value=4,
                            info="Number of denoising steps (4 is optimal for distilled model)"
                        )
                        guidance_scale = gr.Slider(
                            label="Guidance Scale",
                            minimum=0.0,
                            maximum=5.0,
                            step=0.1,
                            value=1.0,
                            info="How closely to follow the prompt (1.0 is optimal for distilled model)"
                        )
            comparison_slider = gr.ImageSlider(
                label="Before / After",
                type="pil"
            )
            seed_output = gr.Number(label="Seed Used", visible=False)

    # Inputs/outputs shared by the manual button and the automatic triggers.
    swap_inputs = [
        reference_face,
        target_video,
        seed,
        randomize_seed,
        width,
        height,
        num_inference_steps,
        guidance_scale
    ]
    swap_outputs = [comparison_slider, seed_output]
    auto_swap_outputs = [comparison_slider, seed_output, run_button]

    def auto_swap_wrapper(ref_face, targ_vid, s, rand_s, w, h, steps, cfg):
        """Run face_swap once both inputs are present and reveal the button.

        Returns the comparison pair, the seed used and a visibility update for
        the manual button; when an input is missing the output is cleared, the
        incoming seed is echoed back and the button is hidden.
        """
        if ref_face is not None and targ_vid is not None:
            result = face_swap(ref_face, targ_vid, s, rand_s, w, h, steps, cfg)
            return result[0], result[1], gr.update(visible=True)
        return None, s, gr.update(visible=False)

    run_button.click(
        fn=face_swap,
        inputs=swap_inputs,
        outputs=swap_outputs,
    )

    reference_face.change(
        fn=auto_swap_wrapper,
        inputs=swap_inputs,
        outputs=auto_swap_outputs,
    )

    # When the video changes, first update width/height from its first frame
    # and only THEN run the automatic swap. Registering two independent
    # change listeners would start the swap with the slider values from
    # before the dimension update, i.e. with stale width/height.
    target_video.change(
        fn=update_dimensions_from_video,
        inputs=[target_video],
        outputs=[width, height]
    ).then(
        fn=auto_swap_wrapper,
        inputs=swap_inputs,
        outputs=auto_swap_outputs,
    )
if __name__ == "__main__":
    # Start the app only when executed as a script, with a share link
    # requested and the Citrus theme passed to launch().
    citrus_theme = gr.themes.Citrus()
    demo.launch(share=True, theme=citrus_theme)
|