Spaces:
Running on Zero
Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,21 +7,18 @@ import torch
|
|
| 7 |
from diffusers import Flux2KleinPipeline
|
| 8 |
from PIL import Image
|
| 9 |
|
|
|
|
|
|
|
|
|
|
| 10 |
dtype = torch.bfloat16
|
| 11 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 12 |
|
| 13 |
MAX_SEED = np.iinfo(np.int32).max
|
| 14 |
|
| 15 |
-
# Model repository ID for 9B distilled
|
| 16 |
REPO_ID_DISTILLED = "black-forest-labs/FLUX.2-klein-9B"
|
| 17 |
-
|
| 18 |
-
# LoRA repository and file
|
| 19 |
LORA_REPO_ID = "Alissonerdx/BFS-Best-Face-Swap"
|
| 20 |
LORA_FILENAME = "bfs_head_v1_flux-klein_9b_step3750_rank64.safetensors"
|
| 21 |
|
| 22 |
-
# Fixed prompt for face swapping
|
| 23 |
-
#FACE_SWAP_PROMPT = "head_swap: start with Picture 1 as the base image, keeping its lighting, environment, and background. remove the head from Picture 1 completely and replace it with the head from Picture 2, strictly preserving the hair, eye color, nose structure of Picture 2. copy the direction of the eye, head rotation, micro expressions from Picture 1, high quality, sharp details, 4k."
|
| 24 |
-
|
| 25 |
FACE_SWAP_PROMPT = """head_swap: start with Picture 1 as the base image, keeping its lighting, environment, and background. Remove the head from Picture 1 completely and replace it with the head from Picture 2.
|
| 26 |
|
| 27 |
FROM PICTURE 1 (strictly preserve):
|
|
@@ -45,24 +42,60 @@ print(f"Loading LoRA from {LORA_REPO_ID}...")
|
|
| 45 |
pipe.load_lora_weights(LORA_REPO_ID, weight_name=LORA_FILENAME)
|
| 46 |
print("LoRA loaded successfully!")
|
| 47 |
|
| 48 |
-
|
|
|
|
| 49 |
"""
|
| 50 |
-
|
| 51 |
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
-
|
| 56 |
-
|
| 57 |
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
if target_image is None:
|
| 62 |
-
return 1024, 1024
|
| 63 |
|
| 64 |
img_width, img_height = target_image.size
|
| 65 |
-
|
| 66 |
aspect_ratio = img_width / img_height
|
| 67 |
|
| 68 |
if aspect_ratio >= 1: # Landscape or square
|
|
@@ -72,11 +105,9 @@ def update_dimensions_from_image(target_image):
|
|
| 72 |
new_height = 1024
|
| 73 |
new_width = int(1024 * aspect_ratio)
|
| 74 |
|
| 75 |
-
# Round to nearest multiple of 8
|
| 76 |
new_width = round(new_width / 8) * 8
|
| 77 |
new_height = round(new_height / 8) * 8
|
| 78 |
|
| 79 |
-
# Ensure within valid range (minimum 256, maximum 1024)
|
| 80 |
new_width = max(256, min(1024, new_width))
|
| 81 |
new_height = max(256, min(1024, new_height))
|
| 82 |
|
|
@@ -86,7 +117,7 @@ def update_dimensions_from_image(target_image):
|
|
| 86 |
@spaces.GPU(duration=85)
|
| 87 |
def face_swap(
|
| 88 |
reference_face: Image.Image,
|
| 89 |
-
|
| 90 |
seed: int = 42,
|
| 91 |
randomize_seed: bool = False,
|
| 92 |
width: int = 1024,
|
|
@@ -95,32 +126,17 @@ def face_swap(
|
|
| 95 |
guidance_scale: float = 1.0,
|
| 96 |
progress=gr.Progress(track_tqdm=True)
|
| 97 |
):
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
target_image: The target body/base image (Picture 1).
|
| 104 |
-
seed: Random seed for reproducible generation.
|
| 105 |
-
randomize_seed: Set to True to use a random seed.
|
| 106 |
-
width: Output image width in pixels (256-1024, must be multiple of 8).
|
| 107 |
-
height: Output image height in pixels (256-1024, must be multiple of 8).
|
| 108 |
-
num_inference_steps: Number of denoising steps (default 4 for distilled).
|
| 109 |
-
guidance_scale: How closely to follow the prompt (default 1.0 for distilled).
|
| 110 |
-
|
| 111 |
-
Returns:
|
| 112 |
-
tuple: A tuple containing the generated PIL Image and the seed used.
|
| 113 |
-
"""
|
| 114 |
-
if reference_face is None or target_image is None:
|
| 115 |
-
raise gr.Error("Please provide both a reference face and a target image!")
|
| 116 |
|
| 117 |
if randomize_seed:
|
| 118 |
seed = random.randint(0, MAX_SEED)
|
| 119 |
|
| 120 |
generator = torch.Generator(device=device).manual_seed(seed)
|
| 121 |
|
| 122 |
-
# Important: Pass target image (body) first, then reference face
|
| 123 |
-
# This matches the prompt structure: Picture 1 = target, Picture 2 = reference
|
| 124 |
image_list = [target_image, reference_face]
|
| 125 |
|
| 126 |
progress(0.2, desc="Swapping face...")
|
|
@@ -135,7 +151,6 @@ def face_swap(
|
|
| 135 |
generator=generator,
|
| 136 |
).images[0]
|
| 137 |
|
| 138 |
-
# Return slider comparison (before, after) and seed
|
| 139 |
return (target_image, image), seed
|
| 140 |
|
| 141 |
|
|
@@ -150,7 +165,6 @@ css = """
|
|
| 150 |
"""
|
| 151 |
|
| 152 |
with gr.Blocks(css=css) as demo:
|
| 153 |
-
|
| 154 |
with gr.Column(elem_id="col-container"):
|
| 155 |
gr.Markdown("""# Face Swap with FLUX.2 Klein 9B
|
| 156 |
|
|
@@ -166,14 +180,15 @@ Swap faces using Flux.2 Klein 9B [Alissonerdx/BFS-Best-Face-Swap](https://huggin
|
|
| 166 |
sources=["upload"],
|
| 167 |
elem_classes="image-container"
|
| 168 |
)
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
sources=["upload"]
|
| 174 |
-
elem_classes="image-container"
|
| 175 |
)
|
|
|
|
| 176 |
run_button = gr.Button("Swap Face", visible=False)
|
|
|
|
| 177 |
with gr.Accordion("Advanced Settings", open=False):
|
| 178 |
seed = gr.Slider(
|
| 179 |
label="Seed",
|
|
@@ -182,9 +197,9 @@ Swap faces using Flux.2 Klein 9B [Alissonerdx/BFS-Best-Face-Swap](https://huggin
|
|
| 182 |
step=1,
|
| 183 |
value=0,
|
| 184 |
)
|
| 185 |
-
|
| 186 |
randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
|
| 187 |
-
|
| 188 |
with gr.Row():
|
| 189 |
width = gr.Slider(
|
| 190 |
label="Width",
|
|
@@ -193,7 +208,7 @@ Swap faces using Flux.2 Klein 9B [Alissonerdx/BFS-Best-Face-Swap](https://huggin
|
|
| 193 |
step=8,
|
| 194 |
value=1024,
|
| 195 |
)
|
| 196 |
-
|
| 197 |
height = gr.Slider(
|
| 198 |
label="Height",
|
| 199 |
minimum=256,
|
|
@@ -201,7 +216,7 @@ Swap faces using Flux.2 Klein 9B [Alissonerdx/BFS-Best-Face-Swap](https://huggin
|
|
| 201 |
step=8,
|
| 202 |
value=1024,
|
| 203 |
)
|
| 204 |
-
|
| 205 |
with gr.Row():
|
| 206 |
num_inference_steps = gr.Slider(
|
| 207 |
label="Inference Steps",
|
|
@@ -211,7 +226,7 @@ Swap faces using Flux.2 Klein 9B [Alissonerdx/BFS-Best-Face-Swap](https://huggin
|
|
| 211 |
value=4,
|
| 212 |
info="Number of denoising steps (4 is optimal for distilled model)"
|
| 213 |
)
|
| 214 |
-
|
| 215 |
guidance_scale = gr.Slider(
|
| 216 |
label="Guidance Scale",
|
| 217 |
minimum=0.0,
|
|
@@ -221,28 +236,24 @@ Swap faces using Flux.2 Klein 9B [Alissonerdx/BFS-Best-Face-Swap](https://huggin
|
|
| 221 |
info="How closely to follow the prompt (1.0 is optimal for distilled model)"
|
| 222 |
)
|
| 223 |
|
| 224 |
-
|
| 225 |
comparison_slider = gr.ImageSlider(
|
| 226 |
label="Before / After",
|
| 227 |
type="pil"
|
| 228 |
)
|
| 229 |
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
seed_output = gr.Number(label="Seed Used", visible=False)
|
| 234 |
|
| 235 |
-
#
|
| 236 |
-
|
| 237 |
-
fn=
|
| 238 |
-
inputs=[
|
| 239 |
outputs=[width, height]
|
| 240 |
)
|
| 241 |
|
| 242 |
-
#
|
| 243 |
swap_inputs = [
|
| 244 |
reference_face,
|
| 245 |
-
|
| 246 |
seed,
|
| 247 |
randomize_seed,
|
| 248 |
width,
|
|
@@ -252,31 +263,26 @@ Swap faces using Flux.2 Klein 9B [Alissonerdx/BFS-Best-Face-Swap](https://huggin
|
|
| 252 |
]
|
| 253 |
swap_outputs = [comparison_slider, seed_output]
|
| 254 |
|
| 255 |
-
# Manual trigger via button
|
| 256 |
run_button.click(
|
| 257 |
fn=face_swap,
|
| 258 |
inputs=swap_inputs,
|
| 259 |
outputs=swap_outputs,
|
| 260 |
)
|
| 261 |
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
if ref_face is not None and target_img is not None:
|
| 266 |
-
result = face_swap(ref_face, target_img, s, rand_s, w, h, steps, cfg)
|
| 267 |
-
# Show the button after first generation
|
| 268 |
return result[0], result[1], gr.update(visible=True)
|
| 269 |
return None, s, gr.update(visible=False)
|
| 270 |
|
| 271 |
-
# Trigger on reference face upload/change
|
| 272 |
reference_face.change(
|
| 273 |
fn=auto_swap_wrapper,
|
| 274 |
inputs=swap_inputs,
|
| 275 |
outputs=[comparison_slider, seed_output, run_button],
|
| 276 |
)
|
| 277 |
|
| 278 |
-
#
|
| 279 |
-
|
| 280 |
fn=auto_swap_wrapper,
|
| 281 |
inputs=swap_inputs,
|
| 282 |
outputs=[comparison_slider, seed_output, run_button],
|
|
|
|
| 7 |
from diffusers import Flux2KleinPipeline
|
| 8 |
from PIL import Image
|
| 9 |
|
| 10 |
+
# NEW: for reading first frame
|
| 11 |
+
import cv2
|
| 12 |
+
|
| 13 |
dtype = torch.bfloat16
|
| 14 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 15 |
|
| 16 |
MAX_SEED = np.iinfo(np.int32).max
|
| 17 |
|
|
|
|
| 18 |
REPO_ID_DISTILLED = "black-forest-labs/FLUX.2-klein-9B"
|
|
|
|
|
|
|
| 19 |
LORA_REPO_ID = "Alissonerdx/BFS-Best-Face-Swap"
|
| 20 |
LORA_FILENAME = "bfs_head_v1_flux-klein_9b_step3750_rank64.safetensors"
|
| 21 |
|
|
|
|
|
|
|
|
|
|
| 22 |
FACE_SWAP_PROMPT = """head_swap: start with Picture 1 as the base image, keeping its lighting, environment, and background. Remove the head from Picture 1 completely and replace it with the head from Picture 2.
|
| 23 |
|
| 24 |
FROM PICTURE 1 (strictly preserve):
|
|
|
|
| 42 |
pipe.load_lora_weights(LORA_REPO_ID, weight_name=LORA_FILENAME)
|
| 43 |
print("LoRA loaded successfully!")
|
| 44 |
|
| 45 |
+
|
| 46 |
+
def first_frame_from_video(video_value) -> "Image.Image | None":
    """
    Convert a Gradio Video input into the first frame as a PIL Image.

    Gradio Video value is typically either:
      - a filepath string (or path-like object)
      - or a dict-like object with a path
      - or (older versions) a tuple / list whose first item is the path
    We handle the common cases robustly.

    Args:
        video_value: The value delivered by the video component, in any of
            the shapes listed above, or None when nothing is uploaded.

    Returns:
        The first decoded frame as an RGB PIL image, or None when
        ``video_value`` is None.

    Raises:
        gr.Error: If no usable path can be derived from ``video_value``,
            the path does not exist, or no frame can be decoded from it.
    """
    if video_value is None:
        return None

    # Imported locally so this function does not depend on a file-level
    # `import os` being present.
    import os

    # Common shapes across gradio versions:
    if isinstance(video_value, (str, os.PathLike)):
        video_path = video_value
    elif isinstance(video_value, dict):
        video_path = video_value.get("path")
    elif isinstance(video_value, (list, tuple)) and len(video_value) > 0:
        # sometimes it's (path, metadata) or [path, ...]
        video_path = video_value[0]
    else:
        # last attempt: attribute access
        video_path = getattr(video_value, "path", None)

    # Only accept real filesystem paths. An int would otherwise be read as a
    # file descriptor by os.path.exists and as a camera index by
    # cv2.VideoCapture, which is never what an uploaded video means.
    if not isinstance(video_path, (str, os.PathLike)) or not os.path.exists(video_path):
        raise gr.Error("Could not read the uploaded video file.")

    cap = cv2.VideoCapture(os.fspath(video_path))
    try:
        ok, frame_bgr = cap.read()
    finally:
        # Release the capture handle even if decoding raises.
        cap.release()

    if not ok or frame_bgr is None:
        raise gr.Error("Could not extract the first frame from the video.")

    # The frame is converted from BGR channel order to RGB before handing it
    # to PIL.
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    return Image.fromarray(frame_rgb)
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def update_dimensions_from_video(target_video):
|
| 88 |
"""
|
| 89 |
+
Update width/height based on the first frame of the uploaded video.
|
| 90 |
+
|
| 91 |
+
Keeps one side at 1024 and scales the other proportionally,
|
| 92 |
+
with both sides as multiples of 8.
|
| 93 |
+
"""
|
| 94 |
+
target_image = first_frame_from_video(target_video)
|
| 95 |
if target_image is None:
|
| 96 |
+
return 1024, 1024
|
| 97 |
|
| 98 |
img_width, img_height = target_image.size
|
|
|
|
| 99 |
aspect_ratio = img_width / img_height
|
| 100 |
|
| 101 |
if aspect_ratio >= 1: # Landscape or square
|
|
|
|
| 105 |
new_height = 1024
|
| 106 |
new_width = int(1024 * aspect_ratio)
|
| 107 |
|
|
|
|
| 108 |
new_width = round(new_width / 8) * 8
|
| 109 |
new_height = round(new_height / 8) * 8
|
| 110 |
|
|
|
|
| 111 |
new_width = max(256, min(1024, new_width))
|
| 112 |
new_height = max(256, min(1024, new_height))
|
| 113 |
|
|
|
|
| 117 |
@spaces.GPU(duration=85)
|
| 118 |
def face_swap(
|
| 119 |
reference_face: Image.Image,
|
| 120 |
+
target_video, # CHANGED: now a video input
|
| 121 |
seed: int = 42,
|
| 122 |
randomize_seed: bool = False,
|
| 123 |
width: int = 1024,
|
|
|
|
| 126 |
guidance_scale: float = 1.0,
|
| 127 |
progress=gr.Progress(track_tqdm=True)
|
| 128 |
):
|
| 129 |
+
if reference_face is None or target_video is None:
|
| 130 |
+
raise gr.Error("Please provide both a reference face and a target video!")
|
| 131 |
+
|
| 132 |
+
# Extract first frame to use as target image
|
| 133 |
+
target_image = first_frame_from_video(target_video)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
if randomize_seed:
|
| 136 |
seed = random.randint(0, MAX_SEED)
|
| 137 |
|
| 138 |
generator = torch.Generator(device=device).manual_seed(seed)
|
| 139 |
|
|
|
|
|
|
|
| 140 |
image_list = [target_image, reference_face]
|
| 141 |
|
| 142 |
progress(0.2, desc="Swapping face...")
|
|
|
|
| 151 |
generator=generator,
|
| 152 |
).images[0]
|
| 153 |
|
|
|
|
| 154 |
return (target_image, image), seed
|
| 155 |
|
| 156 |
|
|
|
|
| 165 |
"""
|
| 166 |
|
| 167 |
with gr.Blocks(css=css) as demo:
|
|
|
|
| 168 |
with gr.Column(elem_id="col-container"):
|
| 169 |
gr.Markdown("""# Face Swap with FLUX.2 Klein 9B
|
| 170 |
|
|
|
|
| 180 |
sources=["upload"],
|
| 181 |
elem_classes="image-container"
|
| 182 |
)
|
| 183 |
+
|
| 184 |
+
# CHANGED: target image -> target video
|
| 185 |
+
target_video = gr.Video(
|
| 186 |
+
label="Target Video (Body/Scene) - first frame will be used",
|
| 187 |
+
sources=["upload"]
|
|
|
|
| 188 |
)
|
| 189 |
+
|
| 190 |
run_button = gr.Button("Swap Face", visible=False)
|
| 191 |
+
|
| 192 |
with gr.Accordion("Advanced Settings", open=False):
|
| 193 |
seed = gr.Slider(
|
| 194 |
label="Seed",
|
|
|
|
| 197 |
step=1,
|
| 198 |
value=0,
|
| 199 |
)
|
| 200 |
+
|
| 201 |
randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
|
| 202 |
+
|
| 203 |
with gr.Row():
|
| 204 |
width = gr.Slider(
|
| 205 |
label="Width",
|
|
|
|
| 208 |
step=8,
|
| 209 |
value=1024,
|
| 210 |
)
|
| 211 |
+
|
| 212 |
height = gr.Slider(
|
| 213 |
label="Height",
|
| 214 |
minimum=256,
|
|
|
|
| 216 |
step=8,
|
| 217 |
value=1024,
|
| 218 |
)
|
| 219 |
+
|
| 220 |
with gr.Row():
|
| 221 |
num_inference_steps = gr.Slider(
|
| 222 |
label="Inference Steps",
|
|
|
|
| 226 |
value=4,
|
| 227 |
info="Number of denoising steps (4 is optimal for distilled model)"
|
| 228 |
)
|
| 229 |
+
|
| 230 |
guidance_scale = gr.Slider(
|
| 231 |
label="Guidance Scale",
|
| 232 |
minimum=0.0,
|
|
|
|
| 236 |
info="How closely to follow the prompt (1.0 is optimal for distilled model)"
|
| 237 |
)
|
| 238 |
|
|
|
|
| 239 |
comparison_slider = gr.ImageSlider(
|
| 240 |
label="Before / After",
|
| 241 |
type="pil"
|
| 242 |
)
|
| 243 |
|
|
|
|
|
|
|
|
|
|
| 244 |
seed_output = gr.Number(label="Seed Used", visible=False)
|
| 245 |
|
| 246 |
+
# CHANGED: auto-update dimensions when target video is uploaded/changed
|
| 247 |
+
target_video.change(
|
| 248 |
+
fn=update_dimensions_from_video,
|
| 249 |
+
inputs=[target_video],
|
| 250 |
outputs=[width, height]
|
| 251 |
)
|
| 252 |
|
| 253 |
+
# CHANGED: swap inputs use target_video instead of target_image
|
| 254 |
swap_inputs = [
|
| 255 |
reference_face,
|
| 256 |
+
target_video,
|
| 257 |
seed,
|
| 258 |
randomize_seed,
|
| 259 |
width,
|
|
|
|
| 263 |
]
|
| 264 |
swap_outputs = [comparison_slider, seed_output]
|
| 265 |
|
|
|
|
| 266 |
run_button.click(
|
| 267 |
fn=face_swap,
|
| 268 |
inputs=swap_inputs,
|
| 269 |
outputs=swap_outputs,
|
| 270 |
)
|
| 271 |
|
| 272 |
+
def auto_swap_wrapper(ref_face, targ_vid, s, rand_s, w, h, steps, cfg):
    """
    Run the face swap automatically once both inputs are present.

    Returns a 3-tuple matching the event outputs: the comparison value,
    the seed, and a visibility update for the run button. When either the
    reference face or the target video is missing, nothing is generated:
    the comparison is cleared, the incoming seed is passed through, and the
    button stays hidden.
    """
    # Guard clause: without both inputs there is nothing to swap.
    if ref_face is None or targ_vid is None:
        return None, s, gr.update(visible=False)

    outcome = face_swap(ref_face, targ_vid, s, rand_s, w, h, steps, cfg)
    comparison = outcome[0]
    seed_used = outcome[1]
    # Reveal the manual button once a generation has been produced.
    return comparison, seed_used, gr.update(visible=True)
|
| 277 |
|
|
|
|
| 278 |
reference_face.change(
|
| 279 |
fn=auto_swap_wrapper,
|
| 280 |
inputs=swap_inputs,
|
| 281 |
outputs=[comparison_slider, seed_output, run_button],
|
| 282 |
)
|
| 283 |
|
| 284 |
+
# CHANGED: trigger on target video upload/change
|
| 285 |
+
target_video.change(
|
| 286 |
fn=auto_swap_wrapper,
|
| 287 |
inputs=swap_inputs,
|
| 288 |
outputs=[comparison_slider, seed_output, run_button],
|