linoyts HF Staff committed on
Commit
0c6554e
·
verified ·
1 Parent(s): 276e3cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -72
app.py CHANGED
@@ -7,21 +7,18 @@ import torch
7
  from diffusers import Flux2KleinPipeline
8
  from PIL import Image
9
 
 
 
 
10
  dtype = torch.bfloat16
11
  device = "cuda" if torch.cuda.is_available() else "cpu"
12
 
13
  MAX_SEED = np.iinfo(np.int32).max
14
 
15
- # Model repository ID for 9B distilled
16
  REPO_ID_DISTILLED = "black-forest-labs/FLUX.2-klein-9B"
17
-
18
- # LoRA repository and file
19
  LORA_REPO_ID = "Alissonerdx/BFS-Best-Face-Swap"
20
  LORA_FILENAME = "bfs_head_v1_flux-klein_9b_step3750_rank64.safetensors"
21
 
22
- # Fixed prompt for face swapping
23
- #FACE_SWAP_PROMPT = "head_swap: start with Picture 1 as the base image, keeping its lighting, environment, and background. remove the head from Picture 1 completely and replace it with the head from Picture 2, strictly preserving the hair, eye color, nose structure of Picture 2. copy the direction of the eye, head rotation, micro expressions from Picture 1, high quality, sharp details, 4k."
24
-
25
  FACE_SWAP_PROMPT = """head_swap: start with Picture 1 as the base image, keeping its lighting, environment, and background. Remove the head from Picture 1 completely and replace it with the head from Picture 2.
26
 
27
  FROM PICTURE 1 (strictly preserve):
@@ -45,24 +42,60 @@ print(f"Loading LoRA from {LORA_REPO_ID}...")
45
  pipe.load_lora_weights(LORA_REPO_ID, weight_name=LORA_FILENAME)
46
  print("LoRA loaded successfully!")
47
 
48
- def update_dimensions_from_image(target_image):
 
49
  """
50
- Update width/height based on target image aspect ratio.
51
 
52
- Keeps one side at 1024 and scales the other proportionally,
53
- with both sides as multiples of 8.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
- Args:
56
- target_image: PIL Image of the target/body image.
57
 
58
- Returns:
59
- tuple: A tuple of (width, height) integers, both multiples of 8.
 
 
 
 
 
 
 
 
 
 
60
  """
 
 
 
 
 
 
61
  if target_image is None:
62
- return 1024, 1024 # Default dimensions
63
 
64
  img_width, img_height = target_image.size
65
-
66
  aspect_ratio = img_width / img_height
67
 
68
  if aspect_ratio >= 1: # Landscape or square
@@ -72,11 +105,9 @@ def update_dimensions_from_image(target_image):
72
  new_height = 1024
73
  new_width = int(1024 * aspect_ratio)
74
 
75
- # Round to nearest multiple of 8
76
  new_width = round(new_width / 8) * 8
77
  new_height = round(new_height / 8) * 8
78
 
79
- # Ensure within valid range (minimum 256, maximum 1024)
80
  new_width = max(256, min(1024, new_width))
81
  new_height = max(256, min(1024, new_height))
82
 
@@ -86,7 +117,7 @@ def update_dimensions_from_image(target_image):
86
  @spaces.GPU(duration=85)
87
  def face_swap(
88
  reference_face: Image.Image,
89
- target_image: Image.Image,
90
  seed: int = 42,
91
  randomize_seed: bool = False,
92
  width: int = 1024,
@@ -95,32 +126,17 @@ def face_swap(
95
  guidance_scale: float = 1.0,
96
  progress=gr.Progress(track_tqdm=True)
97
  ):
98
- """
99
- Perform face swapping using FLUX.2 Klein 9B with LoRA.
100
-
101
- Args:
102
- reference_face: The face image to swap in (Picture 2).
103
- target_image: The target body/base image (Picture 1).
104
- seed: Random seed for reproducible generation.
105
- randomize_seed: Set to True to use a random seed.
106
- width: Output image width in pixels (256-1024, must be multiple of 8).
107
- height: Output image height in pixels (256-1024, must be multiple of 8).
108
- num_inference_steps: Number of denoising steps (default 4 for distilled).
109
- guidance_scale: How closely to follow the prompt (default 1.0 for distilled).
110
-
111
- Returns:
112
- tuple: A tuple containing the generated PIL Image and the seed used.
113
- """
114
- if reference_face is None or target_image is None:
115
- raise gr.Error("Please provide both a reference face and a target image!")
116
 
117
  if randomize_seed:
118
  seed = random.randint(0, MAX_SEED)
119
 
120
  generator = torch.Generator(device=device).manual_seed(seed)
121
 
122
- # Important: Pass target image (body) first, then reference face
123
- # This matches the prompt structure: Picture 1 = target, Picture 2 = reference
124
  image_list = [target_image, reference_face]
125
 
126
  progress(0.2, desc="Swapping face...")
@@ -135,7 +151,6 @@ def face_swap(
135
  generator=generator,
136
  ).images[0]
137
 
138
- # Return slider comparison (before, after) and seed
139
  return (target_image, image), seed
140
 
141
 
@@ -150,7 +165,6 @@ css = """
150
  """
151
 
152
  with gr.Blocks(css=css) as demo:
153
-
154
  with gr.Column(elem_id="col-container"):
155
  gr.Markdown("""# Face Swap with FLUX.2 Klein 9B
156
 
@@ -166,14 +180,15 @@ Swap faces using Flux.2 Klein 9B [Alissonerdx/BFS-Best-Face-Swap](https://huggin
166
  sources=["upload"],
167
  elem_classes="image-container"
168
  )
169
-
170
- target_image = gr.Image(
171
- label="Target Image (Body/Scene)",
172
- type="pil",
173
- sources=["upload"],
174
- elem_classes="image-container"
175
  )
 
176
  run_button = gr.Button("Swap Face", visible=False)
 
177
  with gr.Accordion("Advanced Settings", open=False):
178
  seed = gr.Slider(
179
  label="Seed",
@@ -182,9 +197,9 @@ Swap faces using Flux.2 Klein 9B [Alissonerdx/BFS-Best-Face-Swap](https://huggin
182
  step=1,
183
  value=0,
184
  )
185
-
186
  randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
187
-
188
  with gr.Row():
189
  width = gr.Slider(
190
  label="Width",
@@ -193,7 +208,7 @@ Swap faces using Flux.2 Klein 9B [Alissonerdx/BFS-Best-Face-Swap](https://huggin
193
  step=8,
194
  value=1024,
195
  )
196
-
197
  height = gr.Slider(
198
  label="Height",
199
  minimum=256,
@@ -201,7 +216,7 @@ Swap faces using Flux.2 Klein 9B [Alissonerdx/BFS-Best-Face-Swap](https://huggin
201
  step=8,
202
  value=1024,
203
  )
204
-
205
  with gr.Row():
206
  num_inference_steps = gr.Slider(
207
  label="Inference Steps",
@@ -211,7 +226,7 @@ Swap faces using Flux.2 Klein 9B [Alissonerdx/BFS-Best-Face-Swap](https://huggin
211
  value=4,
212
  info="Number of denoising steps (4 is optimal for distilled model)"
213
  )
214
-
215
  guidance_scale = gr.Slider(
216
  label="Guidance Scale",
217
  minimum=0.0,
@@ -221,28 +236,24 @@ Swap faces using Flux.2 Klein 9B [Alissonerdx/BFS-Best-Face-Swap](https://huggin
221
  info="How closely to follow the prompt (1.0 is optimal for distilled model)"
222
  )
223
 
224
-
225
  comparison_slider = gr.ImageSlider(
226
  label="Before / After",
227
  type="pil"
228
  )
229
 
230
-
231
-
232
-
233
  seed_output = gr.Number(label="Seed Used", visible=False)
234
 
235
- # Auto-update dimensions when target image is uploaded
236
- target_image.upload(
237
- fn=update_dimensions_from_image,
238
- inputs=[target_image],
239
  outputs=[width, height]
240
  )
241
 
242
- # Create a shared input/output configuration
243
  swap_inputs = [
244
  reference_face,
245
- target_image,
246
  seed,
247
  randomize_seed,
248
  width,
@@ -252,31 +263,26 @@ Swap faces using Flux.2 Klein 9B [Alissonerdx/BFS-Best-Face-Swap](https://huggin
252
  ]
253
  swap_outputs = [comparison_slider, seed_output]
254
 
255
- # Manual trigger via button
256
  run_button.click(
257
  fn=face_swap,
258
  inputs=swap_inputs,
259
  outputs=swap_outputs,
260
  )
261
 
262
- # Auto-trigger when both images are uploaded
263
- def auto_swap_wrapper(ref_face, target_img, s, rand_s, w, h, steps, cfg):
264
- """Only run face swap if both images are provided"""
265
- if ref_face is not None and target_img is not None:
266
- result = face_swap(ref_face, target_img, s, rand_s, w, h, steps, cfg)
267
- # Show the button after first generation
268
  return result[0], result[1], gr.update(visible=True)
269
  return None, s, gr.update(visible=False)
270
 
271
- # Trigger on reference face upload/change
272
  reference_face.change(
273
  fn=auto_swap_wrapper,
274
  inputs=swap_inputs,
275
  outputs=[comparison_slider, seed_output, run_button],
276
  )
277
 
278
- # Trigger on target image upload/change
279
- target_image.change(
280
  fn=auto_swap_wrapper,
281
  inputs=swap_inputs,
282
  outputs=[comparison_slider, seed_output, run_button],
 
7
  from diffusers import Flux2KleinPipeline
8
  from PIL import Image
9
 
10
+ # NEW: for reading first frame
11
+ import cv2
12
+
13
  dtype = torch.bfloat16
14
  device = "cuda" if torch.cuda.is_available() else "cpu"
15
 
16
  MAX_SEED = np.iinfo(np.int32).max
17
 
 
18
  REPO_ID_DISTILLED = "black-forest-labs/FLUX.2-klein-9B"
 
 
19
  LORA_REPO_ID = "Alissonerdx/BFS-Best-Face-Swap"
20
  LORA_FILENAME = "bfs_head_v1_flux-klein_9b_step3750_rank64.safetensors"
21
 
 
 
 
22
  FACE_SWAP_PROMPT = """head_swap: start with Picture 1 as the base image, keeping its lighting, environment, and background. Remove the head from Picture 1 completely and replace it with the head from Picture 2.
23
 
24
  FROM PICTURE 1 (strictly preserve):
 
42
  pipe.load_lora_weights(LORA_REPO_ID, weight_name=LORA_FILENAME)
43
  print("LoRA loaded successfully!")
44
 
45
+
46
def first_frame_from_video(video_value) -> "Image.Image | None":
    """
    Convert a Gradio Video input into its first frame as a PIL Image.

    The value is accepted in any of these shapes:
    - a filepath string (or an os.PathLike object)
    - a dict with a "path" key
    - a non-empty list/tuple whose first element is the path
    - any other object exposing a ``path`` attribute

    Args:
        video_value: The value received from the video component, or None.

    Returns:
        The first decoded frame, converted from BGR to RGB, as a PIL Image;
        None when ``video_value`` is None.

    Raises:
        gr.Error: If no existing file path can be resolved from
            ``video_value``, or if the first frame cannot be decoded.
    """
    import os  # stdlib; imported locally so this helper is self-contained

    if video_value is None:
        return None

    if isinstance(video_value, (str, os.PathLike)):
        video_path = video_value
    elif isinstance(video_value, dict) and "path" in video_value:
        video_path = video_value["path"]
    elif isinstance(video_value, (list, tuple)) and len(video_value) > 0:
        # sometimes it's (path, metadata) or [path, ...]
        video_path = video_value[0]
    else:
        # last attempt: attribute access
        video_path = getattr(video_value, "path", None)

    # Reject anything that is not path-like up front: os.path.exists() raises
    # TypeError on arbitrary objects, which would bypass the gr.Error below.
    if not isinstance(video_path, (str, os.PathLike)):
        raise gr.Error("Could not read the uploaded video file.")

    # Normalise PathLike objects to a plain str before handing them to OpenCV.
    video_path = os.fsdecode(video_path)
    if not video_path or not os.path.exists(video_path):
        raise gr.Error("Could not read the uploaded video file.")

    cap = cv2.VideoCapture(video_path)
    try:
        ok, frame_bgr = cap.read()
    finally:
        # Always free the capture handle, even if decoding raises.
        cap.release()

    if not ok or frame_bgr is None:
        raise gr.Error("Could not extract the first frame from the video.")

    # OpenCV decodes frames as BGR; PIL expects RGB channel order.
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    return Image.fromarray(frame_rgb)
85
+
86
+
87
+ def update_dimensions_from_video(target_video):
88
  """
89
+ Update width/height based on the first frame of the uploaded video.
90
+
91
+ Keeps one side at 1024 and scales the other proportionally,
92
+ with both sides as multiples of 8.
93
+ """
94
+ target_image = first_frame_from_video(target_video)
95
  if target_image is None:
96
+ return 1024, 1024
97
 
98
  img_width, img_height = target_image.size
 
99
  aspect_ratio = img_width / img_height
100
 
101
  if aspect_ratio >= 1: # Landscape or square
 
105
  new_height = 1024
106
  new_width = int(1024 * aspect_ratio)
107
 
 
108
  new_width = round(new_width / 8) * 8
109
  new_height = round(new_height / 8) * 8
110
 
 
111
  new_width = max(256, min(1024, new_width))
112
  new_height = max(256, min(1024, new_height))
113
 
 
117
  @spaces.GPU(duration=85)
118
  def face_swap(
119
  reference_face: Image.Image,
120
+ target_video, # CHANGED: now a video input
121
  seed: int = 42,
122
  randomize_seed: bool = False,
123
  width: int = 1024,
 
126
  guidance_scale: float = 1.0,
127
  progress=gr.Progress(track_tqdm=True)
128
  ):
129
+ if reference_face is None or target_video is None:
130
+ raise gr.Error("Please provide both a reference face and a target video!")
131
+
132
+ # Extract first frame to use as target image
133
+ target_image = first_frame_from_video(target_video)
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
  if randomize_seed:
136
  seed = random.randint(0, MAX_SEED)
137
 
138
  generator = torch.Generator(device=device).manual_seed(seed)
139
 
 
 
140
  image_list = [target_image, reference_face]
141
 
142
  progress(0.2, desc="Swapping face...")
 
151
  generator=generator,
152
  ).images[0]
153
 
 
154
  return (target_image, image), seed
155
 
156
 
 
165
  """
166
 
167
  with gr.Blocks(css=css) as demo:
 
168
  with gr.Column(elem_id="col-container"):
169
  gr.Markdown("""# Face Swap with FLUX.2 Klein 9B
170
 
 
180
  sources=["upload"],
181
  elem_classes="image-container"
182
  )
183
+
184
+ # CHANGED: target image -> target video
185
+ target_video = gr.Video(
186
+ label="Target Video (Body/Scene) - first frame will be used",
187
+ sources=["upload"]
 
188
  )
189
+
190
  run_button = gr.Button("Swap Face", visible=False)
191
+
192
  with gr.Accordion("Advanced Settings", open=False):
193
  seed = gr.Slider(
194
  label="Seed",
 
197
  step=1,
198
  value=0,
199
  )
200
+
201
  randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
202
+
203
  with gr.Row():
204
  width = gr.Slider(
205
  label="Width",
 
208
  step=8,
209
  value=1024,
210
  )
211
+
212
  height = gr.Slider(
213
  label="Height",
214
  minimum=256,
 
216
  step=8,
217
  value=1024,
218
  )
219
+
220
  with gr.Row():
221
  num_inference_steps = gr.Slider(
222
  label="Inference Steps",
 
226
  value=4,
227
  info="Number of denoising steps (4 is optimal for distilled model)"
228
  )
229
+
230
  guidance_scale = gr.Slider(
231
  label="Guidance Scale",
232
  minimum=0.0,
 
236
  info="How closely to follow the prompt (1.0 is optimal for distilled model)"
237
  )
238
 
 
239
  comparison_slider = gr.ImageSlider(
240
  label="Before / After",
241
  type="pil"
242
  )
243
 
 
 
 
244
  seed_output = gr.Number(label="Seed Used", visible=False)
245
 
246
+ # CHANGED: auto-update dimensions when target video is uploaded/changed
247
+ target_video.change(
248
+ fn=update_dimensions_from_video,
249
+ inputs=[target_video],
250
  outputs=[width, height]
251
  )
252
 
253
+ # CHANGED: swap inputs use target_video instead of target_image
254
  swap_inputs = [
255
  reference_face,
256
+ target_video,
257
  seed,
258
  randomize_seed,
259
  width,
 
263
  ]
264
  swap_outputs = [comparison_slider, seed_output]
265
 
 
266
  run_button.click(
267
  fn=face_swap,
268
  inputs=swap_inputs,
269
  outputs=swap_outputs,
270
  )
271
 
272
def auto_swap_wrapper(ref_face, targ_vid, s, rand_s, w, h, steps, cfg):
    """
    Run the face swap only once both the reference face and the target
    video are available.

    Returns a 3-tuple matching the event outputs: the value for the
    comparison slider, the seed, and a visibility update for the button.
    When either input is missing, nothing is generated: the slider value is
    None, the incoming seed ``s`` is passed through, and the button is hidden.
    """
    # Guard clause: one of the two uploads is still missing.
    if ref_face is None or targ_vid is None:
        return None, s, gr.update(visible=False)

    comparison, used_seed = face_swap(
        ref_face, targ_vid, s, rand_s, w, h, steps, cfg
    )
    # A generation has run, so reveal the manual re-run button.
    return comparison, used_seed, gr.update(visible=True)
277
 
 
278
  reference_face.change(
279
  fn=auto_swap_wrapper,
280
  inputs=swap_inputs,
281
  outputs=[comparison_slider, seed_output, run_button],
282
  )
283
 
284
+ # CHANGED: trigger on target video upload/change
285
+ target_video.change(
286
  fn=auto_swap_wrapper,
287
  inputs=swap_inputs,
288
  outputs=[comparison_slider, seed_output, run_button],