Leteint committed on
Commit
47c5516
·
verified ·
1 Parent(s): 6b374e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -19
app.py CHANGED
@@ -9,9 +9,12 @@ import cv2
9
  from huggingface_hub import hf_hub_download
10
 
11
  from diffusers.models import ControlNetModel
12
- from insightface.app import FaceAnalysis
 
 
 
13
 
14
- from pipeline_stable_diffusion_xl_instantid import StableDiffusionXLInstantIDPipeline
15
 
16
 
17
  # ---------------------------
@@ -54,33 +57,49 @@ IP_ADAPTER_PATH = f"{CHECKPOINT_DIR}/ip-adapter.bin"
54
 
55
 
56
  # ---------------------------
57
- # InsightFace (CPU pour éviter les galères GPU dans ZeroGPU)
58
  # ---------------------------
59
 
60
  def setup_face_analyzer():
 
61
  app = FaceAnalysis(name="buffalo_l")
62
- app.prepare(ctx_id=-1) # CPU
63
  return app
64
 
65
 
66
  face_app = setup_face_analyzer()
67
 
68
 
69
- def extract_face_emb(image: Image.Image):
 
70
  img = np.array(image.convert("RGB"))
71
  img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
72
 
73
  faces = face_app.get(img_bgr)
74
  if len(faces) == 0:
75
- raise RuntimeError("Aucun visage détecté sur l'image de référence.")
76
 
 
77
  face = sorted(
78
  faces,
79
  key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]),
80
  reverse=True,
81
  )[0]
82
- face_emb = face.normed_embedding
83
- return np.array(face_emb, dtype=np.float32)
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
 
86
  # ---------------------------
@@ -116,7 +135,12 @@ pipe = load_pipeline()
116
  # ---------------------------
117
 
118
  @spaces.GPU
119
- def generate(face_image, prompt, negative_prompt="", steps=20, guidance_scale=5.0, height=1024, width=768):
 
 
 
 
 
120
  try:
121
  if face_image is None:
122
  raise gr.Error("Merci de fournir une image de visage.")
@@ -127,17 +151,23 @@ def generate(face_image, prompt, negative_prompt="", steps=20, guidance_scale=5.
127
  else:
128
  pipe.to("cpu")
129
 
130
- # 1) Embedding de visage (ID) depuis face_image
131
- face_emb = extract_face_emb(face_image) # (512,)
132
- face_emb_batch = face_emb[None] # (1, 512)
133
 
134
- # 2) txt2img SDXL + InstantID (pas d'image de corps)
 
 
 
 
135
  out = pipe(
136
  prompt=prompt,
137
  negative_prompt=negative_prompt,
138
- face_embeds=face_emb_batch,
 
139
  num_inference_steps=int(steps),
140
  guidance_scale=float(guidance_scale),
 
141
  height=int(height),
142
  width=int(width),
143
  )
@@ -155,7 +185,7 @@ def generate(face_image, prompt, negative_prompt="", steps=20, guidance_scale=5.
155
  # ---------------------------
156
 
157
  with gr.Blocks() as demo:
158
- gr.Markdown("## InstantID + SDXL (ZeroGPU) – VisageCorps généré")
159
 
160
  with gr.Row():
161
  with gr.Column():
@@ -164,15 +194,21 @@ with gr.Blocks() as demo:
164
  prompt = gr.Textbox(
165
  label="Prompt",
166
  lines=3,
167
- value="photorealistic full body firefighter uniform, detailed, same identity, studio lighting",
 
 
 
168
  )
169
 
170
  neg_prompt = gr.Textbox(
171
  label="Negative",
172
- value="lowres, deformed, extra limbs, bad anatomy, text, watermark",
 
 
 
173
  )
174
 
175
- steps = gr.Slider(5, 50, 25, step=1, label="Steps")
176
  guidance = gr.Slider(1.0, 10.0, 5.0, step=0.5, label="Guidance scale")
177
  height = gr.Slider(640, 1536, 1024, step=64, label="Height")
178
  width = gr.Slider(640, 1024, 768, step=64, label="Width")
@@ -189,4 +225,3 @@ with gr.Blocks() as demo:
189
  )
190
 
191
  demo.launch()
192
-
 
9
  from huggingface_hub import hf_hub_download
10
 
11
  from diffusers.models import ControlNetModel
12
+ from pipeline_stable_diffusion_xl_instantid import (
13
+ StableDiffusionXLInstantIDPipeline,
14
+ draw_kps,
15
+ )
16
 
17
+ from insightface.app import FaceAnalysis
18
 
19
 
20
  # ---------------------------
 
57
 
58
 
59
  # ---------------------------
60
+ # InsightFace (ID de visage)
61
  # ---------------------------
62
 
63
  def setup_face_analyzer():
64
+ # CPU pour éviter les embrouilles GPU dans ZeroGPU
65
  app = FaceAnalysis(name="buffalo_l")
66
+ app.prepare(ctx_id=-1)
67
  return app
68
 
69
 
70
  face_app = setup_face_analyzer()
71
 
72
 
73
+ def get_face_info(image: Image.Image):
74
+ """Retourne (embedding, keypoints) pour le plus grand visage de l'image."""
75
  img = np.array(image.convert("RGB"))
76
  img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
77
 
78
  faces = face_app.get(img_bgr)
79
  if len(faces) == 0:
80
+ raise RuntimeError("Aucun visage détecté sur l'image.")
81
 
82
+ # On prend le plus grand visage
83
  face = sorted(
84
  faces,
85
  key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]),
86
  reverse=True,
87
  )[0]
88
+
89
+ # Selon la version d'insightface, accès par attribut ou dict
90
+ emb = getattr(face, "normed_embedding", None)
91
+ if emb is None and isinstance(face, dict):
92
+ emb = face.get("embedding", None)
93
+ if emb is None:
94
+ raise RuntimeError("Impossible de récupérer l'embedding du visage.")
95
+
96
+ kps = getattr(face, "kps", None)
97
+ if kps is None and isinstance(face, dict):
98
+ kps = face.get("kps", None)
99
+ if kps is None:
100
+ raise RuntimeError("Impossible de récupérer les keypoints du visage.")
101
+
102
+ return np.array(emb, dtype=np.float32), np.array(kps)
103
 
104
 
105
  # ---------------------------
 
135
  # ---------------------------
136
 
137
  @spaces.GPU
138
+ def generate(face_image, prompt, negative_prompt="", steps=30, guidance_scale=5.0, height=1024, width=768):
139
+ """
140
+ face_image : image contenant le visage de référence
141
+ prompt : description du corps, tenue, décor, style
142
+ """
143
+
144
  try:
145
  if face_image is None:
146
  raise gr.Error("Merci de fournir une image de visage.")
 
151
  else:
152
  pipe.to("cpu")
153
 
154
+ # 1) Embedding + keypoints du visage
155
+ face_emb, face_kps = get_face_info(face_image) # (512,), (5,2) typiquement
156
+ face_emb_batch = face_emb[None] # (1,512)
157
 
158
+ # 2) Génération de l'image de keypoints (condition ControlNet)
159
+ kps_image = draw_kps(face_image, face_kps) # PIL.Image, comme dans l'exemple officiel
160
+
161
+ # 3) Appel du pipeline InstantID SDXL
162
+ # Note : image_embeds = face_emb, image = kps_image (comme dans l'exemple)
163
  out = pipe(
164
  prompt=prompt,
165
  negative_prompt=negative_prompt,
166
+ image=kps_image,
167
+ image_embeds=face_emb_batch,
168
  num_inference_steps=int(steps),
169
  guidance_scale=float(guidance_scale),
170
+ controlnet_conditioning_scale=0.8,
171
  height=int(height),
172
  width=int(width),
173
  )
 
185
  # ---------------------------
186
 
187
  with gr.Blocks() as demo:
188
+ gr.Markdown("## InstantID + SDXL (ZeroGPU) – 1 image visage corps généré")
189
 
190
  with gr.Row():
191
  with gr.Column():
 
194
  prompt = gr.Textbox(
195
  label="Prompt",
196
  lines=3,
197
+ value=(
198
+ "photorealistic full body firefighter uniform, helmet, reflective stripes, "
199
+ "standing in front of a fire truck at night, cinematic lighting, same identity"
200
+ ),
201
  )
202
 
203
  neg_prompt = gr.Textbox(
204
  label="Negative",
205
+ value=(
206
+ "lowres, deformed, extra limbs, bad anatomy, text, watermark, "
207
+ "blurry, cartoon, drawing, illustration"
208
+ ),
209
  )
210
 
211
+ steps = gr.Slider(5, 60, 30, step=1, label="Steps")
212
  guidance = gr.Slider(1.0, 10.0, 5.0, step=0.5, label="Guidance scale")
213
  height = gr.Slider(640, 1536, 1024, step=64, label="Height")
214
  width = gr.Slider(640, 1024, 768, step=64, label="Width")
 
225
  )
226
 
227
  demo.launch()