cngsm committed on
Commit
e370e60
·
verified ·
1 Parent(s): d282009

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +391 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,391 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
import hashlib
import io
import json
import os
import tempfile
import time
import uuid
from typing import Any, Dict, List, Tuple

import cv2
import gradio as gr
import mediapipe as mp
import numpy as np
from PIL import Image
9
+
10
+ # -----------------------------
11
+ # Geometry helpers
12
+ # -----------------------------
13
def dist2D(a, b):
    """Return the Euclidean distance between two points in the x/y plane.

    Only the first two components of ``a`` and ``b`` are read, so 3-tuples
    such as ``(x, y, z)`` landmarks are accepted and their third component
    is ignored.
    """
    dx = a[0] - b[0]
    dy = a[1] - b[1]
    return (dx ** 2 + dy ** 2) ** 0.5
15
+
16
+ # -----------------------------
17
+ # Face mesh indices (subset)
18
+ # -----------------------------
19
# Named face-mesh landmark indices used to look up points in the landmark
# list (see metrics_from_landmarks, which indexes the keypoints with these).
LM = dict(
    leftEyeOuter=33,
    rightEyeOuter=263,
    noseLeft=97,
    noseRight=326,
    noseTip=1,
    chin=152,
    midForehead=10,
)
28
+
29
+ # Oval for contours (subset path)
30
# Connected [start, end] landmark-index pairs describing a face contour.
# NOTE(review): this walk goes 10 -> ... -> 152 -> ... -> 150 and then
# retraces the same vertices in reverse back to 10, so it looks like only
# part of the oval is covered (twice). It appears to differ from MediaPipe's
# own FACEMESH_FACE_OVAL connection set - confirm before using it to draw or
# measure contours. No use of this constant is visible in this file.
FACEMESH_FACE_OVAL = [
    [10, 338], [338, 297], [297, 332], [332, 284], [284, 251], [251, 389],
    [389, 356], [356, 454], [454, 323], [323, 361], [361, 288], [288, 397],
    [397, 365], [365, 379], [379, 378], [378, 400], [400, 377], [377, 152],
    [152, 148], [148, 176], [176, 149], [149, 138], [138, 172], [172, 136],
    [136, 150], [150, 149], [149, 176], [176, 148], [148, 152], [152, 377],
    [377, 400], [400, 378], [378, 379], [379, 365], [365, 397], [397, 288],
    [288, 361], [361, 323], [323, 454], [454, 356], [356, 389], [389, 251],
    [251, 284], [284, 332], [332, 297], [297, 338], [338, 10],
]
40
+
41
# (index_a, index_b) landmark pairs treated as left/right mirror counterparts
# by compute_symmetry_scores. The labels are the original author's
# approximate descriptions of each pair.
LEFT_RIGHT_PAIRS = [
    (33, 263),   # outer eyes
    (159, 386),  # inner eyelids approx
    (70, 300),   # brows
    (36, 266),   # cheeks
    (50, 280),   # mid cheeks
    (234, 454),  # jaw
]
49
+
50
+ # -----------------------------
51
+ # MediaPipe processing
52
+ # -----------------------------
53
def image_to_rgb(np_img):
    """Return ``np_img`` as a 3-channel RGB array.

    The images reaching this function in this app come from
    ``gr.Image(type="numpy")`` or ``np.array(...)`` of the uploaded image,
    which are already RGB/RGBA ordered. The previous implementation ran
    OpenCV's BGR->RGB conversions on them, which swapped the red and blue
    channels before face detection. Channel order is now preserved.

    Args:
        np_img: array of shape (H, W), (H, W, 1), (H, W, 3) or (H, W, 4).

    Returns:
        A C-contiguous (H, W, 3) array with the same dtype as the input.

    Raises:
        ValueError: if the array does not have one of the shapes above.
    """
    arr = np.asarray(np_img)
    if arr.ndim == 2:
        # Grayscale: replicate the single plane into R, G and B.
        return np.ascontiguousarray(np.stack([arr, arr, arr], axis=-1))
    if arr.ndim != 3:
        raise ValueError(f"unsupported image shape: {arr.shape}")
    channels = arr.shape[2]
    if channels == 1:
        return np.ascontiguousarray(np.repeat(arr, 3, axis=2))
    if channels == 3:
        return np.ascontiguousarray(arr)
    if channels == 4:
        # Drop the alpha plane; the colour planes are already in RGB order.
        return np.ascontiguousarray(arr[..., :3])
    raise ValueError(f"unsupported image shape: {arr.shape}")
59
+
60
def extract_landmarks(np_img):
    """Run MediaPipe Face Mesh on an image and return landmarks plus bbox.

    Args:
        np_img: image array accepted by ``image_to_rgb``.

    Returns:
        A tuple ``(kps, bbox, (w, h))`` where
        * ``kps`` is a list of ``(x_px, y_px, z)`` tuples for the first
          detected face (x/y scaled by the image width/height, z passed
          through as returned by MediaPipe), or ``None`` if no face is found;
        * ``bbox`` is ``(x_min, y_min, width, height)`` in pixels, clamped to
          the image, or ``None`` if no face is found;
        * ``(w, h)`` is the image size.
    """
    img_rgb = image_to_rgb(np_img)
    h, w = img_rgb.shape[:2]

    # A fresh FaceMesh is created per call and closed by the context manager.
    with mp.solutions.face_mesh.FaceMesh(
        static_image_mode=True,
        refine_landmarks=True,
        max_num_faces=1,
    ) as face_mesh:
        res = face_mesh.process(img_rgb)

    if not res.multi_face_landmarks:
        return None, None, (w, h)

    lms = res.multi_face_landmarks[0].landmark
    kps = [(lm.x * w, lm.y * h, lm.z) for lm in lms]
    xs = [p[0] for p in kps]
    ys = [p[1] for p in kps]

    # Clamp the landmark extent to the image rectangle.
    x_min, x_max = max(0, int(np.min(xs))), min(w - 1, int(np.max(xs)))
    y_min, y_max = max(0, int(np.min(ys))), min(h - 1, int(np.max(ys)))
    # Landmarks may fall entirely outside the frame; never report a negative size.
    bbox = (x_min, y_min, max(x_max - x_min, 0), max(y_max - y_min, 0))
    return kps, bbox, (w, h)
75
+
76
+ # -----------------------------
77
+ # Symmetry estimation (automatic fallback)
78
+ # -----------------------------
79
def compute_symmetry_scores(kps, bbox):
    """Estimate a crude left/right symmetry score from landmark positions.

    For each pair in ``LEFT_RIGHT_PAIRS`` the horizontal distance of both
    landmarks to the bounding-box centre line is compared; the absolute
    difference is normalised by the bbox width. The score is
    ``1 - 2 * mean(differences)`` floored at 0.

    Args:
        kps: list of ``(x, y, z)`` landmarks, or ``None`` when no face exists.
        bbox: ``(x, y, width, height)`` of the face, or ``None``.

    Returns:
        ``{"enabled": False, "note": ...}`` when there is no face, otherwise a
        dict with ``enabled``, ``symmetry_score`` (rounded to 3 decimals),
        ``method`` and ``pairs_used``.
    """
    if kps is None or bbox is None:
        return {"enabled": False, "note": "sem rosto detectado"}

    x, _y, bw, _bh = bbox
    cx = x + bw / 2.0            # vertical midline of the bounding box
    scale = max(bw, 1.0)         # avoid dividing by a zero-width bbox
    count = len(kps)

    # Pairs whose indices are not present in kps are skipped.
    pair_diffs = [
        abs(abs(kps[a][0] - cx) - abs(kps[b][0] - cx)) / scale
        for a, b in LEFT_RIGHT_PAIRS
        if a < count and b < count
    ]

    if pair_diffs:
        score = max(0.0, 1.0 - float(np.mean(pair_diffs)) * 2.0)  # crude normalization
    else:
        score = 0.0

    return {
        "enabled": True,
        "symmetry_score": round(score, 3),
        "method": "midline-distance left/right pairs (normalized)",
        "pairs_used": len(pair_diffs),
    }
103
+
104
def load_symmetry_json(fileobj):
    """Load a user-supplied symmetry JSON from whatever the upload widget gave us.

    Accepted inputs, tried in this order:
      1. a string path to an existing file;
      2. a dict carrying the path under ``"path"`` or ``"name"``;
      3. an object with a ``.name`` attribute pointing to an existing file;
      4. a file-like object with ``.read()`` returning bytes or text.

    Args:
        fileobj: upload value, or ``None`` when nothing was uploaded.

    Returns:
        The parsed JSON value; ``None`` when there is nothing readable; or
        ``{"enabled": False, "error": ...}`` when reading/parsing failed.
    """
    if fileobj is None:
        return None

    def existing_path(candidate):
        # Only str / PathLike values that point to a real file qualify.
        if isinstance(candidate, (str, os.PathLike)) and os.path.isfile(candidate):
            return candidate
        return None

    try:
        if isinstance(fileobj, dict):
            path = existing_path(fileobj.get("path")) or existing_path(fileobj.get("name"))
        else:
            path = existing_path(fileobj) or existing_path(getattr(fileobj, "name", None))

        if path is not None:
            with open(path, "r", encoding="utf-8") as f:
                return json.load(f)

        if hasattr(fileobj, "read"):
            data = fileobj.read()
            # Text-mode streams already yield str; only bytes need decoding.
            if isinstance(data, (bytes, bytearray)):
                data = data.decode("utf-8")
            return json.loads(data)
    except (OSError, ValueError, TypeError) as e:
        # ValueError covers JSONDecodeError and UnicodeDecodeError.
        return {"enabled": False, "error": f"Falha ao ler simetria: {e}"}
    return None
122
+
123
+ # -----------------------------
124
+ # Simple rule-based parser for description (PT-BR)
125
+ # -----------------------------
126
def parse_description(text: str) -> Dict[str, Any]:
    """Turn a free-text (PT-BR) face description into a structured dict.

    Matching is plain case-insensitive substring search: each attribute is
    set to the value of the first rule whose keywords occur anywhere in the
    text, otherwise to its default. ``None`` or empty text yields defaults.

    NOTE(review): because matching is by substring, short keywords such as
    "20", "45" or "social" fire on any occurrence, and "castanho escuro"
    sets both eye and hair colour. Behaviour is kept as-is here.

    Args:
        text: description in Portuguese; may be ``None`` or empty.

    Returns:
        Dict with the keys ``person``, ``facial_structure``, ``features``,
        ``pose``, ``lighting``, ``contextual_details`` and ``impression``.
    """
    t = (text or "").lower()

    def has(*keys):
        # True when any keyword is a substring of the lowered text.
        return any(k in t for k in keys)

    def choose(default, *rules):
        # Value of the first (value, keywords) rule that matches, else default.
        for value, keys in rules:
            if has(*keys):
                return value
        return default

    # person
    age = choose(
        "indefinido",
        ("45-55", ("45", "55", "meia-idade", "maduro")),
        ("adulto jovem", ("adulto jovem", "20", "30")),
    )
    # Feminine keywords take precedence when both genders are mentioned.
    gender = choose(
        "indefinido",
        ("feminino", ("feminino", "mulher")),
        ("masculino", ("masculino", "homem")),
    )

    # face shape ("rosto oval" / "formato oval" both contain "oval")
    shape = choose("indefinido", ("oval", ("oval",)))

    # skin
    skin_tone = choose(
        "indefinido",
        ("clara", ("pele clara",)),
        ("escura", ("pele escura",)),
        ("média", ("pele média",)),
    )
    skin_texture = choose("natural", ("lisa", ("pele lisa", "textura lisa")))

    # eyes
    eye_shape = choose("olhos expressivos", ("amendoado", ("olhos amendoados",)))
    eye_color = choose(
        "indefinido",
        ("castanho escuro", ("olhos castanhos", "castanha escura", "castanho escuro")),
        ("azul", ("olhos azuis",)),
        ("verde", ("olhos verdes",)),
    )
    eye_expr = choose(
        "neutro",
        ("confiante", ("confiança", "confiante")),
        ("amigável", ("amigável",)),
    )

    # eyebrows
    brow = choose(
        "naturais",
        ("arqueadas moderadas", ("sobrancelhas arqueadas",)),
        ("grossas", ("sobrancelhas grossas",)),
        ("finas", ("sobrancelhas finas",)),
    )

    # nose
    nose_shape = choose("proporcional", ("reto", ("nariz reto",)))
    nose_size = choose("indefinido", ("médio", ("tamanho médio",)))

    # mouth
    mouth_shape = choose("natural", ("lábios finos", ("lábios finos",)))
    mouth_expr = choose(
        "serena",
        ("sorriso sutil", ("sorriso sutil", "sorriso suave")),
        ("neutra", ("expressão neutra",)),
    )

    # hair
    hair_length = choose("indefinido", ("curto", ("cabelo curto",)))
    hair_style = choose("indefinido", ("penteado para trás", ("penteado para trás",)))
    hair_color = choose(
        "indefinido",
        ("castanho escuro", ("castanha escura", "castanho escuro")),
    )

    # cheeks / jaw
    cheeks = choose(
        "bochechas suaves",
        (
            "maçãs do rosto proeminentes",
            ("maçãs do rosto são proeminentes", "maçãs do rosto proeminentes"),
        ),
    )
    jawline = choose("indefinida", ("mandíbula bem definida", ("mandíbula bem definida",)))
    chin = choose("indefinido", ("queixo arredondado", ("queixo é arredondado", "queixo arredondado")))
    forehead = choose("indefinida", ("testa alta", ("testa é alta", "testa alta")))

    # pose / gaze
    pose_head_tilt = choose(
        "indefinido",
        ("cabeça ereta", ("postura é ereta",)),
        ("ligeiramente inclinada", ("ligeiramente inclinada",)),
    )
    gaze = choose("neutro", ("olhar direto", ("olhar direto", "apresentação", "discursando")))

    # lighting
    lighting = choose("indefinido", ("frontal suave", ("iluminação", "fundo neutro")))

    # attire / context: specific garments win over the generic formal/casual label
    garments = []
    if has("camisa social preta"):
        garments.append("camisa social preta")
    if has("paletó cinza"):
        garments.append("paletó cinza de um botão")
    if garments:
        attire = ", ".join(garments)
    else:
        attire = choose("casual", ("formal", ("paletó", "terno", "social")))
    accessories = choose("indefinido", ("nenhum", ("sem óculos", "sem brincos", "não há acessórios")))
    background = choose("indefinido", ("fundo neutro", ("fundo neutro",)))

    # high-level impression
    impression = choose(
        "neutra",
        (
            "confiança e seriedade, com um sorriso sutil",
            ("confiança", "seriedade", "sorriso sutil", "sorriso suave"),
        ),
    )

    return {
        "person": {"age": age, "gender": gender},
        "facial_structure": {
            "overall_shape": f"formato {shape}" if shape != "indefinido" else "indefinido",
            "proportions": "proporções equilibradas",
            "skin_texture": f"pele {skin_texture}",
            "skin_tone": skin_tone,
            "jawline": jawline,
            "chin": chin,
            "forehead": forehead,
        },
        "features": {
            "eyes": {
                "shape": eye_shape,
                "color": eye_color,
                "expression": eye_expr,
                "eyebrows": brow,
            },
            "nose": {"shape": nose_shape, "size": nose_size},
            "mouth": {"shape": mouth_shape, "expression": mouth_expr},
            "hair": {"length": hair_length, "style": hair_style, "color": hair_color},
            "cheeks": {"shape": cheeks},
        },
        "pose": {"head_tilt": pose_head_tilt, "gaze": gaze},
        "lighting": {"direction": lighting},
        "contextual_details": {
            "attire": attire,
            "accessories": accessories,
            "background": background,
        },
        "impression": impression,
    }
238
+
239
+ # -----------------------------
240
+ # Merge metrics + description + symmetry
241
+ # -----------------------------
242
def build_structured_json(desc_struct, metrics, symmetry_block):
    """Merge the parsed description, landmark metrics and symmetry data.

    Args:
        desc_struct: dict produced by ``parse_description`` (``None`` is
            treated as empty). Its ``"impression"`` entry is consumed to
            derive ``happiness_score`` and is not copied to the output.
        metrics: landmark metrics dict, or a falsy value for "none".
        symmetry_block: symmetry dict, or a falsy value for "none".

    Returns:
        A new dict with every ``desc_struct`` entry except ``"impression"``,
        followed by ``expression_metrics``, ``symmetry`` and ``metrics``.
    """
    desc_struct = desc_struct or {}
    impression = desc_struct.get("impression", "") or ""

    # Placeholder scores: a mention of a smile bumps happiness; calmness is fixed.
    expr = {
        "happiness_score": 0.6 if "sorriso" in impression else 0.4,
        "calmness_score": 0.8,
    }
    if symmetry_block and isinstance(symmetry_block, dict):
        # May be None when the block carries no score (e.g. disabled symmetry).
        expr["symmetry_score"] = symmetry_block.get("symmetry_score", None)

    out = {key: value for key, value in desc_struct.items() if key != "impression"}
    out["expression_metrics"] = expr
    out["symmetry"] = symmetry_block if symmetry_block else {"enabled": False}
    out["metrics"] = metrics or {}
    return out
258
+
259
def metrics_from_landmarks(kps, bbox, img_size):
    """Compute basic facial distances from the detected landmarks.

    Args:
        kps: list of ``(x, y, z)`` landmarks, or ``None`` when no face exists.
        bbox: ``(x, y, width, height)`` of the face in pixels.
        img_size: ``(width, height)`` of the source image.

    Returns:
        ``{"error": ...}`` when there are no landmarks; otherwise a dict with
        ``image`` size, ``bbox`` and ``distances`` (interocular and nose
        width, both in pixels and normalised by the bbox width).
    """
    if kps is None or bbox is None:
        return {"error": "Nenhum rosto detectado"}

    w, h = img_size
    x, y, bw, bh = bbox
    face_width = max(bw, 1.0)  # avoid dividing by a zero-width bbox

    interocular_px = dist2D(kps[LM["leftEyeOuter"]], kps[LM["rightEyeOuter"]])
    nose_width_px = dist2D(kps[LM["noseLeft"]], kps[LM["noseRight"]])

    return {
        "image": {"width": int(w), "height": int(h)},
        "bbox": {"x": int(x), "y": int(y), "w": int(bw), "h": int(bh)},
        "distances": {
            "interocular_px": round(float(interocular_px), 2),
            "interocular_norm": round(float(interocular_px / face_width), 3),
            "nose_width_px": round(float(nose_width_px), 2),
            "nose_width_norm": round(float(nose_width_px / face_width), 3),
        },
    }
281
+
282
+ # -----------------------------
283
+ # Main pipeline
284
+ # -----------------------------
285
def process(image, description_text, symmetry_json_file):
    """Run the full pipeline for one request and write the JSON artifacts.

    Steps: detect landmarks, compute metrics, pick the symmetry block
    (uploaded file or automatic estimate), parse the description, merge
    everything, and save three JSON files in a fresh temporary directory.

    Fixes over the previous version:
      * the output directory is created before any file is written into it
        (it used to be referenced before assignment);
      * ``hashlib`` is imported at module level for the face signature;
      * the "no image" early return now yields four values, matching the
        four output components wired to this callback.

    Args:
        image: numpy image (or a dict with an ``"image"`` entry), or ``None``.
        description_text: free-text description; may be ``None``.
        symmetry_json_file: optional uploaded symmetry JSON.

    Returns:
        ``(structured_json_text, path_struct, path_scene, path_landmarks)``.
        When ``image`` is ``None`` the text is a prompt to upload an image
        and the three paths are ``None``.
    """
    # 1) Prepare image np array
    if image is None:
        return "Envie uma imagem.", None, None, None
    if isinstance(image, dict) and "image" in image:
        np_img = np.asarray(image["image"])
    else:
        np_img = np.asarray(image)

    # 2) Landmarks and derived metrics
    kps, bbox, img_size = extract_landmarks(np_img)
    metrics = metrics_from_landmarks(kps, bbox, img_size)

    # 3) Symmetry: uploaded file wins, otherwise automatic estimate
    user_sym = load_symmetry_json(symmetry_json_file)
    symmetry_block, symmetry_origin = _resolve_symmetry(user_sym, kps, bbox)

    # 4) Parse description
    desc_struct = parse_description(description_text or "")

    # 5) Landmarks in serialisable form (empty list when no face was found)
    landmarks_list = [
        {"x": round(float(px), 3), "y": round(float(py), 3), "z": round(float(pz), 5)}
        for (px, py, pz) in (kps or [])
    ]

    # 6) Stable signature: SHA-256 over the canonical JSON of the landmarks
    face_signature = None
    if kps is not None:
        lm_bytes = json.dumps(landmarks_list, ensure_ascii=False, sort_keys=True).encode("utf-8")
        face_signature = hashlib.sha256(lm_bytes).hexdigest()

    # 7) Merge all
    structured = build_structured_json(desc_struct, metrics, symmetry_block)
    structured["face_landmarks"] = landmarks_list
    if face_signature:
        structured["face_signature"] = face_signature

    # 8) Compose Veo 3 scene JSON
    scene = {
        "face_landmarks": landmarks_list,
        "project": {
            "id": f"veo3_face_clone_{uuid.uuid4().hex[:8]}",
            "version": "1.0",
            "created_at": int(time.time()),
        },
        "structured_description": structured,
        "controls": {
            "seed": 123456789,
            "clone_mode": "photorealistic",
            "symmetry_origin": symmetry_origin,
            "face_signature": face_signature,
        },
        "prompt": {
            "summary": "Retrato frontal, fidelidade máxima ao rosto de referência, iluminação neutra, fundo limpo.",
            "hints": [
                "respeitar medidas faciais normalizadas (interocular, largura do nariz)",
                "reproduzir textura e cor do cabelo conforme descrição",
                "manter expressão: sorriso sutil e olhar direto quando indicado",
            ],
        },
    }

    # 9) Save JSONs into a per-request directory under the system temp dir
    out_dir = tempfile.mkdtemp(prefix="face_clone_")
    path_landmarks = os.path.join(out_dir, "face_landmarks.json")
    path_struct = os.path.join(out_dir, "face_description_structured.json")
    path_scene = os.path.join(out_dir, "veo3_scene.json")
    _write_json(path_landmarks, landmarks_list)
    _write_json(path_struct, structured)
    _write_json(path_scene, scene)

    # 10) Return preview text plus the three downloadable files
    return json.dumps(structured, ensure_ascii=False, indent=2), path_struct, path_scene, path_landmarks


def _resolve_symmetry(user_sym, kps, bbox):
    """Choose the symmetry block and its origin label ("auto" or "uploaded")."""
    upload_failed = (
        isinstance(user_sym, dict)
        and user_sym.get("enabled") is False
        and "error" in user_sym
    )
    if user_sym is None or upload_failed:
        # Nothing usable was uploaded: fall back to the automatic estimate.
        if kps is not None:
            return compute_symmetry_scores(kps, bbox), "auto"
        return {"enabled": False}, "auto"
    if isinstance(user_sym, dict) and ("symmetry_score" in user_sym or "enabled" in user_sym):
        return user_sym, "uploaded"
    # Unknown schema: keep the payload but wrap it so downstream keys exist.
    return {"enabled": True, "custom": user_sym}, "uploaded"


def _write_json(path, payload):
    """Write ``payload`` to ``path`` as indented UTF-8 JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)
374
+
375
# Gradio UI: inputs (image, optional symmetry JSON, description) and outputs
# (JSON preview plus three downloadable files), wired to process().
# NOTE(review): indentation was lost in the source; the nesting below
# (image + file in one row, description underneath) is reconstructed.
with gr.Blocks(title="Face Clone JSON Builder • Universal") as demo:
    gr.Markdown("## Face Clone JSON Builder (Universal)\nEnvie imagem + descrição + JSON de simetria (opcional). O app funde tudo e gera um JSON pronto para Veo 3.")
    with gr.Row():
        image = gr.Image(type="numpy", label="Imagem (upload/câmera)")
        sym = gr.File(label="Simetria (*.json) — opcional")
    desc = gr.Textbox(lines=12, label="Descrição detalhada (PT/BR)", placeholder="Cole aqui a descrição: formato do rosto, olhos, nariz, boca, cabelo, pele, idade/gênero, postura, vestimenta, expressão...")

    btn = gr.Button("Gerar JSON para Veo 3")
    struct_json = gr.Code(label="face_description_structured.json (preview)", language="json")
    file_struct = gr.File(label="Baixar face_description_structured.json")
    file_scene = gr.File(label="Baixar veo3_scene.json")
    file_landmarks = gr.File(label="Baixar face_landmarks.json")

    # process() returns four values, in the same order as these outputs.
    btn.click(process, inputs=[image, desc, sym], outputs=[struct_json, file_struct, file_scene, file_landmarks])

if __name__ == "__main__":
    # Bind to all interfaces on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio>=4.44.0
2
+ mediapipe==0.10.14
3
+ opencv-python>=4.7.0.72
4
+ numpy
5
+ Pillow