cngsm committed on
Commit
329dd42
·
verified ·
1 Parent(s): eaf4a4f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +393 -0
app.py ADDED
@@ -0,0 +1,393 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ from PIL import Image
4
+ import mediapipe as mp
5
+ import cv2
6
+ import json, os, io, uuid, time, hashlib
7
+ from typing import Dict, Any, List, Tuple
8
+
9
+ # -----------------------------
10
+ # Geometry helpers
11
+ # -----------------------------
12
def dist2D(a, b):
    """Return the Euclidean distance between *a* and *b* in the x/y plane.

    Only the first two components of each point are read, so (x, y, z)
    tuples are accepted and their z value is ignored.
    """
    delta_x = a[0] - b[0]
    delta_y = a[1] - b[1]
    squared = delta_x ** 2 + delta_y ** 2
    return squared ** 0.5
14
+
15
+ # -----------------------------
16
+ # Face mesh indices (subset)
17
+ # -----------------------------
18
# Named face-mesh landmark indices (subset). metrics_from_landmarks looks up
# the eye and nose entries by key; the remaining entries are not read in this file.
LM = {
    "leftEyeOuter": 33,
    "rightEyeOuter": 263,
    "noseLeft": 97,
    "noseRight": 326,
    "noseTip": 1,
    "chin": 152,
    "midForehead": 10,
}
27
+
28
# Face-oval contour as [start, end] landmark-index edges.
# The edges form one closed loop: each edge starts where the previous one
# ended, and the last edge returns to landmark 10. The sequence follows
# MediaPipe's FACEMESH_FACE_OVAL connection set; the earlier list doubled back
# along the right side after landmark 149 and never reached the left half.
FACEMESH_FACE_OVAL = [
    [10, 338], [338, 297], [297, 332], [332, 284], [284, 251], [251, 389],
    [389, 356], [356, 454], [454, 323], [323, 361], [361, 288], [288, 397],
    [397, 365], [365, 379], [379, 378], [378, 400], [400, 377], [377, 152],
    [152, 148], [148, 176], [176, 149], [149, 150], [150, 136], [136, 172],
    [172, 58], [58, 132], [132, 93], [93, 234], [234, 127], [127, 162],
    [162, 21], [21, 54], [54, 103], [103, 67], [67, 109], [109, 10],
]
39
+
40
# Landmark index pairs (a, b) whose horizontal distances from the bbox midline
# are compared by compute_symmetry_scores. The region labels are the original
# author's approximate descriptions.
LEFT_RIGHT_PAIRS = [
    (33, 263),   # outer eyes
    (159, 386),  # inner eyelids (approx.)
    (70, 300),   # brows
    (36, 266),   # cheeks
    (50, 280),   # mid cheeks
    (234, 454),  # jaw
]
48
+
49
+ # -----------------------------
50
+ # MediaPipe processing
51
+ # -----------------------------
52
def image_to_rgb(np_img):
    """Return *np_img* as a contiguous 3-channel RGB array.

    The image reaches this function from ``gr.Image(type="numpy")``, which
    Gradio documents as delivering pixels in RGB(A) order. The channels are
    therefore kept in place rather than swapped as if they were OpenCV BGR
    data, so the face-mesh model sees the true colours.

    Args:
        np_img: array of shape (H, W), (H, W, 1), (H, W, 3) or (H, W, 4).

    Returns:
        A C-contiguous array of shape (H, W, 3) with the input dtype.
        Greyscale input is replicated across the three channels; an alpha
        channel is dropped.

    Raises:
        ValueError: if the array has any other shape.
    """
    pixels = np.asarray(np_img)
    if pixels.ndim == 2:
        pixels = pixels[:, :, None]
    if pixels.ndim != 3 or pixels.shape[2] not in (1, 3, 4):
        raise ValueError(f"unsupported image shape: {pixels.shape}")

    channels = pixels.shape[2]
    if channels == 1:
        pixels = np.repeat(pixels, 3, axis=2)
    elif channels == 4:
        pixels = pixels[:, :, :3]  # discard alpha, keep R, G, B order
    # Slicing can leave a non-contiguous view; hand downstream code a packed buffer.
    return np.ascontiguousarray(pixels)
58
+
59
def extract_landmarks(np_img):
    """Detect one face with MediaPipe Face Mesh and return its landmarks in pixels.

    Args:
        np_img: image array; it is passed through image_to_rgb first.

    Returns:
        A tuple ``(kps, bbox, (w, h))``. ``kps`` is a list of ``(x_px, y_px, z)``
        tuples for the first detected face, where x and y are the normalized
        landmark coordinates scaled by the image width and height and z is
        passed through unchanged. ``bbox`` is ``(x, y, width, height)`` of the
        landmarks, clamped to the image. ``kps`` and ``bbox`` are both ``None``
        when no face is found.
    """
    rgb = image_to_rgb(np_img)
    height, width = rgb.shape[:2]
    size = (width, height)

    face_mesh_cls = mp.solutions.face_mesh.FaceMesh
    with face_mesh_cls(static_image_mode=True, refine_landmarks=True, max_num_faces=1) as mesh:
        faces = mesh.process(rgb).multi_face_landmarks
        if not faces:
            return None, None, size
        points = [(p.x * width, p.y * height, p.z) for p in faces[0].landmark]

    x_values = [p[0] for p in points]
    y_values = [p[1] for p in points]
    # Clamp to valid pixel indices; landmarks may fall slightly outside the frame.
    left = max(0, int(min(x_values)))
    right = min(width - 1, int(max(x_values)))
    top = max(0, int(min(y_values)))
    bottom = min(height - 1, int(max(y_values)))
    return points, (left, top, right - left, bottom - top), size
74
+
75
+ # -----------------------------
76
+ # Symmetry estimation (automatic fallback)
77
+ # -----------------------------
78
def compute_symmetry_scores(kps, bbox):
    """Estimate left/right facial symmetry from mirrored landmark pairs.

    For every pair in LEFT_RIGHT_PAIRS whose indices both exist in *kps*, the
    horizontal distance of each landmark from the bounding-box centre line is
    measured; the absolute difference of the two distances is divided by the
    box width (at least 1.0). The score is ``max(0, 1 - 2 * mean(differences))``,
    or 0.0 when no pair could be used.

    Args:
        kps: list of (x, y, z) landmarks, or None when no face was detected.
        bbox: (x, y, width, height) of the face.

    Returns:
        A dict with ``enabled``, ``symmetry_score`` (rounded to 3 decimals),
        ``method`` and ``pairs_used``; or a disabled dict with a note when
        *kps* is None.
    """
    if kps is None:
        return {"enabled": False, "note": "sem rosto detectado"}

    left, _top, box_w, _box_h = bbox
    mid_x = left + box_w / 2.0
    scale = max(box_w, 1.0)
    count = len(kps)

    usable = ((kps[a], kps[b]) for a, b in LEFT_RIGHT_PAIRS if a < count and b < count)
    deviations = []
    for (xa, _ya, _za), (xb, _yb, _zb) in usable:
        deviations.append(abs(abs(xa - mid_x) - abs(xb - mid_x)) / scale)

    # crude normalization: a mean deviation of half the box width maps to 0
    score = max(0.0, 1.0 - float(np.mean(deviations)) * 2.0) if deviations else 0.0
    return {
        "enabled": True,
        "symmetry_score": round(score, 3),
        "method": "midline-distance left/right pairs (normalized)",
        "pairs_used": len(deviations),
    }
102
+
103
def load_symmetry_json(fileobj):
    """Load a user-supplied symmetry JSON document from several input shapes.

    Accepted inputs:
      * ``None`` -> ``None``.
      * a path (``str`` or ``os.PathLike``) -> parsed if it names an existing
        file, otherwise ``None``.
      * an object with a ``name`` attribute naming an existing file -> that
        file is parsed.
      * a file-like object with ``read()`` returning bytes or text.
      * raw ``bytes`` / ``bytearray`` holding UTF-8 JSON.

    Returns:
        The parsed JSON value, ``None`` when there is nothing to load, or
        ``{"enabled": False, "error": "..."}`` when reading or parsing fails.
        This function does not raise on bad input.
    """
    if fileobj is None:
        return None
    try:
        if isinstance(fileobj, (str, os.PathLike)):
            if not os.path.isfile(fileobj):
                return None
            with open(fileobj, "r", encoding="utf-8") as f:
                return json.load(f)

        if isinstance(fileobj, (bytes, bytearray)):
            return json.loads(bytes(fileobj).decode("utf-8"))

        named_path = getattr(fileobj, "name", None)
        if named_path is not None and os.path.isfile(named_path):
            with open(named_path, "r", encoding="utf-8") as f:
                return json.load(f)

        if hasattr(fileobj, "read"):
            data = fileobj.read()
            # Text-mode streams hand back str, which has no decode().
            if isinstance(data, (bytes, bytearray)):
                data = bytes(data).decode("utf-8")
            return json.loads(data)
    except Exception as e:
        # Deliberately broad: this is the boundary for an arbitrary user upload,
        # and process() treats the error dict as "fall back to automatic symmetry".
        return {"enabled": False, "error": f"Falha ao ler simetria: {e}"}
    return None
121
+
122
+ # -----------------------------
123
+ # Simple rule-based parser for description (PT-BR)
124
+ # -----------------------------
125
def parse_description(text: str) -> Dict[str, Any]:
    """Turn a free-text Portuguese (pt-BR) face description into a structured dict.

    The parser is purely rule based: the text is lower-cased and each field is
    set from the presence of fixed key phrases, falling back to a default label
    when none matches. Numeric age hints ("20", "30", "45", "55") are matched
    as whole numbers, so a year such as "2023" does not count as an age.

    Args:
        text: the description; ``None`` or empty is treated as no information.

    Returns:
        A dict with the keys ``person``, ``facial_structure``, ``features``,
        ``pose``, ``lighting``, ``contextual_details`` and ``impression``.
    """
    t = (text or "").lower()
    # Whole digit runs found in the text, e.g. "homem de 45 anos" -> {"45"}.
    numbers = set("".join(ch if ch in "0123456789" else " " for ch in t).split())

    def has(*keys):
        """Return True when any of *keys* occurs as a substring of the text."""
        return any(k in t for k in keys)

    def has_number(*values):
        """Return True when any of *values* appears as a standalone number."""
        return not numbers.isdisjoint(values)

    # person
    age = "indefinido"
    if has_number("45", "55") or has("meia-idade", "maduro"):
        age = "45-55"
    elif has("adulto jovem") or has_number("20", "30"):
        age = "adulto jovem"

    gender = "indefinido"
    if has("masculino", "homem"):
        gender = "masculino"
    # Checked independently of the test above: when both kinds of keyword are
    # present the feminine label wins.
    if has("feminino", "mulher"):
        gender = "feminino"

    # face shape ("rosto oval" / "formato oval" both contain "oval")
    shape = "oval" if has("rosto oval", "formato oval", "oval") else "indefinido"

    # skin
    if has("pele clara"):
        skin_tone = "clara"
    elif has("pele escura"):
        skin_tone = "escura"
    elif has("pele média"):
        skin_tone = "média"
    else:
        skin_tone = "indefinido"
    skin_texture = "lisa" if has("pele lisa", "textura lisa") else "natural"

    # eyes
    eye_shape = "amendoado" if has("olhos amendoados") else "indefinido"
    eye_color = None
    # NOTE(review): "castanho escuro" / "castanha escura" also drive hair_color
    # below, so a dark-brown hair phrase sets the eye colour too — confirm intent.
    if has("olhos castanhos", "castanha escura", "castanho escuro"):
        eye_color = "castanho escuro"
    elif has("olhos azuis"):
        eye_color = "azul"
    elif has("olhos verdes"):
        eye_color = "verde"
    if has("confiança", "confiante"):
        eye_expr = "confiante"
    elif has("amigável"):
        eye_expr = "amigável"
    else:
        eye_expr = "neutro"

    # eyebrows
    if has("sobrancelhas arqueadas"):
        brow = "arqueadas moderadas"
    elif has("sobrancelhas grossas"):
        brow = "grossas"
    elif has("sobrancelhas finas"):
        brow = "finas"
    else:
        brow = "naturais"

    # nose
    nose_shape = "reto" if has("nariz reto") else "proporcional"
    nose_size = "médio" if has("tamanho médio") else "indefinido"

    # mouth
    mouth_shape = "lábios finos" if has("lábios finos") else "natural"
    if has("sorriso sutil", "sorriso suave"):
        mouth_expr = "sorriso sutil"
    elif has("expressão neutra"):
        mouth_expr = "neutra"
    else:
        mouth_expr = "serena"

    # hair
    hair_length = "curto" if has("cabelo curto") else "indefinido"
    hair_style = "penteado para trás" if has("penteado para trás") else "indefinido"
    hair_color = "castanho escuro" if has("castanha escura", "castanho escuro") else "indefinido"

    # cheeks / jaw
    cheeks = (
        "maçãs do rosto proeminentes"
        if has("maçãs do rosto são proeminentes", "maçãs do rosto proeminentes")
        else "bochechas suaves"
    )
    jawline = "mandíbula bem definida" if has("mandíbula bem definida") else "indefinida"
    chin = "queixo arredondado" if has("queixo é arredondado", "queixo arredondado") else "indefinido"
    forehead = "testa alta" if has("testa é alta", "testa alta") else "indefinida"

    # pose / gaze
    if has("postura é ereta"):
        pose_head_tilt = "cabeça ereta"
    elif has("ligeiramente inclinada"):
        pose_head_tilt = "ligeiramente inclinada"
    else:
        pose_head_tilt = "indefinido"
    gaze = "olhar direto" if has("olhar direto", "apresentação", "discursando") else "neutro"

    # lighting
    lighting = "frontal suave" if has("iluminação", "fundo neutro") else "indefinido"

    # attire / context
    garments = []
    if has("camisa social preta"):
        garments.append("camisa social preta")
    if has("paletó cinza"):
        garments.append("paletó cinza de um botão")
    if garments:
        attire = ", ".join(garments)
    else:
        attire = "formal" if has("paletó", "terno", "social") else "casual"
    accessories = "nenhum" if has("sem óculos", "sem brincos", "não há acessórios") else "indefinido"
    background = "fundo neutro" if has("fundo neutro") else "indefinido"

    # high-level impression
    impression = (
        "confiança e seriedade, com um sorriso sutil"
        if has("confiança", "seriedade", "sorriso sutil", "sorriso suave")
        else "neutra"
    )

    return {
        "person": {"age": age, "gender": gender},
        "facial_structure": {
            "overall_shape": f"formato {shape}" if shape != "indefinido" else "indefinido",
            "proportions": "proporções equilibradas",
            "skin_texture": f"pele {skin_texture}",
            "skin_tone": skin_tone,
            "jawline": jawline,
            "chin": chin,
            "forehead": forehead,
        },
        "features": {
            "eyes": {
                "shape": eye_shape if eye_shape != "indefinido" else "olhos expressivos",
                "color": eye_color or "indefinido",
                "expression": eye_expr,
                "eyebrows": brow,
            },
            "nose": {"shape": nose_shape, "size": nose_size},
            "mouth": {"shape": mouth_shape, "expression": mouth_expr},
            "hair": {"length": hair_length, "style": hair_style, "color": hair_color},
            "cheeks": {"shape": cheeks},
        },
        "pose": {"head_tilt": pose_head_tilt, "gaze": gaze},
        "lighting": {"direction": lighting},
        "contextual_details": {"attire": attire, "accessories": accessories, "background": background},
        "impression": impression,
    }
237
+
238
+ # -----------------------------
239
+ # Merge metrics + description + symmetry
240
+ # -----------------------------
241
def build_structured_json(desc_struct, metrics, symmetry_block):
    """Combine the parsed description, landmark metrics and symmetry data.

    Args:
        desc_struct: dict produced by the description parser; its
            ``impression`` entry is used only to derive a score and is not
            copied to the output.
        metrics: landmark metrics dict; a falsy value becomes ``{}``.
        symmetry_block: symmetry data; a falsy value becomes
            ``{"enabled": False}``.

    Returns:
        A new dict holding every *desc_struct* entry except ``impression``,
        followed by ``expression_metrics``, ``symmetry`` and ``metrics``.
    """
    impression_text = desc_struct.get("impression", "") or ""
    # Placeholder scores: happiness depends only on a "sorriso" mention, calmness is fixed.
    expression = {
        "happiness_score": 0.6 if "sorriso" in impression_text else 0.4,
        "calmness_score": 0.8,
    }
    if symmetry_block and isinstance(symmetry_block, dict):
        expression["symmetry_score"] = symmetry_block.get("symmetry_score", None)

    merged = {}
    for key, value in desc_struct.items():
        if key != "impression":
            merged[key] = value
    merged["expression_metrics"] = expression
    merged["symmetry"] = symmetry_block if symmetry_block else {"enabled": False}
    merged["metrics"] = metrics or {}
    return merged
257
+
258
def metrics_from_landmarks(kps, bbox, img_size):
    """Derive simple facial distances from the detected landmarks.

    Args:
        kps: list of (x, y, z) landmarks, or None when no face was detected.
        bbox: (x, y, width, height) of the face.
        img_size: (width, height) of the image.

    Returns:
        ``{"error": ...}`` when *kps* is None; otherwise a dict with the image
        size, the bounding box, and the outer-eye and nose-width distances in
        pixels and divided by the bounding-box width (at least 1.0).
    """
    if kps is None:
        return {"error": "Nenhum rosto detectado"}

    img_w, img_h = img_size
    left, top, box_w, box_h = bbox

    eye_span = dist2D(kps[LM["leftEyeOuter"]], kps[LM["rightEyeOuter"]])
    nose_span = dist2D(kps[LM["noseLeft"]], kps[LM["noseRight"]])
    scale = max(box_w, 1.0)  # guards against a zero-width box

    distances = {
        "interocular_px": round(float(eye_span), 2),
        "interocular_norm": round(float(eye_span / scale), 3),
        "nose_width_px": round(float(nose_span), 2),
        "nose_width_norm": round(float(nose_span / scale), 3),
    }
    return {
        "image": {"width": int(img_w), "height": int(img_h)},
        "bbox": {"x": int(left), "y": int(top), "w": int(box_w), "h": int(box_h)},
        "distances": distances,
    }
280
+
281
+ # -----------------------------
282
+ # Main pipeline
283
+ # -----------------------------
284
def process(image, description_text, symmetry_json_file):
    """Run the full pipeline for one request and write the JSON artefacts.

    Steps: normalise the image, extract landmarks and metrics, choose the
    symmetry data (uploaded file, or automatic when none was given or it
    failed to load), parse the description, merge everything, and save three
    JSON files in a fresh temporary directory.

    Args:
        image: the uploaded image (array-like), or a dict holding it under
            the ``"image"`` key; ``None`` when nothing was uploaded.
        description_text: free-text description; may be empty or ``None``.
        symmetry_json_file: optional symmetry JSON upload.

    Returns:
        A 4-tuple ``(preview_json, path_struct, path_scene, path_landmarks)``.
        When *image* is ``None`` the first item is a prompt message and the
        three paths are ``None``.
    """
    # Function-scope import so this block does not depend on the file's import list.
    import tempfile

    # 1) Prepare image np array
    if image is None:
        # The click handler binds four outputs, so return one value per output.
        return "Envie uma imagem.", None, None, None
    if isinstance(image, dict) and "image" in image:
        np_img = image["image"]
    else:
        np_img = np.array(image)

    # 2) Landmarks
    kps, bbox, img_size = extract_landmarks(np_img)
    metrics = metrics_from_landmarks(kps, bbox, img_size)

    # 3) Symmetry: use the uploaded file, or compute automatically when it is
    #    absent or could not be read (loader returned its error dict).
    user_sym = load_symmetry_json(symmetry_json_file)
    load_failed = (
        isinstance(user_sym, dict)
        and user_sym.get("enabled") is False
        and "error" in user_sym
    )
    if user_sym is None or load_failed:
        symmetry_block = compute_symmetry_scores(kps, bbox) if kps is not None else {"enabled": False}
        symmetry_origin = "auto"
    else:
        # Minimal validation: accept dicts carrying a known key, wrap anything else.
        if isinstance(user_sym, dict) and ("symmetry_score" in user_sym or "enabled" in user_sym):
            symmetry_block = user_sym
        else:
            symmetry_block = {"enabled": True, "custom": user_sym}
        symmetry_origin = "uploaded"

    # 4) Parse description
    desc_struct = parse_description(description_text or "")

    # 4.5) Landmarks in a JSON-friendly, rounded form (empty when no face)
    landmarks_list = [
        {"x": round(float(px), 3), "y": round(float(py), 3), "z": round(float(pz), 5)}
        for (px, py, pz) in (kps or [])
    ]

    # 4.7) face_signature: SHA-256 over the serialized, rounded landmarks
    face_signature = None
    if kps is not None:
        lm_bytes = json.dumps(landmarks_list, ensure_ascii=False, sort_keys=True).encode("utf-8")
        face_signature = hashlib.sha256(lm_bytes).hexdigest()

    # 5) Merge all
    structured = build_structured_json(desc_struct, metrics, symmetry_block)
    structured["face_landmarks"] = landmarks_list
    if face_signature:
        structured["face_signature"] = face_signature

    # 6) Fresh, uniquely named output directory under the system temp dir
    out_dir = tempfile.mkdtemp(prefix="face_clone_")

    # 7) Compose Veo 3 scene JSON
    scene = {
        "face_landmarks": landmarks_list,
        "project": {
            "id": f"veo3_face_clone_{uuid.uuid4().hex[:8]}",
            "version": "1.0",
            "created_at": int(time.time()),
        },
        "structured_description": structured,
        "controls": {
            "seed": 123456789,
            "clone_mode": "photorealistic",
            "symmetry_origin": symmetry_origin,
            "face_signature": face_signature,
        },
        "prompt": {
            "summary": "Retrato frontal, fidelidade máxima ao rosto de referência, iluminação neutra, fundo limpo.",
            "hints": [
                "respeitar medidas faciais normalizadas (interocular, largura do nariz)",
                "reproduzir textura e cor do cabelo conforme descrição",
                "manter expressão: sorriso sutil e olhar direto quando indicado",
            ],
        },
    }

    # 8) Save JSONs
    path_landmarks = os.path.join(out_dir, "face_landmarks.json")
    path_struct = os.path.join(out_dir, "face_description_structured.json")
    path_scene = os.path.join(out_dir, "veo3_scene.json")
    for path, payload in (
        (path_landmarks, landmarks_list),
        (path_struct, structured),
        (path_scene, scene),
    ):
        with open(path, "w", encoding="utf-8") as f:
            json.dump(payload, f, ensure_ascii=False, indent=2)

    # 9) Return
    return json.dumps(structured, ensure_ascii=False, indent=2), path_struct, path_scene, path_landmarks
376
+
377
# Gradio UI: inputs (image, optional symmetry JSON, description), one button,
# and four outputs (JSON preview plus three downloadable files).
# NOTE(review): indentation was lost in this copy of the file; the layout below
# assumes only the image and symmetry inputs sit inside the Row and the
# description box is placed beneath it — confirm against the original app.py.
with gr.Blocks(title="Face Clone JSON Builder • Universal") as demo:
    gr.Markdown("## Face Clone JSON Builder (Universal)\nEnvie imagem + descrição + JSON de simetria (opcional). O app funde tudo e gera um JSON pronto para Veo 3.")
    with gr.Row():
        image = gr.Image(type="numpy", label="Imagem (upload/câmera)")
        sym = gr.File(label="Simetria (*.json) — opcional")
    desc = gr.Textbox(lines=12, label="Descrição detalhada (PT/BR)", placeholder="Cole aqui a descrição: formato do rosto, olhos, nariz, boca, cabelo, pele, idade/gênero, postura, vestimenta, expressão...")

    btn = gr.Button("Gerar JSON para Veo 3")
    struct_json = gr.Code(label="face_description_structured.json (preview)", language="json")
    file_struct = gr.File(label="Baixar face_description_structured.json")
    file_scene = gr.File(label="Baixar veo3_scene.json")
    file_landmarks = gr.File(label="Baixar face_landmarks.json")

    # process() receives (image, description, symmetry file) and must return
    # one value for each of the four output components listed here.
    btn.click(process, inputs=[image, desc, sym], outputs=[struct_json, file_struct, file_scene, file_landmarks])

if __name__ == "__main__":
    # Bind to all interfaces on port 7860 when run as a script.
    demo.launch(server_name="0.0.0.0", server_port=7860)