Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- app.py +391 -0
- requirements.txt +5 -0
app.py
ADDED
|
@@ -0,0 +1,391 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import hashlib
import json, os, io, uuid, time
from typing import Dict, Any, List, Tuple

import cv2
import gradio as gr
import mediapipe as mp
import numpy as np
from PIL import Image
|
| 9 |
+
|
| 10 |
+
# -----------------------------
|
| 11 |
+
# Geometry helpers
|
| 12 |
+
# -----------------------------
|
| 13 |
+
def dist2D(a, b):
    """Return the Euclidean distance between ``a`` and ``b`` in the x/y plane.

    Only the first two components of each point are read, so tuples that
    carry a third value (such as the landmark depth) can be passed directly.
    """
    dx = a[0] - b[0]
    dy = a[1] - b[1]
    return (dx ** 2 + dy ** 2) ** 0.5
|
| 15 |
+
|
| 16 |
+
# -----------------------------
|
| 17 |
+
# Face mesh indices (subset)
|
| 18 |
+
# -----------------------------
|
| 19 |
+
# Named face-mesh landmark indices used when measuring distances on a face.
LM = {
    "leftEyeOuter": 33,
    "rightEyeOuter": 263,
    "noseLeft": 97,
    "noseRight": 326,
    "noseTip": 1,
    "chin": 152,
    "midForehead": 10,
}
|
| 28 |
+
|
| 29 |
+
# Oval for contours (subset path)
|
| 30 |
+
# Vertex path behind the contour edges. It runs from landmark 10 down to 152,
# continues on to 150, and then retraces the same vertices back up to 10.
_FACE_OVAL_PATH = (
    10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288,
    397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 138, 172,
    136, 150, 149, 176, 148, 152, 377, 400, 378, 379, 365, 397,
    288, 361, 323, 454, 356, 389, 251, 284, 332, 297, 338, 10,
)

# Each edge is a two-element list [start, end] joining consecutive path vertices.
FACEMESH_FACE_OVAL = [
    [start, end] for start, end in zip(_FACE_OVAL_PATH, _FACE_OVAL_PATH[1:])
]
|
| 40 |
+
|
| 41 |
+
# Landmark index pairs (one point per side of the face) whose horizontal
# distances from the bounding-box midline are compared by
# compute_symmetry_scores. Labels are approximate.
LEFT_RIGHT_PAIRS = [
    # outer eyes
    (33, 263),
    # inner eyelids approx
    (159, 386),
    # brows
    (70, 300),
    # cheeks
    (36, 266),
    # mid cheeks
    (50, 280),
    # jaw
    (234, 454),
]
|
| 49 |
+
|
| 50 |
+
# -----------------------------
|
| 51 |
+
# MediaPipe processing
|
| 52 |
+
# -----------------------------
|
| 53 |
+
def image_to_rgb(np_img):
    """Return ``np_img`` as a 3-channel RGB array, keeping its channel order.

    The arrays reaching this function come from ``gr.Image(type="numpy")``,
    which delivers pixels in RGB order. Treating them as BGR (as an OpenCV
    ``BGR2RGB`` conversion does) would swap red and blue before face
    detection, so no channel reordering is performed here.

    Args:
        np_img: ``(H, W)`` grayscale, ``(H, W, 3)`` RGB or ``(H, W, 4)`` RGBA
            array.

    Returns:
        A C-contiguous ``(H, W, 3)`` array with the same dtype as the input.
    """
    if np_img.ndim == 2:
        # Grayscale: replicate the single channel into R, G and B.
        return np.stack((np_img,) * 3, axis=-1)
    if np_img.shape[2] == 4:
        # RGBA: drop alpha, leave the colour channels where they are.
        return np.ascontiguousarray(np_img[:, :, :3])
    return np.ascontiguousarray(np_img)
|
| 59 |
+
|
| 60 |
+
def extract_landmarks(np_img):
    """Run MediaPipe Face Mesh on one image and return pixel-space landmarks.

    Returns:
        A ``(kps, bbox, (w, h))`` triple. ``kps`` is a list of
        ``(x_px, y_px, z)`` tuples for the first detected face, where x and y
        are the normalized landmark coordinates multiplied by the image width
        and height and z is passed through unchanged. ``bbox`` is
        ``(x, y, width, height)`` around the landmarks, clamped to the image.
        When no face is detected, ``kps`` and ``bbox`` are both ``None``.
    """
    rgb = image_to_rgb(np_img)
    height, width = rgb.shape[:2]
    size = (width, height)

    face_mesh_module = mp.solutions.face_mesh
    with face_mesh_module.FaceMesh(
        static_image_mode=True,
        refine_landmarks=True,
        max_num_faces=1,
    ) as mesh:
        result = mesh.process(rgb)
        if not result.multi_face_landmarks:
            return None, None, size

        # Only the first face is used (max_num_faces=1).
        points = [
            (pt.x * width, pt.y * height, pt.z)
            for pt in result.multi_face_landmarks[0].landmark
        ]
        xs = [p[0] for p in points]
        ys = [p[1] for p in points]

        # Clamp the landmark extent to valid pixel indices.
        left = max(0, int(np.min(xs)))
        right = min(width - 1, int(np.max(xs)))
        top = max(0, int(np.min(ys)))
        bottom = min(height - 1, int(np.max(ys)))

        return points, (left, top, right - left, bottom - top), size
|
| 75 |
+
|
| 76 |
+
# -----------------------------
|
| 77 |
+
# Symmetry estimation (automatic fallback)
|
| 78 |
+
# -----------------------------
|
| 79 |
+
def compute_symmetry_scores(kps, bbox):
    """Estimate left/right facial symmetry from landmark positions.

    For every pair in ``LEFT_RIGHT_PAIRS`` whose indices exist in ``kps``, the
    horizontal distance of each point from the bounding-box midline is taken
    and the absolute difference is divided by the bounding-box width. The
    score is ``1 - 2 * mean(differences)``, floored at 0, and is 0 when no
    pair could be used.

    Returns:
        ``{"enabled": False, "note": ...}`` when ``kps`` is ``None``;
        otherwise a dict with the rounded score, a method label and the
        number of pairs used.
    """
    if kps is None:
        return {"enabled": False, "note": "sem rosto detectado"}

    left, _top, width, _height = bbox
    midline = left + width / 2.0
    scale = max(width, 1.0)  # guards against a zero-width box
    available = len(kps)

    deviations = [
        abs(abs(kps[i][0] - midline) - abs(kps[j][0] - midline)) / scale
        for i, j in LEFT_RIGHT_PAIRS
        if i < available and j < available
    ]

    if deviations:
        # crude normalization
        score = max(0.0, 1.0 - float(np.mean(deviations)) * 2.0)
    else:
        score = 0.0

    return {
        "enabled": True,
        "symmetry_score": round(score, 3),
        "method": "midline-distance left/right pairs (normalized)",
        "pairs_used": len(deviations),
    }
|
| 103 |
+
|
| 104 |
+
def load_symmetry_json(fileobj):
    """Load an optional, user-supplied symmetry JSON.

    Accepted inputs:
        * ``None`` (nothing uploaded),
        * a filesystem path string,
        * a mapping with a ``"path"`` or ``"name"`` entry pointing at a file,
        * an object whose ``.name`` attribute points at a file,
        * a readable file-like object opened in binary or text mode.

    Returns:
        The parsed JSON value; ``None`` when nothing was supplied or the
        input form is not recognised; or
        ``{"enabled": False, "error": "..."}`` when reading or parsing fails.
    """
    if fileobj is None:
        return None
    try:
        # Resolve a candidate path from the various shapes an upload can take.
        path = None
        if isinstance(fileobj, (str, os.PathLike)):
            path = fileobj
        elif isinstance(fileobj, dict):
            path = fileobj.get("path") or fileobj.get("name")
        elif hasattr(fileobj, "name"):
            path = fileobj.name

        if isinstance(path, (str, os.PathLike)) and os.path.isfile(path):
            with open(path, "r", encoding="utf-8") as f:
                return json.load(f)

        # Fall back to reading the object itself (bytes or text stream).
        if hasattr(fileobj, "read"):
            data = fileobj.read()
            if isinstance(data, (bytes, bytearray)):
                data = data.decode("utf-8")
            return json.loads(data)
    except Exception as e:
        # Best-effort by design: the caller falls back to automatic symmetry
        # estimation when it sees this error block.
        return {"enabled": False, "error": f"Falha ao ler simetria: {e}"}
    return None
|
| 122 |
+
|
| 123 |
+
# -----------------------------
|
| 124 |
+
# Simple rule-based parser for description (PT-BR)
|
| 125 |
+
# -----------------------------
|
| 126 |
+
def parse_description(text: str) -> Dict[str, Any]:
    """Map a free-text PT-BR face description onto a structured dict.

    Matching is plain, case-insensitive substring search against a fixed set
    of Portuguese phrases; every attribute falls back to a default label when
    none of its phrases occur in the text.

    Args:
        text: The description; ``None`` or an empty string yields all defaults.

    Returns:
        A dict with the keys ``person``, ``facial_structure``, ``features``,
        ``pose``, ``lighting``, ``contextual_details`` and ``impression``.
    """
    t = (text or "").lower()

    def has(*keys):
        return any(k in t for k in keys)

    def pick(default, *rules):
        # Each rule is (label, phrase, phrase, ...); the first rule with a
        # phrase present in the text wins.
        for label, *phrases in rules:
            if has(*phrases):
                return label
        return default

    # person
    # NOTE(review): the bare digit strings also match inside longer numbers
    # (e.g. "2023" contains "20"); kept as in the original heuristic.
    age = pick(
        "indefinido",
        ("45-55", "45", "55", "meia-idade", "maduro"),
        ("adulto jovem", "adulto jovem", "20", "30"),
    )
    # The feminine wording takes precedence when both genders are mentioned.
    gender = pick(
        "indefinido",
        ("feminino", "feminino", "mulher"),
        ("masculino", "masculino", "homem"),
    )

    # face shape
    shape = pick("indefinido", ("oval", "rosto oval", "formato oval", "oval"))

    # skin
    skin_tone = pick(
        "indefinido",
        ("clara", "pele clara"),
        ("escura", "pele escura"),
        ("média", "pele média"),
    )
    skin_texture = pick("natural", ("lisa", "pele lisa", "textura lisa"))

    # eyes
    eye_shape = pick("indefinido", ("amendoado", "olhos amendoados"))
    # Eye-specific phrases are checked first. The generic dark-brown wording
    # is also how hair colour is described, so it is only a fallback here;
    # otherwise "cabelo castanho escuro ... olhos azuis" would report brown
    # eyes.
    eye_color = pick(
        None,
        ("castanho escuro", "olhos castanhos"),
        ("azul", "olhos azuis"),
        ("verde", "olhos verdes"),
        ("castanho escuro", "castanha escura", "castanho escuro"),
    )
    eye_expr = pick(
        "neutro",
        ("confiante", "confiança", "confiante"),
        ("amigável", "amigável"),
    )

    # eyebrows
    brow = pick(
        "naturais",
        ("arqueadas moderadas", "sobrancelhas arqueadas"),
        ("grossas", "sobrancelhas grossas"),
        ("finas", "sobrancelhas finas"),
    )

    # nose
    nose_shape = pick("proporcional", ("reto", "nariz reto"))
    nose_size = pick("indefinido", ("médio", "tamanho médio"))

    # mouth
    mouth_shape = pick("natural", ("lábios finos", "lábios finos"))
    mouth_expr = pick(
        "serena",
        ("sorriso sutil", "sorriso sutil", "sorriso suave"),
        ("neutra", "expressão neutra"),
    )

    # hair
    hair_length = pick("indefinido", ("curto", "cabelo curto"))
    hair_style = pick("indefinido", ("penteado para trás", "penteado para trás"))
    hair_color = pick(
        "indefinido",
        ("castanho escuro", "castanha escura", "castanho escuro"),
    )

    # cheeks / jaw
    cheeks = pick(
        "bochechas suaves",
        (
            "maçãs do rosto proeminentes",
            "maçãs do rosto são proeminentes",
            "maçãs do rosto proeminentes",
        ),
    )
    jawline = pick("indefinida", ("mandíbula bem definida", "mandíbula bem definida"))
    chin = pick(
        "indefinido",
        ("queixo arredondado", "queixo é arredondado", "queixo arredondado"),
    )
    forehead = pick("indefinida", ("testa alta", "testa é alta", "testa alta"))

    # pose / gaze
    pose_head_tilt = pick(
        "indefinido",
        ("cabeça ereta", "postura é ereta"),
        ("ligeiramente inclinada", "ligeiramente inclinada"),
    )
    gaze = pick("neutro", ("olhar direto", "olhar direto", "apresentação", "discursando"))

    # lighting
    lighting = pick("indefinido", ("frontal suave", "iluminação", "fundo neutro"))

    # attire / context
    attire_items = []
    if has("camisa social preta"):
        attire_items.append("camisa social preta")
    if has("paletó cinza"):
        attire_items.append("paletó cinza de um botão")
    if attire_items:
        attire = ", ".join(attire_items)
    else:
        attire = pick("casual", ("formal", "paletó", "terno", "social"))
    accessories = pick(
        "indefinido",
        ("nenhum", "sem óculos", "sem brincos", "não há acessórios"),
    )
    background = pick("indefinido", ("fundo neutro", "fundo neutro"))

    # high-level impression
    impression = pick(
        "neutra",
        (
            "confiança e seriedade, com um sorriso sutil",
            "confiança",
            "seriedade",
            "sorriso sutil",
            "sorriso suave",
        ),
    )

    return {
        "person": {"age": age, "gender": gender},
        "facial_structure": {
            "overall_shape": f"formato {shape}" if shape != "indefinido" else "indefinido",
            "proportions": "proporções equilibradas",
            "skin_texture": f"pele {skin_texture}",
            "skin_tone": skin_tone,
            "jawline": jawline,
            "chin": chin,
            "forehead": forehead,
        },
        "features": {
            "eyes": {
                "shape": eye_shape if eye_shape != "indefinido" else "olhos expressivos",
                "color": eye_color or "indefinido",
                "expression": eye_expr,
                "eyebrows": brow,
            },
            "nose": {"shape": nose_shape, "size": nose_size},
            "mouth": {"shape": mouth_shape, "expression": mouth_expr},
            "hair": {"length": hair_length, "style": hair_style, "color": hair_color},
            "cheeks": {"shape": cheeks},
        },
        "pose": {"head_tilt": pose_head_tilt, "gaze": gaze},
        "lighting": {"direction": lighting},
        "contextual_details": {
            "attire": attire,
            "accessories": accessories,
            "background": background,
        },
        "impression": impression,
    }
|
| 238 |
+
|
| 239 |
+
# -----------------------------
|
| 240 |
+
# Merge metrics + description + symmetry
|
| 241 |
+
# -----------------------------
|
| 242 |
+
def build_structured_json(desc_struct, metrics, symmetry_block):
    """Merge the parsed description, landmark metrics and symmetry data.

    Every key of ``desc_struct`` except ``"impression"`` is copied into the
    result. The impression only drives the placeholder happiness score
    (0.6 when it mentions "sorriso", 0.4 otherwise). A non-empty symmetry
    dict also contributes its ``symmetry_score`` to the expression metrics.

    Returns:
        A new dict with the description entries followed by
        ``expression_metrics``, ``symmetry`` and ``metrics``.
    """
    impression_text = desc_struct.get("impression", "") or ""

    # expression metrics can mix subjective + objective placeholders
    expression = {
        "happiness_score": 0.6 if "sorriso" in impression_text else 0.4,
        "calmness_score": 0.8,
    }
    if symmetry_block and isinstance(symmetry_block, dict):
        expression["symmetry_score"] = symmetry_block.get("symmetry_score", None)

    merged = {
        key: value for key, value in desc_struct.items() if key != "impression"
    }
    merged["expression_metrics"] = expression
    merged["symmetry"] = symmetry_block or {"enabled": False}
    merged["metrics"] = metrics or {}
    return merged
|
| 258 |
+
|
| 259 |
+
def metrics_from_landmarks(kps, bbox, img_size):
    """Derive simple distance metrics from the detected landmarks.

    Measures the outer-eye distance and the nose width in pixels and also
    reports each divided by the bounding-box width (with a floor of 1.0 on
    the divisor).

    Returns:
        ``{"error": ...}`` when ``kps`` is ``None``; otherwise a dict with the
        image size, the bounding box and the rounded distances.
    """
    if kps is None:
        return {"error": "Nenhum rosto detectado"}

    img_w, img_h = img_size
    left, top, box_w, box_h = bbox
    norm = max(box_w, 1.0)

    eye_span = dist2D(kps[LM["leftEyeOuter"]], kps[LM["rightEyeOuter"]])
    nose_span = dist2D(kps[LM["noseLeft"]], kps[LM["noseRight"]])

    distances = {
        "interocular_px": round(float(eye_span), 2),
        "interocular_norm": round(float(eye_span / norm), 3),
        "nose_width_px": round(float(nose_span), 2),
        "nose_width_norm": round(float(nose_span / norm), 3),
    }
    return {
        "image": {"width": int(img_w), "height": int(img_h)},
        "bbox": {"x": int(left), "y": int(top), "w": int(box_w), "h": int(box_h)},
        "distances": distances,
    }
|
| 281 |
+
|
| 282 |
+
# -----------------------------
|
| 283 |
+
# Main pipeline
|
| 284 |
+
# -----------------------------
|
| 285 |
+
def process(image, description_text, symmetry_json_file):
    """Run the full pipeline for one Gradio request.

    Detects face landmarks, derives metrics, loads or estimates symmetry,
    parses the text description, merges everything and writes three JSON
    files into a fresh directory under ``/tmp``.

    Args:
        image: The uploaded image (array-like), a dict carrying it under
            ``"image"``, or ``None``.
        description_text: Free-text description; may be empty or ``None``.
        symmetry_json_file: Optional uploaded symmetry JSON.

    Returns:
        A 4-tuple matching the four UI outputs:
        ``(structured_json_text, path_struct, path_scene, path_landmarks)``.
        When no image was supplied, the first item is a prompt message and
        the three paths are ``None``.
    """
    # 1) Prepare image np array
    if image is None:
        # One value per output component: preview text plus three files.
        return "Envie uma imagem.", None, None, None
    if isinstance(image, dict) and "image" in image:
        np_img = image["image"]
    else:
        np_img = np.array(image)

    # 2) Landmarks
    kps, bbox, img_size = extract_landmarks(np_img)
    metrics = metrics_from_landmarks(kps, bbox, img_size)

    # 3) Symmetry: use the uploaded file, or compute automatically when
    #    nothing was uploaded or the upload could not be read.
    user_sym = load_symmetry_json(symmetry_json_file)
    upload_failed = (
        isinstance(user_sym, dict)
        and user_sym.get("enabled") is False
        and "error" in user_sym
    )
    if user_sym is None or upload_failed:
        if kps is not None:
            symmetry_block = compute_symmetry_scores(kps, bbox)
        else:
            symmetry_block = {"enabled": False}
        symmetry_origin = "auto"
    else:
        # validate presence of required keys minimally
        if isinstance(user_sym, dict) and ("symmetry_score" in user_sym or "enabled" in user_sym):
            symmetry_block = user_sym
        else:
            symmetry_block = {"enabled": True, "custom": user_sym}
        symmetry_origin = "uploaded"

    # 4) Parse description
    desc_struct = parse_description(description_text or "")

    # 4.5) Prepare landmarks list for output
    landmarks_list = []
    if kps is not None:
        landmarks_list = [
            {"x": round(float(px), 3), "y": round(float(py), 3), "z": round(float(pz), 5)}
            for (px, py, pz) in kps
        ]

    # 4.7) Compute face_signature: SHA-256 over the serialized landmarks
    face_signature = None
    if kps is not None:
        lm_bytes = json.dumps(landmarks_list, ensure_ascii=False, sort_keys=True).encode("utf-8")
        face_signature = hashlib.sha256(lm_bytes).hexdigest()

    # 5) Merge all
    structured = build_structured_json(desc_struct, metrics, symmetry_block)
    structured["face_landmarks"] = landmarks_list
    if face_signature:
        structured["face_signature"] = face_signature

    # 6) Compose Veo 3 scene JSON
    scene = {
        "face_landmarks": landmarks_list,
        "project": {
            "id": f"veo3_face_clone_{uuid.uuid4().hex[:8]}",
            "version": "1.0",
            "created_at": int(time.time()),
        },
        "structured_description": structured,
        "controls": {
            "seed": 123456789,
            "clone_mode": "photorealistic",
            "symmetry_origin": symmetry_origin,
            "face_signature": face_signature if face_signature else None,
        },
        "prompt": {
            "summary": "Retrato frontal, fidelidade máxima ao rosto de referência, iluminação neutra, fundo limpo.",
            "hints": [
                "respeitar medidas faciais normalizadas (interocular, largura do nariz)",
                "reproduzir textura e cor do cabelo conforme descrição",
                "manter expressão: sorriso sutil e olhar direto quando indicado",
            ],
        },
    }

    # 7) Save JSONs. The output directory has to exist before any path is
    #    built from it or any file is written into it.
    out_dir = os.path.join("/tmp", f"face_clone_{uuid.uuid4().hex[:8]}")
    os.makedirs(out_dir, exist_ok=True)
    path_landmarks = os.path.join(out_dir, "face_landmarks.json")
    path_struct = os.path.join(out_dir, "face_description_structured.json")
    path_scene = os.path.join(out_dir, "veo3_scene.json")
    for path, payload in (
        (path_landmarks, landmarks_list),
        (path_struct, structured),
        (path_scene, scene),
    ):
        with open(path, "w", encoding="utf-8") as f:
            json.dump(payload, f, ensure_ascii=False, indent=2)

    # 8) Return
    return json.dumps(structured, ensure_ascii=False, indent=2), path_struct, path_scene, path_landmarks
|
| 374 |
+
|
| 375 |
+
# Gradio UI: three inputs (image, optional symmetry JSON, description) feed
# `process`, whose four return values fill the JSON preview and the three
# download slots.
with gr.Blocks(title="Face Clone JSON Builder • Universal") as demo:
    gr.Markdown("## Face Clone JSON Builder (Universal)\nEnvie imagem + descrição + JSON de simetria (opcional). O app funde tudo e gera um JSON pronto para Veo 3.")
    with gr.Row():
        image = gr.Image(type="numpy", label="Imagem (upload/câmera)")
        sym = gr.File(label="Simetria (*.json) — opcional")
    # NOTE(review): the original indentation was lost in this copy; confirm
    # whether the description box belongs inside the row above or below it.
    desc = gr.Textbox(lines=12, label="Descrição detalhada (PT/BR)", placeholder="Cole aqui a descrição: formato do rosto, olhos, nariz, boca, cabelo, pele, idade/gênero, postura, vestimenta, expressão...")

    btn = gr.Button("Gerar JSON para Veo 3")
    struct_json = gr.Code(label="face_description_structured.json (preview)", language="json")
    file_struct = gr.File(label="Baixar face_description_structured.json")
    file_scene = gr.File(label="Baixar veo3_scene.json")
    file_landmarks = gr.File(label="Baixar face_landmarks.json")

    # The order of `outputs` must match the order of values returned by `process`.
    btn.click(process, inputs=[image, desc, sym], outputs=[struct_json, file_struct, file_scene, file_landmarks])

if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.44.0
|
| 2 |
+
mediapipe==0.10.14
|
| 3 |
+
opencv-python>=4.7.0.72
|
| 4 |
+
numpy
|
| 5 |
+
Pillow
|