Spaces:

cngsm
/

metricasface

Sleeping

App Files Files Community

cngsm committed on Aug 15, 2025

Commit

e370e60

verified ·

1 Parent(s): d282009

Upload 2 files

Browse files

Files changed (2) hide show

app.py +391 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,391 @@

import hashlib
import io
import json
import os
import time
import uuid
from typing import Any, Dict, List, Tuple

import cv2
import gradio as gr
import mediapipe as mp
import numpy as np
from PIL import Image
# -----------------------------
# Geometry helpers
# -----------------------------
def dist2D(a, b):
    """Return the Euclidean distance between ``a`` and ``b`` in the x/y plane.

    Only the first two components of each point are read, so (x, y, z)
    tuples can be passed directly; any further component is ignored.
    """
    dx = a[0] - b[0]
    dy = a[1] - b[1]
    return (dx ** 2 + dy ** 2) ** 0.5
# -----------------------------
# Face mesh indices (subset)
# -----------------------------
# Named positions inside the per-face keypoint list.
# NOTE(review): presumably MediaPipe Face Mesh landmark indices -- confirm
# against the MediaPipe landmark map before relying on the names.
LM = {
    "leftEyeOuter": 33,
    "rightEyeOuter": 263,
    "noseLeft": 97,
    "noseRight": 326,
    "noseTip": 1,
    "chin": 152,
    "midForehead": 10,
}
# Oval for contours (subset path), stored as [start, end] landmark-index edges.
# The edges are generated from two vertex paths: the first runs from 10 to
# 150, the second walks back from 150 to 10 over the same vertices (skipping
# 136/172/138).
# NOTE(review): the path retraces itself instead of closing a loop -- confirm
# whether the remaining part of the contour was meant to be included.
FACEMESH_FACE_OVAL = [
    [start, end]
    for path in (
        (
            10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288, 397,
            365, 379, 378, 400, 377, 152, 148, 176, 149, 138, 172, 136, 150,
        ),
        (
            150, 149, 176, 148, 152, 377, 400, 378, 379, 365, 397, 288, 361,
            323, 454, 356, 389, 251, 284, 332, 297, 338, 10,
        ),
    )
    for start, end in zip(path, path[1:])
]
# (left index, right index) landmark pairs compared across the face midline
# by the automatic symmetry estimate.
LEFT_RIGHT_PAIRS = [
    # outer eyes
    (33, 263),
    # inner eyelids approx
    (159, 386),
    # brows
    (70, 300),
    # cheeks
    (36, 266),
    # mid cheeks
    (50, 280),
    # jaw
    (234, 454),
]
# -----------------------------
# MediaPipe processing
# -----------------------------
def image_to_rgb(np_img):
    """Return ``np_img`` as a contiguous 3-channel RGB array.

    The arrays reaching this function come from Gradio's numpy image input
    (or from ``np.array`` on a PIL image), which are already in RGB channel
    order. The channels are therefore never swapped: a BGR->RGB conversion
    here would hand red/blue-inverted pixels to the face detector.

    Args:
        np_img: Array shaped (H, W), (H, W, 1), (H, W, 3) or (H, W, 4),
            interpreted as grayscale, grayscale, RGB and RGBA respectively.

    Returns:
        A C-contiguous (H, W, 3) array with the same dtype as the input.

    Raises:
        ValueError: If the array has any other shape.
    """
    arr = np.asarray(np_img)
    if arr.ndim == 2:
        # Grayscale: replicate the single plane into three channels.
        arr = np.stack((arr,) * 3, axis=-1)
    elif arr.ndim == 3 and arr.shape[2] == 1:
        arr = np.repeat(arr, 3, axis=2)
    elif arr.ndim == 3 and arr.shape[2] == 4:
        # RGBA: drop the alpha plane, keep the colour order.
        arr = arr[:, :, :3]
    elif not (arr.ndim == 3 and arr.shape[2] == 3):
        raise ValueError(f"Unsupported image shape: {arr.shape}")
    # Slicing/stacking can leave a non-contiguous view; normalise the layout
    # before the array is passed on to native code.
    return np.ascontiguousarray(arr)
def extract_landmarks(np_img):
    """Run MediaPipe FaceMesh on one image and return its landmarks.

    Returns:
        A ``(kps, bbox, (w, h))`` tuple. ``kps`` is a list of ``(x, y, z)``
        tuples with x/y scaled to pixels by the image width/height and z
        passed through as reported by MediaPipe. ``bbox`` is
        ``(x, y, width, height)`` around the landmarks, with its corners
        clamped to the image. ``kps`` and ``bbox`` are both ``None`` when no
        face is found.
    """
    rgb = image_to_rgb(np_img)
    height, width = rgb.shape[:2]
    size = (width, height)

    # A fresh single-face detector in static-image mode is created per call.
    with mp.solutions.face_mesh.FaceMesh(
        static_image_mode=True, refine_landmarks=True, max_num_faces=1
    ) as mesh:
        result = mesh.process(rgb)

    faces = result.multi_face_landmarks
    if not faces:
        return None, None, size

    kps = [(pt.x * width, pt.y * height, pt.z) for pt in faces[0].landmark]
    xs = [pt[0] for pt in kps]
    ys = [pt[1] for pt in kps]
    # Landmarks may fall outside the frame, so clamp the box corners.
    x_min = max(0, int(np.min(xs)))
    x_max = min(width - 1, int(np.max(xs)))
    y_min = max(0, int(np.min(ys)))
    y_max = min(height - 1, int(np.max(ys)))
    return kps, (x_min, y_min, x_max - x_min, y_max - y_min), size
# -----------------------------
# Symmetry estimation (automatic fallback)
# -----------------------------
def compute_symmetry_scores(kps, bbox):
    """Estimate left/right facial symmetry from mirrored landmark pairs.

    The vertical centre line of ``bbox`` is used as the midline. For every
    pair in ``LEFT_RIGHT_PAIRS`` whose indices exist in ``kps``, the
    difference between the two horizontal distances to that midline is
    divided by the bbox width. The score is ``1 - 2 * mean(difference)``,
    floored at 0 (0.0 when no pair could be used).

    Returns:
        A dict with ``enabled``, ``symmetry_score``, ``method`` and
        ``pairs_used``; or ``{"enabled": False, "note": ...}`` when ``kps``
        is ``None``.
    """
    if kps is None:
        return {"enabled": False, "note": "sem rosto detectado"}

    left, _, width, _ = bbox
    midline = left + width / 2.0
    count = len(kps)

    diffs = []
    for a, b in LEFT_RIGHT_PAIRS:
        if a >= count or b >= count:
            continue
        xa, _, _ = kps[a]
        xb, _, _ = kps[b]
        # Compare how far each point of the pair sits from the midline.
        gap = abs(abs(xa - midline) - abs(xb - midline))
        diffs.append(gap / max(width, 1.0))

    # crude normalization: a mean offset of half the bbox width maps to 0
    score = max(0.0, 1.0 - float(np.mean(diffs)) * 2.0) if diffs else 0.0

    return {
        "enabled": True,
        "symmetry_score": round(score, 3),
        "method": "midline-distance left/right pairs (normalized)",
        "pairs_used": len(diffs),
    }
def load_symmetry_json(fileobj):
    """Load the optional user-supplied symmetry JSON.

    Accepted inputs:
      * ``None`` -- nothing was uploaded.
      * a filesystem path (``str`` or ``os.PathLike``).
      * a dict carrying the path under ``"path"`` or ``"name"``
        (NOTE(review): assumed upload payload shape -- confirm against the
        Gradio version in use).
      * an object with a ``name`` attribute pointing at an existing file.
      * a file-like object whose ``read()`` returns ``str`` or ``bytes``.

    Returns:
        The parsed JSON value; ``None`` when nothing readable was supplied;
        or ``{"enabled": False, "error": ...}`` when reading or parsing
        failed, so the caller can fall back to the automatic estimate.
    """
    if fileobj is None:
        return None
    try:
        if isinstance(fileobj, dict):
            fileobj = fileobj.get("path") or fileobj.get("name")
            if fileobj is None:
                return None

        if isinstance(fileobj, (str, os.PathLike)):
            path = os.fspath(fileobj)
        else:
            path = getattr(fileobj, "name", None)

        if isinstance(path, str) and os.path.isfile(path):
            with open(path, "r", encoding="utf-8") as f:
                return json.load(f)

        if hasattr(fileobj, "read"):
            data = fileobj.read()
            # Text-mode streams already return str; only bytes need decoding.
            if isinstance(data, (bytes, bytearray)):
                data = data.decode("utf-8")
            return json.loads(data)
    except Exception as e:
        # Deliberately broad: this is the boundary for an untrusted upload and
        # any failure must degrade to the automatic symmetry estimate.
        return {"enabled": False, "error": f"Falha ao ler simetria: {e}"}
    return None
# -----------------------------
# Simple rule-based parser for description (PT-BR)
# -----------------------------
def parse_description(text: str) -> Dict[str, Any]:
    """Convert a free-text PT-BR face description into a structured dict.

    The parser is purely keyword based: the text is lower-cased and each
    attribute is chosen by looking for fixed Portuguese phrases. Attributes
    with no matching phrase fall back to a default label (mostly
    "indefinido").

    Args:
        text: Description typed by the user; ``None`` and "" are accepted.

    Returns:
        A dict with the keys ``person``, ``facial_structure``, ``features``,
        ``pose``, ``lighting``, ``contextual_details`` and ``impression``.
    """
    t = (text or "").lower()
    # Standalone numbers found in the text. Age cues are matched against
    # these so that "45 anos" counts but "1945" or "145 cm" does not.
    numbers = set("".join(ch if ch.isdigit() else " " for ch in t).split())

    def has(*keys):
        """True when any of the phrases occurs in the description."""
        return any(k in t for k in keys)

    def has_number(*values):
        """True when any of the values occurs as a standalone number."""
        return any(v in numbers for v in values)

    def pick(rules, default):
        """Return the label of the first (label, phrases) rule that matches."""
        for label, keys in rules:
            if has(*keys):
                return label
        return default

    # person
    if has_number("45", "55") or has("meia-idade", "maduro"):
        age = "45-55"
    elif has("adulto jovem") or has_number("20", "30"):
        age = "adulto jovem"
    else:
        age = "indefinido"
    # Feminine cues take precedence when both genders are mentioned.
    gender = pick(
        [
            ("feminino", ("feminino", "mulher")),
            ("masculino", ("masculino", "homem")),
        ],
        "indefinido",
    )

    # face shape ("rosto oval" / "formato oval" are covered by the substring)
    overall_shape = "formato oval" if has("oval") else "indefinido"

    # skin
    skin_tone = pick(
        [
            ("clara", ("pele clara",)),
            ("escura", ("pele escura",)),
            ("média", ("pele média",)),
        ],
        "indefinido",
    )
    skin_texture = "lisa" if has("pele lisa", "textura lisa") else "natural"

    # eyes
    eye_shape = "amendoado" if has("olhos amendoados") else "olhos expressivos"
    # Explicit "olhos ..." phrases are checked first; the generic brown
    # phrases (which also describe hair) are only a fallback.
    eye_color = pick(
        [
            ("castanho escuro", ("olhos castanhos",)),
            ("azul", ("olhos azuis",)),
            ("verde", ("olhos verdes",)),
            ("castanho escuro", ("castanha escura", "castanho escuro")),
        ],
        "indefinido",
    )
    eye_expr = pick(
        [
            ("confiante", ("confiança", "confiante")),
            ("amigável", ("amigável",)),
        ],
        "neutro",
    )

    # eyebrows
    brow = pick(
        [
            ("arqueadas moderadas", ("sobrancelhas arqueadas",)),
            ("grossas", ("sobrancelhas grossas",)),
            ("finas", ("sobrancelhas finas",)),
        ],
        "naturais",
    )

    # nose
    nose_shape = "reto" if has("nariz reto") else "proporcional"
    nose_size = "médio" if has("tamanho médio") else "indefinido"

    # mouth
    mouth_shape = "lábios finos" if has("lábios finos") else "natural"
    mouth_expr = pick(
        [
            ("sorriso sutil", ("sorriso sutil", "sorriso suave")),
            ("neutra", ("expressão neutra",)),
        ],
        "serena",
    )

    # hair
    hair_length = "curto" if has("cabelo curto") else "indefinido"
    hair_style = "penteado para trás" if has("penteado para trás") else "indefinido"
    hair_color = "castanho escuro" if has("castanha escura", "castanho escuro") else "indefinido"

    # cheeks / jaw
    cheeks = (
        "maçãs do rosto proeminentes"
        if has("maçãs do rosto são proeminentes", "maçãs do rosto proeminentes")
        else "bochechas suaves"
    )
    jawline = "mandíbula bem definida" if has("mandíbula bem definida") else "indefinida"
    chin = "queixo arredondado" if has("queixo é arredondado", "queixo arredondado") else "indefinido"
    forehead = "testa alta" if has("testa é alta", "testa alta") else "indefinida"

    # pose / gaze
    pose_head_tilt = pick(
        [
            ("cabeça ereta", ("postura é ereta",)),
            ("ligeiramente inclinada", ("ligeiramente inclinada",)),
        ],
        "indefinido",
    )
    gaze = "olhar direto" if has("olhar direto", "apresentação", "discursando") else "neutro"

    # lighting
    lighting = "frontal suave" if has("iluminação", "fundo neutro") else "indefinido"

    # attire / context
    garments = []
    if has("camisa social preta"):
        garments.append("camisa social preta")
    if has("paletó cinza"):
        garments.append("paletó cinza de um botão")
    if garments:
        attire = ", ".join(garments)
    else:
        attire = "formal" if has("paletó", "terno", "social") else "casual"
    accessories = "nenhum" if has("sem óculos", "sem brincos", "não há acessórios") else "indefinido"
    background = "fundo neutro" if has("fundo neutro") else "indefinido"

    # high-level impression
    impression = (
        "confiança e seriedade, com um sorriso sutil"
        if has("confiança", "seriedade", "sorriso sutil", "sorriso suave")
        else "neutra"
    )

    return {
        "person": {"age": age, "gender": gender},
        "facial_structure": {
            "overall_shape": overall_shape,
            "proportions": "proporções equilibradas",
            "skin_texture": f"pele {skin_texture}",
            "skin_tone": skin_tone,
            "jawline": jawline,
            "chin": chin,
            "forehead": forehead,
        },
        "features": {
            "eyes": {
                "shape": eye_shape,
                "color": eye_color,
                "expression": eye_expr,
                "eyebrows": brow,
            },
            "nose": {"shape": nose_shape, "size": nose_size},
            "mouth": {"shape": mouth_shape, "expression": mouth_expr},
            "hair": {"length": hair_length, "style": hair_style, "color": hair_color},
            "cheeks": {"shape": cheeks},
        },
        "pose": {"head_tilt": pose_head_tilt, "gaze": gaze},
        "lighting": {"direction": lighting},
        "contextual_details": {"attire": attire, "accessories": accessories, "background": background},
        "impression": impression,
    }
# -----------------------------
# Merge metrics + description + symmetry
# -----------------------------
def build_structured_json(desc_struct, metrics, symmetry_block):
    """Merge the parsed description, landmark metrics and symmetry data.

    Every key of ``desc_struct`` except ``impression`` is copied into the
    result, followed by ``expression_metrics``, ``symmetry`` and
    ``metrics``. The expression scores are placeholders: happiness is 0.6
    when the impression text mentions "sorriso" and 0.4 otherwise, calmness
    is always 0.8. A ``symmetry_score`` entry is added only when
    ``symmetry_block`` is a non-empty dict.
    """
    impression = desc_struct.get("impression", "") or ""
    expression = {
        "happiness_score": 0.6 if "sorriso" in impression else 0.4,
        "calmness_score": 0.8,
    }
    if symmetry_block and isinstance(symmetry_block, dict):
        expression["symmetry_score"] = symmetry_block.get("symmetry_score", None)

    merged = {key: value for key, value in desc_struct.items() if key != "impression"}
    merged["expression_metrics"] = expression
    merged["symmetry"] = symmetry_block or {"enabled": False}
    merged["metrics"] = metrics or {}
    return merged
def metrics_from_landmarks(kps, bbox, img_size):
    """Derive basic facial measurements from the detected landmarks.

    Measures the distance between the two outer-eye landmarks and between
    the two nose-side landmarks (indices taken from ``LM``), both in pixels
    and divided by the bbox width.

    Returns:
        A dict with ``image``, ``bbox`` and ``distances`` sections, or
        ``{"error": ...}`` when ``kps`` is ``None``.
    """
    if kps is None:
        return {"error": "Nenhum rosto detectado"}

    width, height = img_size
    left, top, box_w, box_h = bbox
    # Guard against a zero-width bbox when normalising.
    scale = max(box_w, 1.0)

    interocular_px = dist2D(kps[LM["leftEyeOuter"]], kps[LM["rightEyeOuter"]])
    nose_width_px = dist2D(kps[LM["noseLeft"]], kps[LM["noseRight"]])

    distances = {
        "interocular_px": round(float(interocular_px), 2),
        "interocular_norm": round(float(interocular_px / scale), 3),
        "nose_width_px": round(float(nose_width_px), 2),
        "nose_width_norm": round(float(nose_width_px / scale), 3),
    }
    return {
        "image": {"width": int(width), "height": int(height)},
        "bbox": {"x": int(left), "y": int(top), "w": int(box_w), "h": int(box_h)},
        "distances": distances,
    }
# -----------------------------
# Main pipeline
# -----------------------------
def _write_json(path, payload):
    """Write ``payload`` to ``path`` as pretty-printed UTF-8 JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)


def process(image, description_text, symmetry_json_file):
    """Run the full pipeline for one image and write the resulting JSON files.

    Steps: detect landmarks, compute metrics, pick the symmetry data
    (uploaded file when it loads, automatic estimate otherwise), parse the
    description, merge everything and compose the Veo 3 scene JSON.

    Args:
        image: Image array, or a dict carrying the array under ``"image"``.
        description_text: Free-text description (may be ``None``).
        symmetry_json_file: Optional symmetry JSON upload (may be ``None``).

    Returns:
        A 4-tuple matching the four outputs this callback is wired to:
        ``(structured JSON text, path to face_description_structured.json,
        path to veo3_scene.json, path to face_landmarks.json)``. When no
        image is supplied the first item is a message and the three paths
        are ``None``.
    """
    # 1) Prepare image np array
    np_img = image["image"] if isinstance(image, dict) and "image" in image else image
    if np_img is None:
        # One value per wired output, otherwise the UI callback fails.
        return "Envie uma imagem.", None, None, None
    np_img = np.array(np_img)

    # 2) Landmarks
    kps, bbox, img_size = extract_landmarks(np_img)
    metrics = metrics_from_landmarks(kps, bbox, img_size)

    # 3) Symmetry: load file or compute automatic
    user_sym = load_symmetry_json(symmetry_json_file)
    load_failed = (
        isinstance(user_sym, dict)
        and user_sym.get("enabled") is False
        and "error" in user_sym
    )
    if user_sym is None or load_failed:
        symmetry_block = compute_symmetry_scores(kps, bbox) if kps is not None else {"enabled": False}
        symmetry_origin = "auto"
    else:
        # validate presence of required keys minimally
        if isinstance(user_sym, dict) and ("symmetry_score" in user_sym or "enabled" in user_sym):
            symmetry_block = user_sym
        else:
            symmetry_block = {"enabled": True, "custom": user_sym}
        symmetry_origin = "uploaded"

    # 4) Parse description
    desc_struct = parse_description(description_text or "")

    # 4.5) Prepare landmarks list for output
    landmarks_list = [
        {"x": round(float(px), 3), "y": round(float(py), 3), "z": round(float(pz), 5)}
        for (px, py, pz) in (kps or [])
    ]

    # 4.7) Compute face_signature hash over the serialized landmarks
    face_signature = None
    if kps is not None:
        lm_bytes = json.dumps(landmarks_list, ensure_ascii=False, sort_keys=True).encode("utf-8")
        face_signature = hashlib.sha256(lm_bytes).hexdigest()

    # 5) Merge all
    structured = build_structured_json(desc_struct, metrics, symmetry_block)
    structured["face_landmarks"] = landmarks_list
    if face_signature:
        structured["face_signature"] = face_signature

    # 6) Compose Veo 3 scene JSON
    scene = {
        "face_landmarks": landmarks_list,
        "project": {
            "id": f"veo3_face_clone_{uuid.uuid4().hex[:8]}",
            "version": "1.0",
            "created_at": int(time.time()),
        },
        "structured_description": structured,
        "controls": {
            "seed": 123456789,
            "clone_mode": "photorealistic",
            "symmetry_origin": symmetry_origin,
            "face_signature": face_signature,
        },
        "prompt": {
            "summary": "Retrato frontal, fidelidade máxima ao rosto de referência, iluminação neutra, fundo limpo.",
            "hints": [
                "respeitar medidas faciais normalizadas (interocular, largura do nariz)",
                "reproduzir textura e cor do cabelo conforme descrição",
                "manter expressão: sorriso sutil e olhar direto quando indicado",
            ],
        },
    }

    # 7) Save JSONs -- the output directory must exist before any file is
    # written into it.
    out_dir = os.path.join("/tmp", f"face_clone_{uuid.uuid4().hex[:8]}")
    os.makedirs(out_dir, exist_ok=True)
    path_landmarks = os.path.join(out_dir, "face_landmarks.json")
    path_struct = os.path.join(out_dir, "face_description_structured.json")
    path_scene = os.path.join(out_dir, "veo3_scene.json")
    _write_json(path_landmarks, landmarks_list)
    _write_json(path_struct, structured)
    _write_json(path_scene, scene)

    # 8) Return
    return json.dumps(structured, ensure_ascii=False, indent=2), path_struct, path_scene, path_landmarks
# Gradio UI: image + optional symmetry file + description in, JSON preview and
# three downloadable JSON files out.
with gr.Blocks(title="Face Clone JSON Builder • Universal") as demo:
    gr.Markdown(
        "## Face Clone JSON Builder (Universal)\n"
        "Envie imagem + descrição + JSON de simetria (opcional). "
        "O app funde tudo e gera um JSON pronto para Veo 3."
    )
    with gr.Row():
        image = gr.Image(type="numpy", label="Imagem (upload/câmera)")
        sym = gr.File(label="Simetria (*.json) — opcional")
    desc = gr.Textbox(
        lines=12,
        label="Descrição detalhada (PT/BR)",
        placeholder=(
            "Cole aqui a descrição: formato do rosto, olhos, nariz, boca, cabelo, "
            "pele, idade/gênero, postura, vestimenta, expressão..."
        ),
    )
    btn = gr.Button("Gerar JSON para Veo 3")
    struct_json = gr.Code(
        label="face_description_structured.json (preview)",
        language="json",
    )
    file_struct = gr.File(label="Baixar face_description_structured.json")
    file_scene = gr.File(label="Baixar veo3_scene.json")
    file_landmarks = gr.File(label="Baixar face_landmarks.json")
    # The callback returns one value per entry in ``outputs``, in this order.
    btn.click(
        process,
        inputs=[image, desc, sym],
        outputs=[struct_json, file_struct, file_scene, file_landmarks],
    )

if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio>=4.44.0
+mediapipe==0.10.14
+opencv-python>=4.7.0.72
+numpy
+Pillow