drrobot9 committed on
Commit
ffdaad9
·
verified ·
1 Parent(s): 59be1c5

initial commit

Browse files
Files changed (4) hide show
  1. Dockerfile +14 -0
  2. facial_diagnostics.py +90 -0
  3. main.py +112 -0
  4. requirements.txt +9 -0
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Dockerfile
FROM python:3.10-slim

WORKDIR /app

COPY requirements.txt .

# ffmpeg/libsm6/libxext6 are native runtime dependencies of OpenCV.
# Install system and Python deps in one layer and clean the apt cache in the
# same RUN so the package lists never land in the final image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 \
    && rm -rf /var/lib/apt/lists/* \
    && pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 7860

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
facial_diagnostics.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# facial_diagnostics.py
# Per-frame facial landmark metrics (eye openness, mouth openness, head tilt,
# smile symmetry) computed with MediaPipe FaceMesh.


import cv2
import numpy as np
import mediapipe as mp
import time
import json
import math
import requests
from statistics import median


# One module-level FaceMesh instance shared by all calls to analyze_frame().
# static_image_mode=False treats input as a video stream (tracks between
# frames); refine_landmarks=True enables the refined iris/lip landmark set.
mp_face_mesh = mp.solutions.face_mesh
FACE_MESH = mp_face_mesh.FaceMesh(
    static_image_mode=False,
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.6,
    min_tracking_confidence=0.6
)

# MediaPipe FaceMesh landmark indices (468/478-point topology).
# LEFT_EYE / RIGHT_EYE: 6-point eye contours in the order expected by
# eye_aspect_ratio() — [outer corner, upper x2, inner corner, lower x2].
LEFT_EYE = [33,160,158,133,153,144]
RIGHT_EYE = [362,385,387,263,373,380]
# MOUTH: [top lip, bottom lip, left corner, right corner].
MOUTH = [13,14,78,308]
SMILE_L = 61   # left mouth corner
SMILE_R = 291  # right mouth corner
28
+
29
+
30
def eye_aspect_ratio(pts, idx):
    """Eye Aspect Ratio (EAR) for the six landmarks selected by *idx*.

    EAR = (|p1-p5| + |p2-p4|) / (2 * |p0-p3|): the two vertical eyelid gaps
    over twice the horizontal eye width.  Values shrink toward 0 as the eye
    closes; the epsilon keeps the division safe for degenerate landmarks.
    """
    p = [np.asarray(pts[i], dtype=float) for i in idx]
    vertical = np.linalg.norm(p[1] - p[5]) + np.linalg.norm(p[2] - p[4])
    horizontal = np.linalg.norm(p[0] - p[3]) + 1e-8
    return float(vertical / (2.0 * horizontal))
35
+
36
def mouth_ratio(pts):
    """Mouth-openness ratio: vertical lip gap divided by mouth width.

    Both distances are in pixel space, so the ratio is scale-invariant;
    the epsilon guards against a zero-width mouth from bad landmarks.
    """
    top = np.asarray(pts[MOUTH[0]], dtype=float)
    bottom = np.asarray(pts[MOUTH[1]], dtype=float)
    left = np.asarray(pts[MOUTH[2]], dtype=float)
    right = np.asarray(pts[MOUTH[3]], dtype=float)
    gap = np.linalg.norm(top - bottom)
    width = np.linalg.norm(left - right) + 1e-8
    return float(gap / width)
40
+
41
def head_tilt_angle(pts):
    """Head roll in degrees: the angle of the line joining the two eye centres.

    Each eye centre is approximated as the midpoint of that eye's inner and
    outer corner landmarks (33/133 left, 362/263 right).  0 means level;
    sign follows image coordinates (y grows downward).
    """
    left_eye = (np.asarray(pts[33], dtype=float) + np.asarray(pts[133], dtype=float)) / 2.0
    right_eye = (np.asarray(pts[362], dtype=float) + np.asarray(pts[263], dtype=float)) / 2.0
    dx, dy = right_eye - left_eye
    return float(math.degrees(math.atan2(dy, dx)))
46
+
47
def smile_symmetry(pts):
    """Left/right symmetry of the mouth corners, in (0, 1]; 1.0 = symmetric.

    Compares each mouth corner's distance to a facial midline reference
    (the nose tip, FaceMesh landmark 1).

    BUG FIX: the previous implementation measured each corner's distance to
    the midpoint *between* the two corners — which is equidistant from both
    by construction — so it always returned 1.0 regardless of the face.
    """
    nose = np.asarray(pts[1], dtype=float)  # landmark 1 = nose tip (midline proxy)
    left = np.asarray(pts[SMILE_L], dtype=float)
    right = np.asarray(pts[SMILE_R], dtype=float)
    L = np.linalg.norm(left - nose)
    R = np.linalg.norm(right - nose)
    denom = max(L, R)
    # Degenerate case: both corners coincide with the reference point.
    return float(min(L, R) / denom) if denom > 0 else 1.0
54
+
55
def analyze_frame(frame):
    """Run FaceMesh on one BGR frame and return per-frame facial metrics.

    Returns {"found": False} when no face is detected; otherwise a dict with
    the averaged eye aspect ratio, mouth openness, head tilt (degrees) and
    smile symmetry, computed on pixel-space landmark coordinates.
    """
    height, width, _ = frame.shape
    detection = FACE_MESH.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    if not detection.multi_face_landmarks:
        return {"found": False}

    # Landmarks are normalized [0,1]; project onto integer pixel coordinates.
    landmarks = detection.multi_face_landmarks[0].landmark
    pts = [(int(lm.x * width), int(lm.y * height)) for lm in landmarks]

    ear_left = eye_aspect_ratio(pts, LEFT_EYE)
    ear_right = eye_aspect_ratio(pts, RIGHT_EYE)
    return {
        "found": True,
        "ear": (ear_left + ear_right) / 2,
        "mouth_ratio": mouth_ratio(pts),
        "head_tilt": head_tilt_angle(pts),
        "smile_sym": smile_symmetry(pts),
    }
73
+
74
+
75
def aggregate_metrics(metrics):
    """Collapse a list of per-frame metric dicts into per-step medians.

    The median is robust to the occasional bad landmark fit within a step.
    Returns an empty dict when no frame produced a detection.
    """
    if not metrics:
        return {}

    def column(key):
        # Extract one metric across all frames of this step.
        return [frame[key] for frame in metrics]

    return {
        "frames": len(metrics),
        "ear_median": median(column("ear")),
        "mouth_median": median(column("mouth_ratio")),
        "tilt_median": median(column("head_tilt")),
        "smile_median": median(column("smile_sym")),
    }
main.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # main.py
2
+
3
import asyncio
import base64
import json
import os
import tempfile
import time

import cv2
import numpy as np
import requests
from fastapi import FastAPI, WebSocket
from fastapi.middleware.cors import CORSMiddleware

from facial_diagnostics import analyze_frame, aggregate_metrics
17
+
18
app = FastAPI()


# Open CORS so a browser front-end served from any origin can reach the REST
# and WebSocket endpoints.
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# rejected by browsers for credentialed requests — confirm whether
# credentials are actually needed, or pin the front-end origin.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
28
+
29
# Scripted examination protocol for the websocket session: each step's
# "prompt" is sent to the client, then incoming webcam frames are analysed
# for "duration" seconds before advancing to the next step.
CLINICAL_STEPS = [
    {"id": "baseline", "prompt": "Please relax and look into the camera you are about to be facially diagnosed.", "duration": 8},
    {"id": "blink", "prompt": "Blink twice rapidly now.", "duration": 8},
    {"id": "smile", "prompt": "Smile widely and hold it.", "duration": 9},
    {"id": "open_mouth", "prompt": "Open your mouth wide.", "duration": 10},
    {"id": "turn_head", "prompt": "Turn your head left then right.", "duration": 10}
]
36
+
37
+
38
# External service configuration.
LLM_URL = "https://remostart-super-test-verstion-three.hf.space/ask"
# SECURITY FIX: the ElevenLabs API key was previously hard-coded here and is
# now in the repository history — that key must be rotated.  Read it from the
# environment instead; the empty default keeps the module importable when TTS
# is not configured (tts() will then fail against the API and return no audio).
ELEVEN_KEY = os.environ.get("ELEVEN_API_KEY", "")
VOICE_ID = "ZthjuvLPty3kTMaNKVKb"
41
+
42
+
43
def tts(text: str) -> bytes:
    """Synthesize *text* with the ElevenLabs TTS API and return raw audio bytes.

    Best-effort: returns b"" when the request fails, times out, or the API
    answers with a non-2xx status.  (Previously there was no timeout and the
    error-JSON body of a failed call was returned — and later played back to
    the client — as if it were audio.)
    """
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
    headers = {"xi-api-key": ELEVEN_KEY, "Content-Type": "application/json"}
    try:
        # Bound the call so a hung TTS service cannot stall the session flow.
        r = requests.post(url, json={"text": text}, headers=headers, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        return b""
    return r.content
48
+
49
+
50
def ask_llm(data):
    """Send aggregated session data to the external LLM and return its answer.

    Falls back to a fixed message when the service is unreachable, times out,
    or returns a non-JSON / error response.  (Previously there was no timeout
    and r.json() could raise, killing the websocket handler mid-session.)
    """
    try:
        r = requests.post(LLM_URL, json={"query": json.dumps(data)}, timeout=60)
        r.raise_for_status()
        return r.json().get("answer", "Unable to interpret results.")
    except (requests.RequestException, ValueError):
        return "Unable to interpret results."
53
+
54
+
55
@app.get("/")
def root():
    """Health-check endpoint confirming the service is up."""
    status = {"message": "Facial Diagnostic API running."}
    return status
58
+
59
@app.websocket("/ws/stream")
async def ws_stream(ws: WebSocket):
    """Drive the scripted clinical session over a websocket.

    Protocol:
      * client sends JSON messages {"frame": "<base64 JPEG>"};
      * server sends {"instruction": ...} at each step change and
        {"step": ..., "summary": ...} when a step's duration elapses;
      * after the last step it sends {"diagnosis": ..., "audio": "<base64>"}
        and closes the socket.
    """
    await ws.accept()

    step_index = 0
    metrics = []          # per-frame metrics collected for the current step
    step_summaries = []   # one aggregated summary per completed step
    step_start = time.time()

    await ws.send_json({"instruction": CLINICAL_STEPS[0]["prompt"]})

    while True:
        msg = await ws.receive_text()
        data = json.loads(msg)

        if "frame" not in data:
            continue

        frame_bytes = base64.b64decode(data["frame"])
        np_arr = np.frombuffer(frame_bytes, np.uint8)
        frame = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
        if frame is None:
            # BUG FIX: a corrupt/truncated JPEG makes imdecode return None,
            # which previously crashed analyze_frame (frame.shape on None).
            continue

        result = analyze_frame(frame)
        if result["found"]:
            metrics.append(result)

        # Advance to the next step once this one's duration has elapsed.
        if time.time() - step_start >= CLINICAL_STEPS[step_index]["duration"]:
            summary = aggregate_metrics(metrics)
            step_summaries.append(
                {"step": CLINICAL_STEPS[step_index]["id"], "summary": summary}
            )
            await ws.send_json(
                {"step": CLINICAL_STEPS[step_index]["id"], "summary": summary}
            )

            metrics = []
            step_index += 1

            if step_index >= len(CLINICAL_STEPS):
                break

            step_start = time.time()
            await ws.send_json({"instruction": CLINICAL_STEPS[step_index]["prompt"]})

    # BUG FIX: previously only the static step definitions (CLINICAL_STEPS)
    # were sent to the LLM — every measured summary was discarded, so the
    # "diagnosis" never saw any patient data.
    final_data = {"steps": step_summaries}
    diagnosis = ask_llm(final_data)

    audio_bytes = tts(diagnosis)
    audio_b64 = base64.b64encode(audio_bytes).decode()

    await ws.send_json({"diagnosis": diagnosis, "audio": audio_b64})
    await ws.close()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
fastapi
uvicorn[standard]
requests
opencv-python
mediapipe
numpy
python-multipart
# NOTE(review): python-magic and python-vlc are not imported by any file in
# this commit — confirm they are needed before keeping them in the image.
python-magic
python-vlc