Spaces:
Paused
Paused
initial commit
Browse files- Dockerfile +14 -0
- facial_diagnostics.py +90 -0
- main.py +112 -0
- requirements.txt +9 -0
Dockerfile
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Dockerfile
FROM python:3.10-slim

WORKDIR /app

COPY requirements.txt .
# Install OpenCV's native runtime dependencies and Python packages in one
# layer; --no-install-recommends and the apt-cache cleanup keep the image
# small (the original left /var/lib/apt/lists in the final layer).
RUN apt-get update \
    && apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 \
    && pip install --no-cache-dir -r requirements.txt \
    && rm -rf /var/lib/apt/lists/*

COPY . .

EXPOSE 7860

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
facial_diagnostics.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# facial_diagnostics.py
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
import cv2
|
| 5 |
+
import numpy as np
|
| 6 |
+
import mediapipe as mp
|
| 7 |
+
import time
|
| 8 |
+
import json
|
| 9 |
+
import math
|
| 10 |
+
import requests
|
| 11 |
+
from statistics import median
|
| 12 |
+
|
| 13 |
+
# Single shared FaceMesh instance, configured for video (tracking state is
# reused across frames) with refined landmarks for extra iris/lip detail.
mp_face_mesh = mp.solutions.face_mesh
FACE_MESH = mp_face_mesh.FaceMesh(
    static_image_mode=False,
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.6,
    min_tracking_confidence=0.6
)

# MediaPipe FaceMesh landmark indices used by the metric functions below.
LEFT_EYE = [33,160,158,133,153,144]    # 6-point eye contour for EAR, left eye
RIGHT_EYE = [362,385,387,263,373,380]  # 6-point eye contour for EAR, right eye
MOUTH = [13,14,78,308]                 # upper lip, lower lip, left corner, right corner
SMILE_L = 61   # left mouth corner
SMILE_R = 291  # right mouth corner
+
def eye_aspect_ratio(pts, idx):
    """Compute the eye aspect ratio (EAR) for one eye.

    *idx* lists six landmark indices ordered [outer, top1, top2, inner,
    bottom2, bottom1]. The EAR is the mean of the two vertical openings
    divided by the horizontal eye width; a tiny epsilon on the width
    guards against division by zero.
    """
    p = [np.asarray(pts[i], dtype=float) for i in idx]
    vertical = np.linalg.norm(p[1] - p[5]) + np.linalg.norm(p[2] - p[4])
    horizontal = np.linalg.norm(p[0] - p[3]) + 1e-8
    return float(vertical / (2.0 * horizontal))
| 35 |
+
|
| 36 |
+
def mouth_ratio(pts):
    """Return vertical mouth opening divided by mouth width.

    Uses the MOUTH landmark indices (upper lip, lower lip, left corner,
    right corner); the epsilon on the width avoids division by zero for
    degenerate landmarks.
    """
    top = np.asarray(pts[MOUTH[0]], dtype=float)
    bottom = np.asarray(pts[MOUTH[1]], dtype=float)
    left = np.asarray(pts[MOUTH[2]], dtype=float)
    right = np.asarray(pts[MOUTH[3]], dtype=float)
    opening = np.linalg.norm(top - bottom)
    width = np.linalg.norm(left - right) + 1e-8
    return float(opening / width)
| 40 |
+
|
| 41 |
+
def head_tilt_angle(pts):
    """Signed roll angle of the head in degrees; 0 means a level head.

    Computed as the angle of the line joining the two eye centers (each
    center is the midpoint of that eye's inner and outer corners).
    """
    left_center = (np.asarray(pts[33], dtype=float) + np.asarray(pts[133], dtype=float)) / 2.0
    right_center = (np.asarray(pts[362], dtype=float) + np.asarray(pts[263], dtype=float)) / 2.0
    dx, dy = right_center - left_center
    return float(math.degrees(math.atan2(dy, dx)))
| 46 |
+
|
| 47 |
+
def smile_symmetry(pts):
    """Mouth-corner symmetry score in (0, 1]; 1.0 = perfectly symmetric.

    Bug fix: the original measured each corner's distance to the midpoint
    of the two corners — those distances are equal by construction, so the
    function unconditionally returned 1.0. Distances are now measured from
    the nose tip (FaceMesh landmark 1), a stable facial-midline anchor, so
    left/right asymmetry is actually detectable.
    """
    anchor = np.asarray(pts[1], dtype=float)   # nose tip (facial midline)
    left = np.asarray(pts[61], dtype=float)    # left mouth corner (SMILE_L)
    right = np.asarray(pts[291], dtype=float)  # right mouth corner (SMILE_R)
    L = np.linalg.norm(left - anchor)
    R = np.linalg.norm(right - anchor)
    if max(L, R) == 0:
        return 1.0  # degenerate landmarks; treat as symmetric
    return float(min(L, R) / max(L, R))
| 54 |
+
|
| 55 |
+
def analyze_frame(frame):
    """Run FaceMesh on one BGR frame and return per-frame facial metrics.

    Returns ``{"found": False}`` when the frame is missing/undecodable or
    no face is detected; otherwise a dict with "found", "ear" (mean eye
    aspect ratio of both eyes), "mouth_ratio", "head_tilt" and "smile_sym".
    """
    # Robustness fix: cv2.imdecode upstream returns None for corrupt input;
    # the original crashed here on frame.shape.
    if frame is None:
        return {"found": False}

    h, w, _ = frame.shape
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # MediaPipe expects RGB
    res = FACE_MESH.process(rgb)

    if not res.multi_face_landmarks:
        return {"found": False}

    lm = res.multi_face_landmarks[0]
    # Normalized landmark coordinates -> integer pixel coordinates.
    pts = [(int(p.x * w), int(p.y * h)) for p in lm.landmark]

    return {
        "found": True,
        "ear": (eye_aspect_ratio(pts, LEFT_EYE) + eye_aspect_ratio(pts, RIGHT_EYE)) / 2,
        "mouth_ratio": mouth_ratio(pts),
        "head_tilt": head_tilt_angle(pts),
        "smile_sym": smile_symmetry(pts)
    }
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def aggregate_metrics(metrics):
    """Collapse a list of per-frame metric dicts into per-session medians.

    Returns an empty dict for empty input; otherwise a dict with the frame
    count and the median of each tracked metric (median resists the
    occasional bad landmark fit better than a mean would).
    """
    if not metrics:
        return {}

    def med(key):
        return median(m[key] for m in metrics)

    return {
        "frames": len(metrics),
        "ear_median": med("ear"),
        "mouth_median": med("mouth_ratio"),
        "tilt_median": med("head_tilt"),
        "smile_median": med("smile_sym"),
    }
main.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# main.py
|
| 2 |
+
|
| 3 |
+
import asyncio
import base64
import json
import os
import tempfile
import time

import cv2
import numpy as np
import requests
from fastapi import FastAPI, WebSocket
from fastapi.middleware.cors import CORSMiddleware

from facial_diagnostics import analyze_frame, aggregate_metrics
| 18 |
+
app = FastAPI()


# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers per the CORS spec (a wildcard origin cannot be paired
# with credentials) — pin concrete frontend origins for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
| 28 |
+
|
| 29 |
+
# Guided examination script. Each step's "prompt" is shown to the user and
# webcam frames are collected for "duration" seconds before the session
# advances to the next step (see ws_stream).
CLINICAL_STEPS = [
    {"id": "baseline", "prompt": "Please relax and look into the camera you are about to be facially diagnosed.", "duration": 8},
    {"id": "blink", "prompt": "Blink twice rapidly now.", "duration": 8},
    {"id": "smile", "prompt": "Smile widely and hold it.", "duration": 9},
    {"id": "open_mouth", "prompt": "Open your mouth wide.", "duration": 10},
    {"id": "turn_head", "prompt": "Turn your head left then right.", "duration": 10}
]
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# External service configuration.
LLM_URL = "https://remostart-super-test-verstion-three.hf.space/ask"

# SECURITY FIX: the ElevenLabs API key was hard-coded here and committed to a
# public repo — that key is burned and must be rotated. Secrets are now read
# from the environment; tts() fails cleanly if the key is absent.
ELEVEN_KEY = os.environ.get("ELEVEN_API_KEY", "")
# Voice id is not a secret; the env var allows overriding the default voice.
VOICE_ID = os.environ.get("ELEVEN_VOICE_ID", "ZthjuvLPty3kTMaNKVKb")
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def tts(text: str) -> bytes:
    """Synthesize speech for *text* via the ElevenLabs API and return MP3 bytes.

    Fixes: adds a request timeout (the original could hang the caller
    forever) and raises requests.HTTPError on API failures — previously the
    JSON error body was returned and base64-encoded as if it were audio.
    """
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
    headers = {"xi-api-key": ELEVEN_KEY, "Content-Type": "application/json"}
    r = requests.post(url, json={"text": text}, headers=headers, timeout=30)
    r.raise_for_status()
    return r.content
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def ask_llm(data):
    """Send aggregated session data to the external LLM and return its answer.

    Fixes: adds a request timeout and degrades to the existing fallback
    message on network errors or malformed (non-JSON / error) responses
    instead of raising inside the websocket handler.
    """
    fallback = "Unable to interpret results."
    try:
        r = requests.post(LLM_URL, json={"query": json.dumps(data)}, timeout=60)
        r.raise_for_status()
        return r.json().get("answer", fallback)
    except (requests.RequestException, ValueError):
        # ValueError covers a response body that is not valid JSON.
        return fallback
| 53 |
+
|
| 54 |
+
|
| 55 |
+
@app.get("/")
def root():
    """Health-check endpoint confirming the API is up."""
    return {"message": "Facial Diagnostic API running."}
| 58 |
+
|
| 59 |
+
@app.websocket("/ws/stream")
async def ws_stream(ws: WebSocket):
    """Guided facial-diagnostic session over a websocket.

    Protocol: the frontend streams webcam frames as base64-encoded JPEGs in
    {"frame": ...} messages. The backend walks the client through
    CLINICAL_STEPS (sending an {"instruction": ...} per step), emits a
    per-step {"step", "summary"} message, and finishes with a
    {"diagnosis", "audio"} message where "audio" is base64-encoded TTS.
    """
    await ws.accept()

    step_index = 0
    metrics = []
    step_summaries = []  # per-step results, accumulated for the final LLM call
    step_start = time.time()

    await ws.send_json({"instruction": CLINICAL_STEPS[0]["prompt"]})

    while True:
        msg = await ws.receive_text()
        data = json.loads(msg)

        if "frame" not in data:
            continue

        frame_bytes = base64.b64decode(data["frame"])
        np_arr = np.frombuffer(frame_bytes, np.uint8)
        frame = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
        if frame is None:
            # Corrupt/truncated JPEG — skip the frame rather than crash.
            continue

        result = analyze_frame(frame)
        if result["found"]:
            metrics.append(result)

        if time.time() - step_start >= CLINICAL_STEPS[step_index]["duration"]:
            summary = aggregate_metrics(metrics)
            step_result = {"step": CLINICAL_STEPS[step_index]["id"], "summary": summary}
            # BUG FIX: the original discarded each step's summary after
            # sending it to the client, so the LLM never saw any metrics.
            step_summaries.append(step_result)
            await ws.send_json(step_result)

            metrics = []
            step_index += 1

            if step_index >= len(CLINICAL_STEPS):
                break

            step_start = time.time()
            await ws.send_json({"instruction": CLINICAL_STEPS[step_index]["prompt"]})

    # BUG FIX: previously final_data was {"steps": CLINICAL_STEPS} — only the
    # static prompts, no measurements — so the "diagnosis" ignored the whole
    # session. Send the measured per-step summaries instead.
    final_data = {"steps": step_summaries}

    # The requests-based helpers are blocking; run them in a worker thread so
    # the event loop (and any other websocket sessions) stays responsive.
    diagnosis = await asyncio.to_thread(ask_llm, final_data)
    audio_bytes = await asyncio.to_thread(tts, diagnosis)
    audio_b64 = base64.b64encode(audio_bytes).decode()

    await ws.send_json({"diagnosis": diagnosis, "audio": audio_b64})
    await ws.close()
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn[standard]
|
| 3 |
+
requests
|
| 4 |
+
opencv-python
|
| 5 |
+
mediapipe
|
| 6 |
+
numpy
|
| 7 |
+
python-multipart
|
| 8 |
+
python-magic
|
| 9 |
+
python-vlc
|