drrobot9 committed on
Commit
ffdaad9
·
verified ·
1 Parent(s): 59be1c5

initial commit

Browse files
Files changed (4) hide show
  1. Dockerfile +14 -0
  2. facial_diagnostics.py +90 -0
  3. main.py +112 -0
  4. requirements.txt +9 -0
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Dockerfile
FROM python:3.10-slim

WORKDIR /app

COPY requirements.txt .

# ffmpeg/libsm6/libxext6 are native runtime dependencies of OpenCV.
# Install system and Python deps in one layer and clean the apt cache in the
# same RUN so the package lists never land in the final image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 \
    && rm -rf /var/lib/apt/lists/* \
    && pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 7860

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
facial_diagnostics.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# facial_diagnostics.py
# Per-frame facial landmark metrics (eye openness, mouth openness, head tilt,
# smile symmetry) computed with MediaPipe FaceMesh.


import cv2
import numpy as np
import mediapipe as mp
import time
import json
import math
import requests
from statistics import median


# One module-level FaceMesh instance shared by all calls to analyze_frame().
# static_image_mode=False treats input as a video stream (tracks between
# frames); refine_landmarks=True enables the refined iris/lip landmark set.
mp_face_mesh = mp.solutions.face_mesh
FACE_MESH = mp_face_mesh.FaceMesh(
    static_image_mode=False,
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.6,
    min_tracking_confidence=0.6
)

# MediaPipe FaceMesh landmark indices (468/478-point topology).
# LEFT_EYE / RIGHT_EYE: 6-point eye contours in the order expected by
# eye_aspect_ratio() — [outer corner, upper x2, inner corner, lower x2].
LEFT_EYE = [33,160,158,133,153,144]
RIGHT_EYE = [362,385,387,263,373,380]
# MOUTH: [top lip, bottom lip, left corner, right corner].
MOUTH = [13,14,78,308]
SMILE_L = 61   # left mouth corner
SMILE_R = 291  # right mouth corner
28
+
29
+
30
def eye_aspect_ratio(pts, idx):
    """Eye Aspect Ratio (EAR) for the six landmarks selected by *idx*.

    EAR = (|p1-p5| + |p2-p4|) / (2 * |p0-p3|): the two vertical eyelid gaps
    over twice the horizontal eye width.  Values shrink toward 0 as the eye
    closes; the epsilon keeps the division safe for degenerate landmarks.
    """
    p = [np.asarray(pts[i], dtype=float) for i in idx]
    vertical = np.linalg.norm(p[1] - p[5]) + np.linalg.norm(p[2] - p[4])
    horizontal = np.linalg.norm(p[0] - p[3]) + 1e-8
    return float(vertical / (2.0 * horizontal))
35
+
36
def mouth_ratio(pts):
    """Mouth-openness ratio: vertical lip gap divided by mouth width.

    Both distances are in pixel space, so the ratio is scale-invariant;
    the epsilon guards against a zero-width mouth from bad landmarks.
    """
    top = np.asarray(pts[MOUTH[0]], dtype=float)
    bottom = np.asarray(pts[MOUTH[1]], dtype=float)
    left = np.asarray(pts[MOUTH[2]], dtype=float)
    right = np.asarray(pts[MOUTH[3]], dtype=float)
    gap = np.linalg.norm(top - bottom)
    width = np.linalg.norm(left - right) + 1e-8
    return float(gap / width)
40
+
41
def head_tilt_angle(pts):
    """Head roll in degrees: the angle of the line joining the two eye centres.

    Each eye centre is approximated as the midpoint of that eye's inner and
    outer corner landmarks (33/133 left, 362/263 right).  0 means level;
    sign follows image coordinates (y grows downward).
    """
    left_eye = (np.asarray(pts[33], dtype=float) + np.asarray(pts[133], dtype=float)) / 2.0
    right_eye = (np.asarray(pts[362], dtype=float) + np.asarray(pts[263], dtype=float)) / 2.0
    dx, dy = right_eye - left_eye
    return float(math.degrees(math.atan2(dy, dx)))
46
+
47
def smile_symmetry(pts):
    """Left/right symmetry of the mouth corners, in (0, 1]; 1.0 = symmetric.

    Compares each mouth corner's distance to a facial midline reference
    (the nose tip, FaceMesh landmark 1).

    BUG FIX: the previous implementation measured each corner's distance to
    the midpoint *between* the two corners — which is equidistant from both
    by construction — so it always returned 1.0 regardless of the face.
    """
    nose = np.asarray(pts[1], dtype=float)  # landmark 1 = nose tip (midline proxy)
    left = np.asarray(pts[SMILE_L], dtype=float)
    right = np.asarray(pts[SMILE_R], dtype=float)
    L = np.linalg.norm(left - nose)
    R = np.linalg.norm(right - nose)
    denom = max(L, R)
    # Degenerate case: both corners coincide with the reference point.
    return float(min(L, R) / denom) if denom > 0 else 1.0
54
+
55
def analyze_frame(frame):
    """Run FaceMesh on one BGR frame and return per-frame facial metrics.

    Returns {"found": False} when no face is detected; otherwise a dict with
    the averaged eye aspect ratio, mouth openness, head tilt (degrees) and
    smile symmetry, computed on pixel-space landmark coordinates.
    """
    height, width, _ = frame.shape
    detection = FACE_MESH.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    if not detection.multi_face_landmarks:
        return {"found": False}

    # Landmarks are normalized [0,1]; project onto integer pixel coordinates.
    landmarks = detection.multi_face_landmarks[0].landmark
    pts = [(int(lm.x * width), int(lm.y * height)) for lm in landmarks]

    ear_left = eye_aspect_ratio(pts, LEFT_EYE)
    ear_right = eye_aspect_ratio(pts, RIGHT_EYE)
    return {
        "found": True,
        "ear": (ear_left + ear_right) / 2,
        "mouth_ratio": mouth_ratio(pts),
        "head_tilt": head_tilt_angle(pts),
        "smile_sym": smile_symmetry(pts),
    }
73
+
74
+
75
def aggregate_metrics(metrics):
    """Collapse a list of per-frame metric dicts into per-step medians.

    The median is robust to the occasional bad landmark fit within a step.
    Returns an empty dict when no frame produced a detection.
    """
    if not metrics:
        return {}

    def column(key):
        # Extract one metric across all frames of this step.
        return [frame[key] for frame in metrics]

    return {
        "frames": len(metrics),
        "ear_median": median(column("ear")),
        "mouth_median": median(column("mouth_ratio")),
        "tilt_median": median(column("head_tilt")),
        "smile_median": median(column("smile_sym")),
    }
main.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # main.py
2
+
3
import asyncio
import base64
import json
import os
import tempfile
import time

import cv2
import numpy as np
import requests
from fastapi import FastAPI, WebSocket
from fastapi.middleware.cors import CORSMiddleware

from facial_diagnostics import analyze_frame, aggregate_metrics
17
+
18
app = FastAPI()


# Open CORS so a browser front-end served from any origin can reach the REST
# and WebSocket endpoints.
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# rejected by browsers for credentialed requests — confirm whether
# credentials are actually needed, or pin the front-end origin.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
28
+
29
# Scripted examination protocol for the websocket session: each step's
# "prompt" is sent to the client, then incoming webcam frames are analysed
# for "duration" seconds before advancing to the next step.
CLINICAL_STEPS = [
    {"id": "baseline", "prompt": "Please relax and look into the camera you are about to be facially diagnosed.", "duration": 8},
    {"id": "blink", "prompt": "Blink twice rapidly now.", "duration": 8},
    {"id": "smile", "prompt": "Smile widely and hold it.", "duration": 9},
    {"id": "open_mouth", "prompt": "Open your mouth wide.", "duration": 10},
    {"id": "turn_head", "prompt": "Turn your head left then right.", "duration": 10}
]
36
+
37
+
38
# External service configuration.
LLM_URL = "https://remostart-super-test-verstion-three.hf.space/ask"
# SECURITY FIX: the ElevenLabs API key was previously hard-coded here and is
# now in the repository history — that key must be rotated.  Read it from the
# environment instead; the empty default keeps the module importable when TTS
# is not configured (tts() will then fail against the API and return no audio).
ELEVEN_KEY = os.environ.get("ELEVEN_API_KEY", "")
VOICE_ID = "ZthjuvLPty3kTMaNKVKb"
41
+
42
+
43
def tts(text: str) -> bytes:
    """Synthesize *text* with the ElevenLabs TTS API and return raw audio bytes.

    Best-effort: returns b"" when the request fails, times out, or the API
    answers with a non-2xx status.  (Previously there was no timeout and the
    error-JSON body of a failed call was returned — and later played back to
    the client — as if it were audio.)
    """
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
    headers = {"xi-api-key": ELEVEN_KEY, "Content-Type": "application/json"}
    try:
        # Bound the call so a hung TTS service cannot stall the session flow.
        r = requests.post(url, json={"text": text}, headers=headers, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        return b""
    return r.content
48
+
49
+
50
def ask_llm(data):
    """Send aggregated session data to the external LLM and return its answer.

    Falls back to a fixed message when the service is unreachable, times out,
    or returns a non-JSON / error response.  (Previously there was no timeout
    and r.json() could raise, killing the websocket handler mid-session.)
    """
    try:
        r = requests.post(LLM_URL, json={"query": json.dumps(data)}, timeout=60)
        r.raise_for_status()
        return r.json().get("answer", "Unable to interpret results.")
    except (requests.RequestException, ValueError):
        return "Unable to interpret results."
53
+
54
+
55
@app.get("/")
def root():
    """Health-check endpoint confirming the service is up."""
    status = {"message": "Facial Diagnostic API running."}
    return status
58
+
59
@app.websocket("/ws/stream")
async def ws_stream(ws: WebSocket):
    """Drive the scripted clinical session over a websocket.

    Protocol:
      * client sends JSON messages {"frame": "<base64 JPEG>"};
      * server sends {"instruction": ...} at each step change and
        {"step": ..., "summary": ...} when a step's duration elapses;
      * after the last step it sends {"diagnosis": ..., "audio": "<base64>"}
        and closes the socket.
    """
    await ws.accept()

    step_index = 0
    metrics = []          # per-frame metrics collected for the current step
    step_summaries = []   # one aggregated summary per completed step
    step_start = time.time()

    await ws.send_json({"instruction": CLINICAL_STEPS[0]["prompt"]})

    while True:
        msg = await ws.receive_text()
        data = json.loads(msg)

        if "frame" not in data:
            continue

        frame_bytes = base64.b64decode(data["frame"])
        np_arr = np.frombuffer(frame_bytes, np.uint8)
        frame = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
        if frame is None:
            # BUG FIX: a corrupt/truncated JPEG makes imdecode return None,
            # which previously crashed analyze_frame (frame.shape on None).
            continue

        result = analyze_frame(frame)
        if result["found"]:
            metrics.append(result)

        # Advance to the next step once this one's duration has elapsed.
        if time.time() - step_start >= CLINICAL_STEPS[step_index]["duration"]:
            summary = aggregate_metrics(metrics)
            step_summaries.append(
                {"step": CLINICAL_STEPS[step_index]["id"], "summary": summary}
            )
            await ws.send_json(
                {"step": CLINICAL_STEPS[step_index]["id"], "summary": summary}
            )

            metrics = []
            step_index += 1

            if step_index >= len(CLINICAL_STEPS):
                break

            step_start = time.time()
            await ws.send_json({"instruction": CLINICAL_STEPS[step_index]["prompt"]})

    # BUG FIX: previously only the static step definitions (CLINICAL_STEPS)
    # were sent to the LLM — every measured summary was discarded, so the
    # "diagnosis" never saw any patient data.
    final_data = {"steps": step_summaries}
    diagnosis = ask_llm(final_data)

    audio_bytes = tts(diagnosis)
    audio_b64 = base64.b64encode(audio_bytes).decode()

    await ws.send_json({"diagnosis": diagnosis, "audio": audio_b64})
    await ws.close()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
fastapi
uvicorn[standard]
requests
opencv-python
mediapipe
numpy
python-multipart
# NOTE(review): python-magic and python-vlc are not imported by any file in
# this commit — confirm they are needed before keeping them in the image.
python-magic
python-vlc