drrobot9 committed on
Commit
58da532
·
verified ·
1 Parent(s): 06d31de

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +83 -112
main.py CHANGED
@@ -1,112 +1,83 @@
1
- # main.py
2
-
3
- import numpy as np
4
- import cv2
5
- import time
6
- import json
7
- import requests
8
- import asyncio
9
- import tempfile
10
-
11
- from fastapi import FastAPI, WebSocket
12
- from fastapi.middleware.cors import CORSMiddleware
13
-
14
- from facial_diagnostics import analyze_frame, aggregate_metrics
15
-
16
- import base64
17
-
18
import os  # imported here so this configuration block stays self-contained

# ASGI application object; the route decorators in this module attach to it.
app = FastAPI()

# NOTE(review): wildcard origins/methods/headers are combined with
# allow_credentials=True. The FastAPI CORS documentation says these lists
# must be explicit when credentials are allowed -- confirm the intended
# origins before tightening, since doing so changes which clients can connect.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Ordered capture protocol. "prompt" is sent to the client as the instruction
# for the step; "duration" is compared against elapsed wall-clock seconds by
# the streaming handler.
CLINICAL_STEPS = [
    {"id": "baseline", "prompt": "Please relax and look into the camera you are about to be facially diagnosed.", "duration": 8},
    {"id": "blink", "prompt": "Blink twice rapidly now.", "duration": 8},
    {"id": "smile", "prompt": "Smile widely and hold it.", "duration": 9},
    {"id": "open_mouth", "prompt": "Open your mouth wide.", "duration": 10},
    {"id": "turn_head", "prompt": "Turn your head left then right.", "duration": 10},
]

# Endpoint that receives the JSON-encoded measurements and returns an answer.
LLM_URL = "https://remostart-super-test-verstion-three.hf.space/ask"

# SECURITY NOTE(review): a live-looking API credential is committed to source
# control. It should be revoked and supplied only through the environment.
# The literal is kept as a fallback purely so existing deployments keep
# working until ELEVENLABS_API_KEY is configured.
ELEVEN_KEY = os.environ.get(
    "ELEVENLABS_API_KEY",
    "sk_bfdd78774121bf55eafeb0064ccf20e4ac5ea36990164470",
)

# ElevenLabs voice identifier interpolated into the text-to-speech URL.
VOICE_ID = "ZthjuvLPty3kTMaNKVKb"
41
-
42
-
43
def tts(text: str, timeout: float = 30.0) -> bytes:
    """Request speech synthesis for ``text`` from the ElevenLabs API.

    Args:
        text: Sent as the ``"text"`` field of the JSON request body.
        timeout: Seconds to wait for the HTTP request. Without a timeout
            ``requests`` can block indefinitely on an unresponsive server.

    Returns:
        The raw response body (presumably encoded audio). The HTTP status is
        not inspected, so the body of an error response is returned unchanged.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
    headers = {"xi-api-key": ELEVEN_KEY, "Content-Type": "application/json"}
    response = requests.post(
        url,
        json={"text": text},
        headers=headers,
        timeout=timeout,
    )
    return response.content
48
-
49
-
50
def ask_llm(data, timeout: float = 60.0):
    """Send ``data`` to the LLM endpoint and return its answer text.

    Args:
        data: JSON-serialisable object; it is dumped to a string and posted
            as the ``"query"`` field.
        timeout: Seconds to wait for the HTTP request before ``requests``
            raises, instead of blocking indefinitely.

    Returns:
        The ``"answer"`` field of the JSON response, or the fallback message
        when the response is not JSON, is not a JSON object, or has no
        ``"answer"`` key.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    fallback = "Unable to interpret results."
    response = requests.post(
        LLM_URL,
        json={"query": json.dumps(data)},
        timeout=timeout,
    )
    try:
        payload = response.json()
    except ValueError:
        # Body was not valid JSON (for example an HTML error page).
        return fallback
    if not isinstance(payload, dict):
        return fallback
    return payload.get("answer", fallback)
53
-
54
-
55
@app.get("/")
def root():
    """Return a static message confirming the API process is serving requests."""
    status = {"message": "Facial Diagnostic API running."}
    return status
58
-
59
@app.websocket("/ws/stream")
async def ws_stream(ws: WebSocket):
    """Run the guided capture session over a WebSocket.

    Message flow, as implemented below:

    * server -> client ``{"instruction": <prompt>}`` at the start of each step;
    * client -> server JSON text messages; only those carrying a ``"frame"``
      key (base64-encoded image) are processed;
    * server -> client ``{"step": <id>, "summary": <aggregate>}`` once a
      step's duration has elapsed;
    * server -> client ``{"diagnosis": <text>, "audio": <base64>}`` after the
      last step, then the socket is closed.

    Step timing is only evaluated when a frame message arrives. Malformed
    messages and undecodable frames are skipped rather than ending the
    session, and a client disconnect ends the handler quietly.
    """
    # Function-scope imports keep this block independent of the file header.
    from fastapi import WebSocketDisconnect
    from fastapi.concurrency import run_in_threadpool

    await ws.accept()

    step_index = 0
    metrics = []
    step_results = []  # per-step summaries, forwarded to the LLM at the end
    step_start = time.time()

    try:
        await ws.send_json({"instruction": CLINICAL_STEPS[0]["prompt"]})

        while True:
            msg = await ws.receive_text()
            try:
                data = json.loads(msg)
            except ValueError:
                continue
            if not isinstance(data, dict) or "frame" not in data:
                continue

            # Decode defensively: bad base64 or a non-image payload yields no frame.
            frame = None
            try:
                frame_bytes = base64.b64decode(data["frame"])
            except (ValueError, TypeError):
                frame_bytes = b""
            if frame_bytes:
                np_arr = np.frombuffer(frame_bytes, np.uint8)
                # imdecode returns None when the buffer is not a decodable image.
                frame = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)

            if frame is not None:
                result = analyze_frame(frame)
                if result["found"]:
                    metrics.append(result)

            step = CLINICAL_STEPS[step_index]
            if time.time() - step_start < step["duration"]:
                continue

            summary = aggregate_metrics(metrics)
            await ws.send_json({"step": step["id"], "summary": summary})
            step_results.append({"step": step["id"], "summary": summary})

            metrics = []
            step_index += 1
            if step_index >= len(CLINICAL_STEPS):
                break

            step_start = time.time()
            await ws.send_json({"instruction": CLINICAL_STEPS[step_index]["prompt"]})

        # Include the measured summaries; previously only the static step
        # definitions reached the LLM, so the diagnosis ignored the session data.
        final_data = {"steps": CLINICAL_STEPS, "results": step_results}

        # ask_llm and tts do blocking HTTP; run them off the event loop.
        diagnosis = await run_in_threadpool(ask_llm, final_data)
        audio_bytes = await run_in_threadpool(tts, diagnosis)
        audio_b64 = base64.b64encode(audio_bytes).decode()

        await ws.send_json({"diagnosis": diagnosis, "audio": audio_b64})
        await ws.close()
    except WebSocketDisconnect:
        # Client went away mid-session; nothing left to send.
        return
 
1
+ import numpy as np
2
+ import cv2
3
+ import json
4
+ import requests
5
+ import tempfile
6
+ import base64
7
+ from fastapi import FastAPI, UploadFile, File
8
+ from fastapi.middleware.cors import CORSMiddleware
9
+ from typing import List
10
+
11
+ from facial_diagnostics import analyze_frame, aggregate_metrics
12
+
13
import os  # imported here so this configuration block stays self-contained

# ASGI application object; the route decorators in this module attach to it.
app = FastAPI(title="DrRobot Facial Diagnostic API")

# NOTE(review): wildcard origins/methods/headers are combined with
# allow_credentials=True. The FastAPI CORS documentation says these lists
# must be explicit when credentials are allowed -- confirm the intended
# origins before tightening, since doing so changes which clients can connect.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Ordered capture protocol (step id, user-facing prompt, duration).
# NOTE(review): no handler in this version of the module reads this list;
# presumably the frontend mirrors it, with duration in seconds -- confirm.
CLINICAL_STEPS = [
    {"id": "baseline", "prompt": "Please relax and look into the camera.", "duration": 5},
    {"id": "blink", "prompt": "Blink twice rapidly now.", "duration": 5},
    {"id": "smile", "prompt": "Smile widely and hold it.", "duration": 5},
    {"id": "open_mouth", "prompt": "Open your mouth wide.", "duration": 5},
    {"id": "turn_head", "prompt": "Turn your head left then right.", "duration": 5},
]

# Endpoint that receives the JSON-encoded measurements and returns an answer.
LLM_URL = "https://remostart-super-test-verstion-three.hf.space/ask"

# SECURITY NOTE(review): a live-looking API credential is committed to source
# control. It should be revoked and supplied only through the environment.
# The literal is kept as a fallback purely so existing deployments keep
# working until ELEVENLABS_API_KEY is configured.
ELEVEN_KEY = os.environ.get(
    "ELEVENLABS_API_KEY",
    "sk_bfdd78774121bf55eafeb0064ccf20e4ac5ea36990164470",
)

# ElevenLabs voice identifier interpolated into the text-to-speech URL.
VOICE_ID = "ZthjuvLPty3kTMaNKVKb"
34
+
35
+
36
def tts(text: str, timeout: float = 30.0) -> str:
    """Request speech synthesis for ``text`` and save the response to a file.

    Args:
        text: Sent as the ``"text"`` field of the JSON request body.
        timeout: Seconds to wait for the HTTP request. Without a timeout
            ``requests`` can block indefinitely on an unresponsive server.

    Returns:
        Path of a newly created ``.mp3`` temporary file holding the raw
        response body. The file is created with ``delete=False``, so the
        caller is responsible for removing it. The HTTP status is not
        inspected, so the body of an error response is written unchanged.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
    headers = {"xi-api-key": ELEVEN_KEY, "Content-Type": "application/json"}
    response = requests.post(
        url,
        json={"text": text},
        headers=headers,
        timeout=timeout,
    )
    # Write through the handle that created the file and close it
    # deterministically, rather than abandoning it and reopening by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as out:
        out.write(response.content)
        return out.name
44
+
45
+
46
def ask_llm(data, timeout: float = 60.0):
    """Send ``data`` to the LLM endpoint and return its answer text.

    Args:
        data: JSON-serialisable object; it is dumped to a string and posted
            as the ``"query"`` field.
        timeout: Seconds to wait for the HTTP request before ``requests``
            raises, instead of blocking indefinitely.

    Returns:
        The ``"answer"`` field of the JSON response, or the fallback message
        when the response is not JSON, is not a JSON object, or has no
        ``"answer"`` key.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    fallback = "Unable to interpret results."
    response = requests.post(
        LLM_URL,
        json={"query": json.dumps(data)},
        timeout=timeout,
    )
    try:
        payload = response.json()
    except ValueError:
        # Body was not valid JSON (for example an HTML error page).
        return fallback
    if not isinstance(payload, dict):
        return fallback
    return payload.get("answer", fallback)
49
+
50
+
51
@app.get("/")
def root():
    """Return a static message confirming the API process is serving requests."""
    status = {"message": "Facial Diagnostic API running."}
    return status
54
+
55
+
56
@app.post("/analyze/")
async def analyze_frames(frames: List[UploadFile] = File(...)):
    """Analyse a batch of uploaded frames and return a spoken diagnosis.

    Each upload is decoded as an image and passed to ``analyze_frame``;
    results whose ``"found"`` entry is truthy are aggregated, the aggregate
    is sent to the LLM, and the answer is synthesised to audio.

    Args:
        frames: Uploaded image files. Empty uploads and files that cannot be
            decoded as images are skipped.

    Returns:
        A dict with ``"summary"`` (aggregated metrics), ``"diagnosis"``
        (LLM answer text) and ``"audio_base64"`` (base64 of the TTS output).
    """
    # Function-scope imports keep this block independent of the file header.
    import contextlib
    import os
    from fastapi.concurrency import run_in_threadpool

    metrics = []

    for file in frames:
        contents = await file.read()
        if not contents:
            continue
        np_arr = np.frombuffer(contents, np.uint8)
        frame = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
        if frame is None:
            # imdecode returns None when the data is not a decodable image.
            continue
        result = analyze_frame(frame)
        if result["found"]:
            metrics.append(result)

    summary = aggregate_metrics(metrics)

    # ask_llm and tts do blocking HTTP; run them off the event loop.
    diagnosis_text = await run_in_threadpool(ask_llm, summary)
    audio_file = await run_in_threadpool(tts, diagnosis_text)

    try:
        with open(audio_file, "rb") as f:
            audio_b64 = base64.b64encode(f.read()).decode()
    finally:
        # tts() leaves the temp file on disk; remove it so repeated requests
        # do not accumulate files.
        with contextlib.suppress(OSError):
            os.remove(audio_file)

    return {
        "summary": summary,
        "diagnosis": diagnosis_text,
        "audio_base64": audio_b64,
    }