Spaces:

drrobot9
/

facial_analysis

Sleeping

App Files Files Community

drrobot9 committed on Dec 1, 2025

Commit

58da532

verified ·

1 Parent(s): 06d31de

Update main.py

Browse files

Files changed (1) hide show

main.py +83 -112

main.py CHANGED Viewed

@@ -1,112 +1,83 @@
-# main.py
-import numpy as np
-import cv2
-import time
-import json
-import requests
-import asyncio
-import tempfile
-from fastapi import FastAPI, WebSocket
-from fastapi.middleware.cors import CORSMiddleware
-from facial_diagnostics import analyze_frame, aggregate_metrics
-import base64
-app = FastAPI()
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-CLINICAL_STEPS = [
-    {"id": "baseline", "prompt": "Please relax and look into the camera you are about to be facially diagnosed.", "duration": 8},
-    {"id": "blink", "prompt": "Blink twice rapidly now.", "duration": 8},
-    {"id": "smile", "prompt": "Smile widely and hold it.", "duration": 9},
-    {"id": "open_mouth", "prompt": "Open your mouth wide.", "duration": 10},
-    {"id": "turn_head", "prompt": "Turn your head left then right.", "duration": 10}
-]
-LLM_URL = "https://remostart-super-test-verstion-three.hf.space/ask"
-ELEVEN_KEY = "sk_bfdd78774121bf55eafeb0064ccf20e4ac5ea36990164470"
-VOICE_ID = "ZthjuvLPty3kTMaNKVKb"
-def tts(text: str) -> bytes:
-    url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
-    headers = {"xi-api-key": ELEVEN_KEY, "Content-Type": "application/json"}
-    r = requests.post(url, json={"text": text}, headers=headers)
-    return r.content
-def ask_llm(data):
-    r = requests.post(LLM_URL, json={"query": json.dumps(data)})
-    return r.json().get("answer", "Unable to interpret results.")
-@app.get("/")
-def root():
-    return {"message": "Facial Diagnostic API running."}
-@app.websocket("/ws/stream")
-async def ws_stream(ws: WebSocket):
-    """
-    The frontend sends webcam frames (base64 JPG)
-    Backend processes them in real-time
-    """
-    await ws.accept()
-    step_index = 0
-    metrics = []
-    step_start = time.time()
-    await ws.send_json({"instruction": CLINICAL_STEPS[0]["prompt"]})
-    while True:
-        msg = await ws.receive_text()
-        data = json.loads(msg)
-        if "frame" not in data:
-            continue
-        frame_bytes = base64.b64decode(data["frame"])
-        np_arr = np.frombuffer(frame_bytes, np.uint8)
-        frame = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
-        result = analyze_frame(frame)
-        if result["found"]:
-            metrics.append(result)
-        if time.time() - step_start >= CLINICAL_STEPS[step_index]["duration"]:
-            summary = aggregate_metrics(metrics)
-            await ws.send_json({"step": CLINICAL_STEPS[step_index]["id"], "summary": summary})
-            metrics = []
-            step_index += 1
-            if step_index >= len(CLINICAL_STEPS):
-                break
-            step_start = time.time()
-            await ws.send_json({"instruction": CLINICAL_STEPS[step_index]["prompt"]})
-    final_data = {"steps": CLINICAL_STEPS}
-    diagnosis = ask_llm(final_data)
-    audio_bytes = tts(diagnosis)
-    audio_b64 = base64.b64encode(audio_bytes).decode()
-    await ws.send_json({"diagnosis": diagnosis, "audio": audio_b64})
-    await ws.close()

+import base64
+import json
+import os
+import tempfile
+from typing import List
+
+import cv2
+import numpy as np
+import requests
+from fastapi import FastAPI, UploadFile, File
+from fastapi.middleware.cors import CORSMiddleware
+
+from facial_diagnostics import analyze_frame, aggregate_metrics
+app = FastAPI(title="DrRobot Facial Diagnostic API")
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+CLINICAL_STEPS = [
+    {"id": "baseline", "prompt": "Please relax and look into the camera.", "duration": 5},
+    {"id": "blink", "prompt": "Blink twice rapidly now.", "duration": 5},
+    {"id": "smile", "prompt": "Smile widely and hold it.", "duration": 5},
+    {"id": "open_mouth", "prompt": "Open your mouth wide.", "duration": 5},
+    {"id": "turn_head", "prompt": "Turn your head left then right.", "duration": 5}
+]
+LLM_URL = "https://remostart-super-test-verstion-three.hf.space/ask"
+ELEVEN_KEY = "sk_bfdd78774121bf55eafeb0064ccf20e4ac5ea36990164470"
+VOICE_ID = "ZthjuvLPty3kTMaNKVKb"
+def tts(text: str) -> str:
+    url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
+    headers = {"xi-api-key": ELEVEN_KEY, "Content-Type": "application/json"}
+    r = requests.post(url, json={"text": text}, headers=headers)
+    out_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
+    with open(out_file, "wb") as f:
+        f.write(r.content)
+    return out_file
+def ask_llm(data):
+    r = requests.post(LLM_URL, json={"query": json.dumps(data)})
+    return r.json().get("answer", "Unable to interpret results.")
+@app.get("/")
+def root():
+    return {"message": "Facial Diagnostic API running."}
+@app.post("/analyze/")
+async def analyze_frames(frames: List[UploadFile] = File(...)):
+    """
+    Frontend sends multiple frames as files.
+    Backend returns aggregated metrics and TTS audio base64.
+    """
+    metrics = []
+    for file in frames:
+        contents = await file.read()
+        np_arr = np.frombuffer(contents, np.uint8)
+        frame = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
+        result = analyze_frame(frame)
+        if result["found"]:
+            metrics.append(result)
+    summary = aggregate_metrics(metrics)
+    diagnosis_text = ask_llm(summary)
+    audio_file = tts(diagnosis_text)
+    with open(audio_file, "rb") as f:
+        audio_b64 = base64.b64encode(f.read()).decode()
+    return {
+        "summary": summary,
+        "diagnosis": diagnosis_text,
+        "audio_base64": audio_b64
+    }