File size: 2,522 Bytes
58da532
 
 
 
 
 
 
 
181ed1d
58da532
 
 
 
 
 
 
 
 
 
 
 
 
 
181ed1d
 
 
 
 
58da532
 
17e8383
905409e
58da532
 
 
 
 
 
 
 
 
 
 
 
 
181ed1d
58da532
 
 
 
 
 
 
 
 
 
 
 
 
181ed1d
58da532
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import base64
import json
import os
import tempfile
from typing import List

import cv2
import numpy as np
import requests
from fastapi import FastAPI, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware

from facial_diagnostics import analyze_frame, aggregate_metrics

# ASGI application object; served by uvicorn/hypercorn and used by the
# route decorators below.
app = FastAPI(title="DrRobot Facial Diagnostic API")

# Allow the browser frontend (any origin) to call this API.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is a
# known CORS pitfall — browsers reject credentialed wildcard responses, and
# Starlette special-cases it by echoing the Origin header. If credentials are
# not actually needed, consider allow_credentials=False; verify with frontend.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Guided examination script shown to the patient in order; `duration` is the
# number of seconds the frontend holds each prompt on screen.
CLINICAL_STEPS = [
    {"id": "baseline", "prompt": "Please relax and look into the camera.", "duration": 10},
    {"id": "blink", "prompt": "Blink twice rapidly now.", "duration": 9},
    {"id": "smile", "prompt": "Smile widely and hold it.", "duration": 9},
    {"id": "open_mouth", "prompt": "Open your mouth wide.", "duration": 9},
    {"id": "turn_head", "prompt": "Turn your head left then right.", "duration": 10}
]

# External LLM endpoint used by ask_llm() to interpret aggregated metrics.
LLM_URL = "https://drrobot9-doctor-robot-ai.hf.space/ask"

# SECURITY: the ElevenLabs API key was hard-coded in source. Prefer the
# ELEVEN_API_KEY environment variable; the committed literal is kept only as a
# backward-compatible fallback and should be rotated and removed — a key that
# has appeared in version control must be considered compromised.
ELEVEN_KEY = os.environ.get(
    "ELEVEN_API_KEY",
    "sk_635e3b45c1d0d13c62ef9de381b8f6fc45eaa3dcb3bf9715",
)
# ElevenLabs voice used for the spoken diagnosis; overridable via environment.
VOICE_ID = os.environ.get("ELEVEN_VOICE_ID", "ZthjuvLPty3kTMaNKVKb")


def tts(text: str) -> str:
    """Synthesize *text* with the ElevenLabs API and return a path to an mp3.

    The caller is responsible for deleting the returned temporary file.

    Raises:
        requests.HTTPError: if ElevenLabs returns a non-2xx status.
        requests.RequestException: on network failure or timeout.
    """
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
    headers = {"xi-api-key": ELEVEN_KEY, "Content-Type": "application/json"}
    # timeout: an unanswered request previously hung the worker indefinitely.
    r = requests.post(url, json={"text": text}, headers=headers, timeout=30)
    # Without this check, an error response body (JSON/HTML) was silently
    # written out and returned as if it were audio.
    r.raise_for_status()
    # The original created the NamedTemporaryFile only to take its .name,
    # leaking the open handle; writing through the context manager closes it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
        f.write(r.content)
        return f.name


def ask_llm(data) -> str:
    """Send aggregated metrics to the external LLM and return its answer text.

    *data* is any JSON-serializable object (the summary dict from
    aggregate_metrics — TODO confirm exact schema against facial_diagnostics).
    Best-effort: on network failure, timeout, or a malformed response this
    returns the same fallback message the original used for a missing
    "answer" key, instead of bubbling a 500 out of the endpoint.
    """
    fallback = "Unable to interpret results."
    try:
        # timeout: remote Space may cold-start slowly, but must not hang forever.
        r = requests.post(LLM_URL, json={"query": json.dumps(data)}, timeout=60)
        r.raise_for_status()
        return r.json().get("answer", fallback)
    except (requests.RequestException, ValueError):
        # ValueError covers a non-JSON response body from r.json().
        return fallback


@app.get("/")
def root():
    """Liveness endpoint: confirms the API process is up and serving."""
    status = {"message": "Facial Diagnostic API running."}
    return status


@app.post("/analyze/")
async def analyze_frames(frames: List[UploadFile] = File(...)):
    """Analyze uploaded video frames and return metrics plus spoken diagnosis.

    Frontend sends multiple frames as image files. Each decodable frame is run
    through analyze_frame(); frames where no face is found (result["found"] is
    falsy) are skipped. Returns a dict with the aggregated metric summary, the
    LLM's diagnosis text, and that text rendered to speech as base64 mp3.
    """
    metrics = []

    for upload in frames:
        contents = await upload.read()
        if not contents:
            # Empty upload part — nothing to decode.
            continue
        np_arr = np.frombuffer(contents, np.uint8)
        frame = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
        if frame is None:
            # imdecode returns None for corrupt/unsupported image data;
            # previously this crashed inside analyze_frame.
            continue
        result = analyze_frame(frame)
        if result["found"]:
            metrics.append(result)

    summary = aggregate_metrics(metrics)
    diagnosis_text = ask_llm(summary)

    # tts() returns a delete=False temp file; remove it after encoding or it
    # leaks one mp3 to disk per request.
    audio_file = tts(diagnosis_text)
    try:
        with open(audio_file, "rb") as f:
            audio_b64 = base64.b64encode(f.read()).decode()
    finally:
        os.remove(audio_file)

    return {
        "summary": summary,
        "diagnosis": diagnosis_text,
        "audio_base64": audio_b64
    }