Spaces:
Paused
Paused
File size: 2,522 Bytes
58da532 181ed1d 58da532 181ed1d 58da532 17e8383 905409e 58da532 181ed1d 58da532 181ed1d 58da532 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import base64
import json
import os
import tempfile
from typing import List

import cv2
import numpy as np
import requests
from fastapi import FastAPI, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware

from facial_diagnostics import analyze_frame, aggregate_metrics
app = FastAPI(title="DrRobot Facial Diagnostic API")
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
CLINICAL_STEPS = [
{"id": "baseline", "prompt": "Please relax and look into the camera.", "duration": 10},
{"id": "blink", "prompt": "Blink twice rapidly now.", "duration": 9},
{"id": "smile", "prompt": "Smile widely and hold it.", "duration": 9},
{"id": "open_mouth", "prompt": "Open your mouth wide.", "duration": 9},
{"id": "turn_head", "prompt": "Turn your head left then right.", "duration": 10}
]
LLM_URL = "https://drrobot9-doctor-robot-ai.hf.space/ask"
ELEVEN_KEY = "sk_635e3b45c1d0d13c62ef9de381b8f6fc45eaa3dcb3bf9715"
VOICE_ID = "ZthjuvLPty3kTMaNKVKb"
def tts(text: str) -> str:
url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
headers = {"xi-api-key": ELEVEN_KEY, "Content-Type": "application/json"}
r = requests.post(url, json={"text": text}, headers=headers)
out_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
with open(out_file, "wb") as f:
f.write(r.content)
return out_file
def ask_llm(data):
r = requests.post(LLM_URL, json={"query": json.dumps(data)})
return r.json().get("answer", "Unable to interpret results.")
@app.get("/")
def root():
return {"message": "Facial Diagnostic API running."}
@app.post("/analyze/")
async def analyze_frames(frames: List[UploadFile] = File(...)):
"""
Frontend sends multiple frames as files.
Backend returns aggregated metrics and TTS audio base64.
"""
metrics = []
for file in frames:
contents = await file.read()
np_arr = np.frombuffer(contents, np.uint8)
frame = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
result = analyze_frame(frame)
if result["found"]:
metrics.append(result)
summary = aggregate_metrics(metrics)
diagnosis_text = ask_llm(summary)
audio_file = tts(diagnosis_text)
with open(audio_file, "rb") as f:
audio_b64 = base64.b64encode(f.read()).decode()
return {
"summary": summary,
"diagnosis": diagnosis_text,
"audio_base64": audio_b64
}
|