# facial_analysis/main.py — DrRobot facial diagnostic backend
import base64
import json
import os
import tempfile
from typing import List

import cv2
import numpy as np
import requests
from fastapi import FastAPI, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware

from facial_diagnostics import analyze_frame, aggregate_metrics
# FastAPI application exposing the facial-diagnostic endpoints.
app = FastAPI(title="DrRobot Facial Diagnostic API")

# Allow any browser frontend to call this API (wide-open CORS).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Ordered guided-exam steps the frontend walks the patient through.
# Each step: a stable id, the on-screen prompt, and its duration in seconds.
CLINICAL_STEPS = [
    {"id": "baseline", "prompt": "Please relax and look into the camera.", "duration": 10},
    {"id": "blink", "prompt": "Blink twice rapidly now.", "duration": 9},
    {"id": "smile", "prompt": "Smile widely and hold it.", "duration": 9},
    {"id": "open_mouth", "prompt": "Open your mouth wide.", "duration": 9},
    {"id": "turn_head", "prompt": "Turn your head left then right.", "duration": 10},
]
# External LLM endpoint that turns aggregated metrics into diagnosis text.
LLM_URL = "https://drrobot9-doctor-robot-ai.hf.space/ask"

# SECURITY: an ElevenLabs API key was previously hard-coded here — a leaked
# secret that must be rotated. Read the key from the environment instead
# (empty string if unset; tts() will then fail with an HTTP 401).
ELEVEN_KEY = os.environ.get("ELEVEN_API_KEY", "")

# ElevenLabs voice used for spoken diagnoses.
VOICE_ID = "ZthjuvLPty3kTMaNKVKb"
def tts(text: str) -> str:
    """Synthesize *text* to speech via ElevenLabs and return the MP3 path.

    Returns the path of a temporary ``.mp3`` file; the caller is
    responsible for deleting it when done.

    Raises:
        requests.HTTPError: if ElevenLabs rejects the request (e.g. bad key).
        requests.RequestException: on network failure or timeout.
    """
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
    headers = {"xi-api-key": ELEVEN_KEY, "Content-Type": "application/json"}
    # Bounded timeout so a slow TTS backend cannot hang the request forever.
    r = requests.post(url, json={"text": text}, headers=headers, timeout=30)
    # Previously an error response body was silently written out as "audio".
    r.raise_for_status()
    # delete=False: the file must outlive this function so the caller can
    # read it back. Using the context manager (instead of .name on a bare
    # NamedTemporaryFile) avoids leaking the open file handle.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
        f.write(r.content)
        return f.name
def ask_llm(data) -> str:
    """Send aggregated metrics to the diagnosis LLM and return its answer.

    *data* is JSON-serialized and posted as the ``query`` field. Falls back
    to a fixed message when the service is unreachable, times out, or
    returns an unexpected payload — matching the original's fallback for a
    missing ``answer`` key, but without crashing the endpoint.
    """
    try:
        r = requests.post(LLM_URL, json={"query": json.dumps(data)}, timeout=60)
        r.raise_for_status()
        return r.json().get("answer", "Unable to interpret results.")
    except (requests.RequestException, ValueError):
        # Network/HTTP failure or a non-JSON body: degrade gracefully.
        return "Unable to interpret results."
@app.get("/")
def root():
    """Health-check endpoint confirming the service is up."""
    return {"message": "Facial Diagnostic API running."}
@app.post("/analyze/")
async def analyze_frames(frames: List[UploadFile] = File(...)):
    """Analyze uploaded frames; return metrics, diagnosis text, and TTS audio.

    The frontend sends multiple frames as image files. Each frame is
    decoded, run through ``analyze_frame``, and frames where a face was
    found are aggregated via ``aggregate_metrics``. The LLM interprets the
    summary and the spoken diagnosis is returned as base64-encoded MP3.
    """
    metrics = []
    for file in frames:
        contents = await file.read()
        np_arr = np.frombuffer(contents, np.uint8)
        frame = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
        # cv2.imdecode returns None for corrupt or non-image payloads;
        # skip those instead of passing None into analyze_frame.
        if frame is None:
            continue
        result = analyze_frame(frame)
        if result["found"]:
            metrics.append(result)

    summary = aggregate_metrics(metrics)
    diagnosis_text = ask_llm(summary)

    audio_file = tts(diagnosis_text)
    try:
        with open(audio_file, "rb") as f:
            audio_b64 = base64.b64encode(f.read()).decode()
    finally:
        # tts() creates its temp file with delete=False; remove it here so
        # repeated requests don't accumulate MP3 files in the temp dir.
        os.remove(audio_file)

    return {
        "summary": summary,
        "diagnosis": diagnosis_text,
        "audio_base64": audio_b64,
    }