"""FastAPI service that analyses uploaded face frames and returns a spoken diagnosis.

The service decodes each uploaded frame, runs it through the project's
``facial_diagnostics`` helpers, asks a remote LLM to interpret the aggregated
metrics, and converts the LLM answer to speech via the ElevenLabs API.
"""
import base64
import contextlib
import json
import logging
import os
import tempfile
from typing import List

import cv2
import numpy as np
import requests
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware

from facial_diagnostics import aggregate_metrics, analyze_frame

logger = logging.getLogger(__name__)

app = FastAPI(title="DrRobot Facial Diagnostic API")

# NOTE(review): a wildcard origin combined with allow_credentials=True lets any
# site make credentialed requests. Left unchanged to avoid breaking the
# frontend; consider listing the real frontend origin(s) explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Scripted prompts for a capture session; "duration" presumably is in seconds
# (not consumed in this module -- confirm against the frontend).
CLINICAL_STEPS = [
    {"id": "baseline", "prompt": "Please relax and look into the camera.", "duration": 10},
    {"id": "blink", "prompt": "Blink twice rapidly now.", "duration": 9},
    {"id": "smile", "prompt": "Smile widely and hold it.", "duration": 9},
    {"id": "open_mouth", "prompt": "Open your mouth wide.", "duration": 9},
    {"id": "turn_head", "prompt": "Turn your head left then right.", "duration": 10},
]

LLM_URL = "https://drrobot9-doctor-robot-ai.hf.space/ask"

# SECURITY: this key is committed to source control and must be treated as
# compromised -- rotate it and supply the replacement via the
# ELEVENLABS_API_KEY environment variable. The literal fallback is kept only
# so existing deployments keep working until that is done.
ELEVEN_KEY = os.environ.get(
    "ELEVENLABS_API_KEY",
    "sk_635e3b45c1d0d13c62ef9de381b8f6fc45eaa3dcb3bf9715",
)
VOICE_ID = "ZthjuvLPty3kTMaNKVKb"

# Upper bound (seconds) for each outbound HTTP call; without it a stalled
# upstream would block the request indefinitely.
REQUEST_TIMEOUT_S = 60


def tts(text: str) -> str:
    """Synthesize *text* to speech and return the path of the resulting MP3.

    The file is created with ``delete=False``; the caller owns it and is
    responsible for removing it.

    Raises:
        requests.RequestException: on network failure, timeout, or a non-2xx
            response from ElevenLabs (no file is created in that case).
    """
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
    headers = {"xi-api-key": ELEVEN_KEY, "Content-Type": "application/json"}
    response = requests.post(
        url, json={"text": text}, headers=headers, timeout=REQUEST_TIMEOUT_S
    )
    # Without this an error payload would be written out as if it were audio.
    response.raise_for_status()
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as audio:
        audio.write(response.content)
    return audio.name


def ask_llm(data):
    """Send *data* (JSON-serialized) to the LLM endpoint and return its answer.

    Returns the ``"answer"`` field of the JSON response, or
    ``"Unable to interpret results."`` when that field is absent.

    Raises:
        requests.RequestException: on network failure, timeout, or a non-2xx
            response.
        ValueError: if the response body is not valid JSON.
    """
    response = requests.post(
        LLM_URL, json={"query": json.dumps(data)}, timeout=REQUEST_TIMEOUT_S
    )
    response.raise_for_status()
    return response.json().get("answer", "Unable to interpret results.")


def _synthesize_base64(text: str) -> str:
    """Return base64-encoded speech for *text*, or ``""`` if synthesis fails.

    Always removes the temporary file produced by :func:`tts`.
    """
    try:
        audio_path = tts(text)
    except (requests.RequestException, OSError):
        # Audio is an optional extra: keep the textual diagnosis deliverable.
        logger.exception("TTS synthesis failed; responding without audio")
        return ""
    try:
        with open(audio_path, "rb") as audio:
            return base64.b64encode(audio.read()).decode()
    finally:
        with contextlib.suppress(OSError):
            os.remove(audio_path)


@app.get("/")
def root():
    """Liveness endpoint; returns a static status message."""
    return {"message": "Facial Diagnostic API running."}


@app.post("/analyze/")
async def analyze_frames(frames: List[UploadFile] = File(...)):
    """Analyse uploaded frames and return metrics, a diagnosis, and audio.

    The frontend sends multiple frames as files. Frames that are empty or
    cannot be decoded as images are skipped. The response contains the
    aggregated metrics (``summary``), the LLM text (``diagnosis``) and the
    spoken diagnosis as base64 MP3 (``audio_base64``, empty string when
    speech synthesis is unavailable).

    Raises:
        HTTPException: 502 when the LLM service cannot be reached or returns
            an unusable response.
    """
    metrics = []
    for upload in frames:
        contents = await upload.read()
        if not contents:
            # cv2.imdecode raises on an empty buffer.
            continue
        frame = cv2.imdecode(np.frombuffer(contents, np.uint8), cv2.IMREAD_COLOR)
        if frame is None:
            # imdecode signals undecodable data by returning None.
            logger.warning("Skipping undecodable frame %r", upload.filename)
            continue
        result = analyze_frame(frame)
        if result["found"]:
            metrics.append(result)

    summary = aggregate_metrics(metrics)

    # The outbound HTTP calls are blocking; run them off the event loop so
    # other requests keep being served meanwhile.
    try:
        diagnosis_text = await run_in_threadpool(ask_llm, summary)
    except (requests.RequestException, ValueError) as exc:
        logger.exception("LLM request failed")
        raise HTTPException(
            status_code=502, detail="Diagnosis service unavailable."
        ) from exc

    audio_b64 = await run_in_threadpool(_synthesize_base64, diagnosis_text)

    return {
        "summary": summary,
        "diagnosis": diagnosis_text,
        "audio_base64": audio_b64,
    }