dam-inference / app.py
ab646's picture
Add DAM voice biomarker inference app
a06c4af
"""
Loop Mind — DAM Voice Biomarker Inference Space
================================================
Wraps KintsugiHealth/DAM for voice biomarker analysis.
Accepts audio, returns depression/anxiety severity scores.
Deployed as a Hugging Face Space (Gradio).
Called by the Loop Mind /voice-biomarker Supabase Edge Function.
"""
import json
import os
import subprocess
import sys
import tempfile
import warnings

import gradio as gr
import torch
import torchaudio
# Silence library warnings so the application logs stay readable.
warnings.filterwarnings("ignore")

# Download the DAM model on first run. The clone lands in ./dam relative to
# the current working directory and is skipped when that path already exists.
if not os.path.exists("dam"):
    print("Downloading KintsugiHealth/DAM model (~1GB)...")
    try:
        clone = subprocess.run(
            ["git", "clone", "https://huggingface.co/KintsugiHealth/dam"]
        )
    except OSError as e:
        # Raised when the git executable itself cannot be started; fall
        # through so the load step below reports the model as unavailable.
        print(f"Could not run git to download the DAM model: {e}")
    else:
        if clone.returncode != 0:
            # Surface the real cause instead of only the import error below.
            print(f"git clone exited with status {clone.returncode}")

# The cloned repository provides the `pipeline` module imported below.
sys.path.append(os.path.abspath("dam"))

print("Loading DAM pipeline...")
try:
    from pipeline import Pipeline

    dam_pipeline = Pipeline()
    print("DAM model loaded successfully.")
except Exception as e:
    # Keep the module importable; dam_pipeline stays None so request handlers
    # can report that the model is not loaded instead of crashing.
    print(f"Failed to load DAM model: {e}")
    dam_pipeline = None
# Text labels keyed by integer severity level: DEP_LABELS for depression
# scores, ANX_LABELS for anxiety scores. Levels are numbered from 0.
DEP_LABELS = dict(enumerate(("none", "mild-moderate", "severe")))
ANX_LABELS = dict(enumerate(("none", "mild", "moderate", "severe")))
def _to_scalar(scores, key, default):
    """Return ``scores[key]`` (or *default* when absent) as a plain number."""
    value = scores.get(key, default)
    # Tensor-like values expose .item(); plain Python numbers pass through.
    return value.item() if hasattr(value, "item") else value


def analyze_audio(audio_filepath: str) -> str:
    """
    Run DAM inference on an audio file and return the scores as a JSON string.

    Multi-channel audio is averaged down to a single channel and written to a
    uniquely named temporary WAV file, which is removed before returning. The
    pipeline is run twice on the same file: once with ``quantize=True`` and
    once with ``quantize=False``.

    Args:
        audio_filepath: Path of the audio file to analyze. ``None`` or an
            empty string means no audio was supplied.

    Returns:
        A JSON object serialized as a string. On success it has the keys
        ``depression``, ``depression_label``, ``anxiety``, ``anxiety_label``,
        ``raw_depression``, ``raw_anxiety`` and ``model``. On failure it has a
        single ``error`` key; exceptions raised while loading or scoring the
        audio are reported this way rather than propagated.
    """
    if not audio_filepath:
        return json.dumps({"error": "No audio provided"})
    if dam_pipeline is None:
        return json.dumps({"error": "Model not loaded"})

    mono_path = None
    try:
        # Pre-process: convert to mono if needed
        waveform, sample_rate = torchaudio.load(audio_filepath)
        inference_path = audio_filepath
        if waveform.shape[0] > 1:
            waveform = torch.mean(waveform, dim=0, keepdim=True)
            # A unique file per call keeps concurrent requests from
            # overwriting or deleting each other's downmix.
            fd, mono_path = tempfile.mkstemp(suffix=".wav")
            os.close(fd)
            torchaudio.save(mono_path, waveform, sample_rate)
            inference_path = mono_path

        # Run inference (both quantized and raw)
        quantized = dam_pipeline.run_on_file(inference_path, quantize=True)
        raw = dam_pipeline.run_on_file(inference_path, quantize=False)

        # Extract and normalize scores
        dep_q = int(_to_scalar(quantized, "depression", 0))
        anx_q = int(_to_scalar(quantized, "anxiety", 0))
        dep_r = float(_to_scalar(raw, "depression", 0.0))
        anx_r = float(_to_scalar(raw, "anxiety", 0.0))

        result = {
            "depression": dep_q,
            "depression_label": DEP_LABELS.get(dep_q, "unknown"),
            "anxiety": anx_q,
            "anxiety_label": ANX_LABELS.get(anx_q, "unknown"),
            "raw_depression": round(dep_r, 4),
            "raw_anxiety": round(anx_r, 4),
            "model": "KintsugiHealth/dam",
        }
        return json.dumps(result)
    except Exception as e:
        return json.dumps({"error": str(e)})
    finally:
        # Best-effort removal of the downmix file created above.
        if mono_path is not None:
            try:
                os.unlink(mono_path)
            except OSError:
                pass
# Handler wrapper: run the analysis, then tidy up the scratch file.
def analyze_and_cleanup(audio_filepath: str) -> str:
    """
    Call analyze_audio and return its JSON string unchanged.

    After the analysis returns, temp_mono.wav in the current working
    directory is removed if present; failures to remove it are ignored.
    """
    payload = analyze_audio(audio_filepath)
    # Attempt the delete directly; a missing file raises FileNotFoundError,
    # which is an OSError and is ignored like any other removal failure.
    try:
        os.unlink("temp_mono.wav")
    except OSError:
        pass
    return payload
# Gradio UI: a single audio input handed to the handler as a file path, and a
# text box that displays the JSON string the handler returns.
audio_input = gr.Audio(
    type="filepath",
    label="Upload audio or record (30+ seconds recommended)",
)
result_box = gr.Textbox(label="Analysis Result (JSON)", lines=10)

demo = gr.Interface(
    fn=analyze_and_cleanup,
    inputs=audio_input,
    outputs=result_box,
    title="Loop Mind — Voice Biomarker Analysis",
    description="Powered by KintsugiHealth/DAM. Returns depression and anxiety severity scores from voice acoustic features. For research and wellness tracking only — not a clinical diagnosis.",
    theme="soft",
)

# Launch the interface when this module is executed.
demo.launch()