Spaces:

ab646
/

dam-inference

Sleeping

File size: 3,863 Bytes

a06c4af

"""
Loop Mind — DAM Voice Biomarker Inference Space
================================================
Wraps KintsugiHealth/DAM for voice biomarker analysis.
Accepts audio, returns depression/anxiety severity scores.

Deployed as a Hugging Face Space (Gradio).
Called by the Loop Mind /voice-biomarker Supabase Edge Function.
"""

import json
import os
import subprocess
import sys
import tempfile
import warnings

import gradio as gr
import torch
import torchaudio

# Suppress every warning category for the whole process.
warnings.filterwarnings("ignore")

# Download the DAM model on first run
if not os.path.exists("dam"):
    print("Downloading KintsugiHealth/DAM model (~1GB)...")
    try:
        # check=True turns a non-zero git exit status into CalledProcessError
        # so a failed clone is reported here instead of surfacing later as an
        # unrelated-looking import error.
        subprocess.run(
            ["git", "clone", "https://huggingface.co/KintsugiHealth/dam"],
            check=True,
        )
    except (OSError, subprocess.CalledProcessError) as e:
        # OSError covers a missing `git` executable. Keep starting up: the
        # load step below will fail and leave dam_pipeline as None, which the
        # request handler reports as "Model not loaded".
        print(f"Failed to download DAM model: {e}")

# Make the modules inside the cloned repository importable.
sys.path.append(os.path.abspath("dam"))

print("Loading DAM pipeline...")
try:
    # `pipeline` is resolved through the sys.path entry added above.
    from pipeline import Pipeline
    dam_pipeline = Pipeline()
    print("DAM model loaded successfully.")
except Exception as e:
    # Any failure (missing clone, import error, model init error) degrades to
    # "no model" rather than preventing the app from starting.
    print(f"Failed to load DAM model: {e}")
    dam_pipeline = None


# Human-readable severity names, keyed by the quantized score they describe.
# The position of each name in the tuple is its integer score.
DEP_LABELS = dict(enumerate(("none", "mild-moderate", "severe")))
ANX_LABELS = dict(enumerate(("none", "mild", "moderate", "severe")))


def _as_scalar(value):
    """Return ``value.item()`` when *value* has an ``item`` method, else *value* unchanged."""
    return value.item() if hasattr(value, "item") else value


def analyze_audio(audio_filepath: str) -> str:
    """
    Run DAM inference on an audio file and return the scores as a JSON string.

    The file is loaded with torchaudio; when it has more than one channel the
    channels are averaged into one and the result is written to a uniquely
    named temporary WAV file, which is what the model then reads. That
    temporary file is always removed before returning.

    Args:
        audio_filepath: Path to the audio file. ``None`` or an empty string
            is treated as "no audio".

    Returns:
        A JSON object encoded as a string. On success it holds the keys
        ``depression``, ``depression_label``, ``anxiety``, ``anxiety_label``,
        ``raw_depression``, ``raw_anxiety`` and ``model``. On failure it holds
        a single ``error`` key with a message; exceptions raised during
        loading or inference are reported this way rather than propagated.
    """
    if not audio_filepath:
        return json.dumps({"error": "No audio provided"})

    if dam_pipeline is None:
        return json.dumps({"error": "Model not loaded"})

    mono_path = None
    try:
        # Pre-process: convert to mono if needed
        waveform, sample_rate = torchaudio.load(audio_filepath)
        if waveform.shape[0] > 1:
            waveform = waveform.mean(dim=0, keepdim=True)
            # A unique file per call, so simultaneous requests cannot
            # overwrite each other's down-mixed audio.
            fd, mono_path = tempfile.mkstemp(suffix=".wav")
            os.close(fd)
            torchaudio.save(mono_path, waveform, sample_rate)
            audio_filepath = mono_path

        # Run inference (both quantized and raw)
        res_q = dam_pipeline.run_on_file(audio_filepath, quantize=True)
        res_r = dam_pipeline.run_on_file(audio_filepath, quantize=False)

        # Extract and normalize scores; a missing key falls back to zero
        dep_q = int(_as_scalar(res_q.get("depression", 0)))
        anx_q = int(_as_scalar(res_q.get("anxiety", 0)))
        dep_r = float(_as_scalar(res_r.get("depression", 0.0)))
        anx_r = float(_as_scalar(res_r.get("anxiety", 0.0)))

        result = {
            "depression": dep_q,
            "depression_label": DEP_LABELS.get(dep_q, "unknown"),
            "anxiety": anx_q,
            "anxiety_label": ANX_LABELS.get(anx_q, "unknown"),
            "raw_depression": round(dep_r, 4),
            "raw_anxiety": round(anx_r, 4),
            "model": "KintsugiHealth/dam",
        }

        return json.dumps(result)

    except Exception as e:
        # Boundary handler: the caller expects a JSON string in every case.
        return json.dumps({"error": str(e)})

    finally:
        # Best-effort removal of the down-mixed copy, on success and failure.
        if mono_path is not None:
            try:
                os.unlink(mono_path)
            except OSError:
                pass


# Wrapper that analyzes the audio and then tidies up the scratch file
def analyze_and_cleanup(audio_filepath: str) -> str:
    """
    Run ``analyze_audio`` on the given path and return its JSON string.

    After the analysis, a leftover ``temp_mono.wav`` in the working
    directory is removed if one exists. Removal is best-effort: an
    ``OSError`` while deleting is ignored.
    """
    scratch_file = "temp_mono.wav"
    output = analyze_audio(audio_filepath)

    # Nothing to tidy up
    if not os.path.exists(scratch_file):
        return output

    try:
        os.unlink(scratch_file)
    except OSError:
        pass
    return output


# Input widget: configured with type="filepath" so the callback is given a path.
_audio_input = gr.Audio(
    type="filepath",
    label="Upload audio or record (30+ seconds recommended)",
)

# Output widget: shows the JSON string returned by the callback.
_result_box = gr.Textbox(label="Analysis Result (JSON)", lines=10)

# Single-function interface wired to the analyze-then-clean-up wrapper.
demo = gr.Interface(
    fn=analyze_and_cleanup,
    inputs=_audio_input,
    outputs=_result_box,
    title="Loop Mind — Voice Biomarker Analysis",
    description=(
        "Powered by KintsugiHealth/DAM. Returns depression and anxiety severity scores from voice acoustic features. "
        "For research and wellness tracking only — not a clinical diagnosis."
    ),
    theme="soft",
)

# Launch the app when this module is executed.
demo.launch()