{
"type": "soft_vote_ensemble",
"models": [
{
"name": "distilbert-base-uncased",
"label": "dapt_distilbert",
"dir": "dapt_distilbert"
},
{
"name": "roberta-base",
"label": "roberta",
"dir": "roberta"
},
{
"name": "microsoft/deberta-base",
"label": "deberta",
"dir": "deberta"
}
],
"label_map": {
"DEPRESSED_MOOD": 0,
"ANHEDONIA": 1,
"APPETITE_CHANGE": 2,
"SLEEP_ISSUES": 3,
"PSYCHOMOTOR": 4,
"FATIGUE": 5,
"WORTHLESSNESS": 6,
"COGNITIVE_ISSUES": 7,
"SUICIDAL_THOUGHTS": 8,
"SPECIAL_CASE": 9,
"NO_SYMPTOM": 10
},
"label_readable": {
"DEPRESSED_MOOD": "Depressed Mood",
"ANHEDONIA": "Loss of Interest / Pleasure",
"APPETITE_CHANGE": "Appetite / Weight Change",
"SLEEP_ISSUES": "Sleep Disturbance",
"PSYCHOMOTOR": "Psychomotor Changes",
"FATIGUE": "Fatigue / Loss of Energy",
"WORTHLESSNESS": "Worthlessness / Guilt",
"COGNITIVE_ISSUES": "Difficulty Concentrating",
"SUICIDAL_THOUGHTS": "Suicidal Ideation",
"SPECIAL_CASE": "Other Clinical Indicator",
"NO_SYMPTOM": "No Symptom Detected"
},
"num_classes": 11,
"pooling": "mean",
"max_length": 128,
"thresholds": {
"DEPRESSED_MOOD": 0.25,
"ANHEDONIA": 0.45,
"APPETITE_CHANGE": 0.5,
"SLEEP_ISSUES": 0.55,
"PSYCHOMOTOR": 0.5,
"FATIGUE": 0.6000000000000001,
"WORTHLESSNESS": 0.4,
"COGNITIVE_ISSUES": 0.15000000000000002,
"SUICIDAL_THOUGHTS": 0.05,
"SPECIAL_CASE": 0.55,
"NO_SYMPTOM": 0.15000000000000002
},
"cv_performance": {
"ensemble_micro_f1": "0.813 \u00b1 0.010",
"ensemble_macro_f1": "0.770 \u00b1 0.017",
"threshold_tuned_micro_f1": 0.82,
"threshold_tuned_macro_f1": 0.792,
"note": "Threshold-tuned metrics have a slight optimistic bias (thresholds were tuned on the eval data). True performance lies between the raw ensemble and threshold-tuned metrics."
},
"training_config": {
"data": "cleaned_v2 (train + val) + augmented_v2 (196 samples)",
"total_samples": 1792,
"epochs": 7,
"lr": 3e-05,
"loss": "CrossEntropyLoss (effective-number weights, label_smoothing=0.1)",
"pooling": "mean"
},
"data_provenance": {
"original_dataset": "ReDSM5 (CIKM 2025), 1,484 Reddit posts, 2,058 annotations",
"cleaning": "Conflict resolution (53 sentences), dedup (20), confident learning (66 relabeled, 96 removed), manual fixes (9)",
"augmentation": "196 samples via Gemini 2.5 Flash paraphrasing, similarity filtered [0.70, 0.95]",
"dapt": "Domain-adaptive pre-training on 39K Reddit mental health posts (perplexity 16.90\u21927.59)"
}
}