dataset_analysis / index.html
dep-dev's picture
Update index.html
e4505d4 verified
Raw
History Blame Contribute Delete
51.4 kB
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Clinical NLP Dataset Analysis Report</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/4.4.1/chart.umd.js"></script>
<style>
/* Light-theme design tokens: page surfaces, text tones, border tints, seven accent families (base, -bg, -light) and two corner radii. */
:root {
/* Declare support for both schemes so UA-painted UI (scrollbars, default canvas colours) follows the prefers-color-scheme override of these tokens. */
color-scheme: light dark;
--bg: #ffffff; --bg2: #f5f4f0; --bg3: #edecea;
--text: #1a1a18; --text2: #5f5e5a; --text3: #888780;
--border: rgba(0,0,0,0.12); --border2: rgba(0,0,0,0.08);
--blue: #185FA5; --blue-bg: #E6F1FB; --blue-light: #B5D4F4;
--teal: #0F6E56; --teal-bg: #E1F5EE; --teal-light: #9FE1CB;
--coral: #993C1D; --coral-bg: #FAECE7; --coral-light: #F5C4B3;
--amber: #854F0B; --amber-bg: #FAEEDA; --amber-light: #FAC775;
--purple: #534AB7; --purple-bg: #EEEDFE; --purple-light: #CECBF6;
--green: #3B6D11; --green-bg: #EAF3DE; --green-light: #C0DD97;
--red: #A32D2D; --red-bg: #FCEBEB; --red-light: #F7C1C1;
--radius: 12px; --radius-sm: 8px;
}
/* Dark-theme overrides: redefines the colour tokens when the user agent reports a dark preference.
   --radius and --radius-sm are not redefined here, and --text3 keeps the same value as in the light theme. */
@media (prefers-color-scheme: dark) {
:root {
--bg: #1c1c1a; --bg2: #252523; --bg3: #2d2d2b;
--text: #e8e6de; --text2: #b4b2a9; --text3: #888780;
--border: rgba(255,255,255,0.12); --border2: rgba(255,255,255,0.07);
--blue: #85B7EB; --blue-bg: #042C53; --blue-light: #0C447C;
--teal: #5DCAA5; --teal-bg: #04342C; --teal-light: #085041;
--coral: #F0997B; --coral-bg: #4A1B0C; --coral-light: #712B13;
--amber: #EF9F27; --amber-bg: #412402; --amber-light: #633806;
--purple: #AFA9EC; --purple-bg: #26215C; --purple-light: #3C3489;
--green: #97C459; --green-bg: #173404; --green-light: #27500A;
--red: #F09595; --red-bg: #501313; --red-light: #791F1F;
}
}
/* Global reset: border-box sizing and zeroed margin/padding on every element. */
* { box-sizing: border-box; margin: 0; padding: 0; }
/* Page shell: system font stack on the --bg3 surface; .page caps the content at 960px and centres it. */
body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; font-size: 15px; line-height: 1.6; color: var(--text); background: var(--bg3); padding: 2rem 1rem; }
.page { max-width: 960px; margin: 0 auto; }
/* Heading scale: h1 report title, h2 numbered section headings (underlined), h3 sub-headings inside cards. */
h1 { font-size: 26px; font-weight: 600; margin-bottom: 0.25rem; }
h2 { font-size: 18px; font-weight: 500; color: var(--text); margin: 2rem 0 1rem; padding-bottom: 0.5rem; border-bottom: 0.5px solid var(--border); }
h3 { font-size: 15px; font-weight: 500; color: var(--text2); margin-bottom: 0.75rem; }
/* Body copy uses the secondary text tone. */
p { color: var(--text2); line-height: 1.7; margin-bottom: 0.75rem; }
/* Muted line shown under the report title. */
.subtitle { font-size: 14px; color: var(--text3); margin-bottom: 2rem; }
/* Card container wrapping the content of each report section. */
.section { background: var(--bg); border-radius: var(--radius); border: 0.5px solid var(--border); padding: 1.5rem; margin-bottom: 1.5rem; }
/* Equal-width 2-, 3- and 4-column grids (the 4-column variant uses a tighter gap). */
.grid-2 { display: grid; grid-template-columns: 1fr 1fr; gap: 1rem; }
.grid-3 { display: grid; grid-template-columns: repeat(3, 1fr); gap: 1rem; }
.grid-4 { display: grid; grid-template-columns: repeat(4, 1fr); gap: 0.75rem; }
/* At viewport widths of 700px or less every grid collapses to a single column. */
@media (max-width: 700px) { .grid-2, .grid-3, .grid-4 { grid-template-columns: 1fr; } }
/* Metric tile: uppercase .label, large .value, small .sub caption. */
.metric { background: var(--bg2); border-radius: var(--radius-sm); padding: 1rem 1.25rem; }
.metric .label { font-size: 12px; color: var(--text3); margin-bottom: 0.25rem; text-transform: uppercase; letter-spacing: 0.04em; }
.metric .value { font-size: 22px; font-weight: 500; color: var(--text); }
.metric .sub { font-size: 12px; color: var(--text3); margin-top: 0.25rem; }
/* Pill badge; each colour modifier pairs an accent's -bg token (fill) with its base token (text). */
.badge { display: inline-block; padding: 2px 10px; border-radius: 20px; font-size: 12px; font-weight: 500; }
.badge-blue { background: var(--blue-bg); color: var(--blue); }
.badge-teal { background: var(--teal-bg); color: var(--teal); }
.badge-coral { background: var(--coral-bg); color: var(--coral); }
.badge-amber { background: var(--amber-bg); color: var(--amber); }
.badge-purple { background: var(--purple-bg); color: var(--purple); }
.badge-green { background: var(--green-bg); color: var(--green); }
.badge-red { background: var(--red-bg); color: var(--red); }
/* Fixed-height, relatively positioned wrappers for chart canvases (default 280px, -lg 340px, -sm 200px).
   NOTE(review): the charts are created with maintainAspectRatio: false, so they presumably take their height from these wrappers — confirm against the Chart.js responsive docs. */
.chart-wrap { position: relative; width: 100%; height: 280px; }
.chart-wrap-lg { position: relative; width: 100%; height: 340px; }
.chart-wrap-sm { position: relative; width: 100%; height: 200px; }
/* HTML legend row placed above a chart; .dot is the 10px colour swatch in front of each entry. */
.legend { display: flex; flex-wrap: wrap; gap: 12px; margin-bottom: 12px; font-size: 12px; color: var(--text2); }
.legend span { display: flex; align-items: center; gap: 5px; }
.legend .dot { width: 10px; height: 10px; border-radius: 2px; flex-shrink: 0; }
/* Callout boxes with a 3px left accent bar. The three variants differ only in accent: .insight = blue, .warning = amber, .success = teal. */
.insight { background: var(--blue-bg); border-left: 3px solid var(--blue); border-radius: 0 var(--radius-sm) var(--radius-sm) 0; padding: 0.75rem 1rem; margin: 0.75rem 0; }
/* Paragraphs inside a callout take the accent colour and drop the default paragraph margin. */
.insight p { color: var(--blue); margin: 0; font-size: 14px; }
.warning { background: var(--amber-bg); border-left: 3px solid var(--amber); border-radius: 0 var(--radius-sm) var(--radius-sm) 0; padding: 0.75rem 1rem; margin: 0.75rem 0; }
.warning p { color: var(--amber); margin: 0; font-size: 14px; }
.success { background: var(--teal-bg); border-left: 3px solid var(--teal); border-radius: 0 var(--radius-sm) var(--radius-sm) 0; padding: 0.75rem 1rem; margin: 0.75rem 0; }
.success p { color: var(--teal); margin: 0; font-size: 14px; }
/* Data tables: full width, collapsed borders, uppercase muted header cells, hairline row separators (none after the last row). */
table { width: 100%; border-collapse: collapse; font-size: 13px; }
th { text-align: left; padding: 6px 10px; font-weight: 500; color: var(--text3); font-size: 11px; text-transform: uppercase; letter-spacing: 0.04em; border-bottom: 0.5px solid var(--border); }
td { padding: 7px 10px; border-bottom: 0.5px solid var(--border2); color: var(--text); }
tr:last-child td { border-bottom: none; }
/* Thin two-segment bar; .ip / .op use fixed hex colours rather than theme tokens.
   NOTE(review): .ratio-bar, .section-header and .divider are not referenced by the markup visible in this chunk — confirm usage before removing. */
.ratio-bar { display: flex; height: 6px; border-radius: 3px; overflow: hidden; gap: 1px; margin-top: 4px; }
.ratio-bar .ip { background: #185FA5; }
.ratio-bar .op { background: #0F6E56; }
.section-header { display: flex; align-items: baseline; justify-content: space-between; margin-bottom: 1rem; }
.divider { height: 0.5px; background: var(--border); margin: 1.5rem 0; }
/* Wrapping row of badges. */
.tag-row { display: flex; flex-wrap: wrap; gap: 6px; margin: 0.5rem 0; }
/* Horizontal rule restyled as a hairline in the border colour. */
hr { border: none; border-top: 0.5px solid var(--border); margin: 1.25rem 0; }
</style>
</head>
<body>
<div class="page">
<div class="section" style="background: linear-gradient(135deg, var(--blue-bg) 0%, var(--bg) 60%);">
<div style="display:flex;align-items:center;gap:12px;margin-bottom:0.5rem;">
<span class="badge badge-blue">Clinical NLP</span>
<span class="badge badge-coral">Graph Dataset</span>
<span class="badge badge-purple">Substance Use Disorder</span>
</div>
<h1>Dataset Analysis Report</h1>
<p class="subtitle">same_graph_test_bothmasked.json &amp; same_graph_train_bothmasked.json — Full combined analysis</p>
<div class="grid-4" style="margin-top:1.25rem;">
<div class="metric"><div class="label">Total patients</div><div class="value">7,628</div><div class="sub">Test: 2,640 · Train: 4,988</div></div>
<div class="metric"><div class="label">Total edges</div><div class="value">190,092</div><div class="sub">Similarity graph connections</div></div>
<div class="metric"><div class="label">Total words</div><div class="value">2.80M</div><div class="sub">Across all clinical notes</div></div>
<div class="metric"><div class="label">Total chars</div><div class="value">19.4M</div><div class="sub">Across all clinical notes</div></div>
</div>
</div>
<!-- SECTION 1: Corpus Overview -->
<h2>1 · Corpus overview &amp; label distribution</h2>
<div class="section">
<div class="grid-2">
<div>
<h3>Overall label split (combined)</h3>
<div class="legend"><span><span class="dot" style="background:#185FA5"></span>Inpatient (IP) — 2,609 (34.2%)</span><span><span class="dot" style="background:#0F6E56"></span>Outpatient (OP) — 5,019 (65.8%)</span></div>
<div class="chart-wrap"><canvas id="labelPie" role="img" aria-label="Pie chart showing IP 2609 vs OP 5019 patients">IP: 2609, OP: 5019</canvas></div>
</div>
<div>
<h3>Split across train / test files</h3>
<div class="legend"><span><span class="dot" style="background:#185FA5"></span>IP</span><span><span class="dot" style="background:#0F6E56"></span>OP</span></div>
<div class="chart-wrap"><canvas id="splitBar" role="img" aria-label="Grouped bar chart of IP/OP counts in train and test sets">Train: IP 1676 OP 3312; Test: IP 933 OP 1707</canvas></div>
</div>
</div>
<div class="insight"><p>The dataset is class-imbalanced at roughly 1:2 (IP:OP). This ratio is stable across both train and test splits, suggesting a stratified partition strategy was used.</p></div>
</div>
<!-- SECTION 2: Note Length Statistics -->
<h2>2 · Note length — tokens, words &amp; sentences</h2>
<div class="section">
<h3>Combined dataset summary statistics</h3>
<div class="grid-4" style="margin-bottom:1.25rem;">
<div class="metric"><div class="label">Avg words / note</div><div class="value">367.2</div><div class="sub">σ = 253.8</div></div>
<div class="metric"><div class="label">Median words</div><div class="value">309</div><div class="sub">Long right tail</div></div>
<div class="metric"><div class="label">Avg sentences</div><div class="value">48.3</div><div class="sub">σ = 35.7</div></div>
<div class="metric"><div class="label">Avg chars / note</div><div class="value">2,545</div><div class="sub">σ = 1,760</div></div>
</div>
<div class="grid-2">
<div>
<h3>Word count distribution — all notes</h3>
<div class="legend"><span><span class="dot" style="background:#888780"></span>All patients</span></div>
<div class="chart-wrap-lg"><canvas id="wordDist" role="img" aria-label="Bar chart of word count distribution across all notes">Distribution of word counts in 7628 notes</canvas></div>
</div>
<div>
<h3>Sentence count distribution — all notes</h3>
<div class="legend"><span><span class="dot" style="background:#888780"></span>All patients</span></div>
<div class="chart-wrap-lg"><canvas id="sentDist" role="img" aria-label="Bar chart of sentence count distribution">Distribution of sentence counts</canvas></div>
</div>
</div>
</div>
<!-- SECTION 3 & 4: IP vs OP length -->
<h2>3 &amp; 4 · IP vs OP — length &amp; severity</h2>
<div class="section">
<div class="grid-3" style="margin-bottom:1.25rem;">
<div class="metric" style="border-left:3px solid var(--blue);">
<div class="label">IP avg words</div><div class="value" style="color:var(--blue)">438.1</div><div class="sub">σ = 318.0 · median = 355</div>
</div>
<div class="metric" style="border-left:3px solid var(--teal);">
<div class="label">OP avg words</div><div class="value" style="color:var(--teal)">330.3</div><div class="sub">σ = 203.3 · median = 292</div>
</div>
<div class="metric" style="border-left:3px solid var(--amber);">
<div class="label">Length ratio IP:OP</div><div class="value" style="color:var(--amber)">1.33×</div><div class="sub">IP notes are 33% longer on avg</div>
</div>
</div>
<div class="grid-2">
<div>
<h3>Word count by label (bucketed)</h3>
<div class="legend"><span><span class="dot" style="background:#185FA5"></span>IP</span><span><span class="dot" style="background:#0F6E56"></span>OP</span></div>
<div class="chart-wrap-lg"><canvas id="ipopWords" role="img" aria-label="Grouped bar chart showing word count distribution for IP and OP patients">IP: shorter notes fewer, longer more; OP: more concentrated in medium range</canvas></div>
</div>
<div>
<h3>Severity score distribution</h3>
<div class="legend"><span><span class="dot" style="background:#185FA5"></span>IP</span><span><span class="dot" style="background:#0F6E56"></span>OP</span></div>
<div class="chart-wrap-lg"><canvas id="severityDist" role="img" aria-label="Stacked bar chart showing symptom severity scores for IP vs OP patients">IP avg severity 8.03, OP avg 6.27</canvas></div>
</div>
</div>
<hr>
<div class="grid-2">
<div>
<h3>Summary: IP vs OP length &amp; clinical stats</h3>
<table>
<thead><tr><th>Metric</th><th>IP (n=2,609)</th><th>OP (n=5,019)</th></tr></thead>
<tbody>
<tr><td>Mean words</td><td>438.1</td><td>330.3</td></tr>
<tr><td>Median words</td><td>355</td><td>292</td></tr>
<tr><td>Std words</td><td>318.0</td><td>203.3</td></tr>
<tr><td>Mean sentences</td><td>58.5</td><td>43.0</td></tr>
<tr><td>Mean chars</td><td>3,053</td><td>2,281</td></tr>
<tr><td>Max words (note)</td><td>2,609</td><td>2,501</td></tr>
<tr><td>Avg severity score</td><td>8.03</td><td>6.27</td></tr>
<tr><td>Score ≥ 10 (%)</td><td>33.4%</td><td>19.1%</td></tr>
<tr><td>Score ≥ 15 (%)</td><td>12.0%</td><td>4.4%</td></tr>
<tr><td>Avg Treatment_decision tokens</td><td>5.42</td><td>3.16</td></tr>
</tbody>
</table>
</div>
<div>
<h3>Train vs test file characteristics</h3>
<table>
<thead><tr><th>Metric</th><th>Train</th><th>Test</th></tr></thead>
<tbody>
<tr><td>Nodes</td><td>4,988</td><td>2,640</td></tr>
<tr><td>Edges</td><td>120,492</td><td>69,600</td></tr>
<tr><td>IP %</td><td>33.6%</td><td>35.3%</td></tr>
<tr><td>OP %</td><td>66.4%</td><td>64.7%</td></tr>
<tr><td>Mean words</td><td>338.5</td><td>421.3</td></tr>
<tr><td>Median words</td><td>291</td><td>348</td></tr>
<tr><td>Mean sentences</td><td>44.1</td><td>56.2</td></tr>
<tr><td>Mean chars</td><td>2,325</td><td>2,961</td></tr>
</tbody>
</table>
<div class="warning"><p>Test set notes are notably longer on average (421 vs 339 words). This may reflect more complex multi-visit collations, and could affect model behaviour at inference time.</p></div>
</div>
</div>
<div class="insight"><p>IP notes are consistently longer, denser, and higher-severity than OP notes. This is clinically expected: inpatient cases involve more events (admission, detox, stabilisation), multiple clinicians, and higher acuity — all reflected in longer collated notes.</p></div>
</div>
<!-- SECTION 5 & 6: IP vs OP patterns -->
<h2>5 &amp; 6 · IP vs OP — clinical patterns &amp; severity indicators</h2>
<div class="section">
<h3>Pattern strength ratio (IP% ÷ OP%) — features that distinguish inpatient admission</h3>
<p>A ratio &gt; 1.0 means the feature is more prevalent in IP notes. Features are sorted by ratio, descending.</p>
<div class="chart-wrap-lg" style="height:380px;"><canvas id="ratioChart" role="img" aria-label="Horizontal bar chart of IP-to-OP ratio for various clinical features">Features with highest IP/OP ratios: social withdrawal 2.0x, paranoia 1.78x, memory issues 1.67x</canvas></div>
<div class="insight"><p>Paranoia (1.78×), memory issues (1.67×), hallucinations (1.57×), nausea/vomiting (1.44×), and irritability (1.30×) are the strongest IP-associated symptom patterns. These reflect acute psychiatric and neurological complications requiring inpatient management.</p></div>
</div>
<!-- SECTION 7: Borderline cases -->
<h2>7 · Borderline &amp; ambiguous cases</h2>
<div class="section">
<div class="grid-4" style="margin-bottom:1rem;">
<div class="metric"><div class="label">Very short IP notes</div><div class="value">50</div><div class="sub">1.9% of IP — &lt;50 words</div></div>
<div class="metric"><div class="label">Very long OP notes</div><div class="value">260</div><div class="sub">5.2% of OP — &gt;700 words</div></div>
<div class="metric"><div class="label">Low-severity IP</div><div class="value">341</div><div class="sub">13.1% — score ≤ 2</div></div>
<div class="metric"><div class="label">High-severity OP</div><div class="value">555</div><div class="sub">11.1% — score ≥ 12</div></div>
</div>
<div class="warning"><p>An estimated 10–13% of notes may constitute "borderline" cases — IP notes with minimal clinical documentation or OP notes with complex, high-severity presentations. These represent real-world label ambiguity and will be the hardest cases for any classifier.</p></div>
<div class="grid-2" style="margin-top:0.75rem;">
<div>
<h3>Example short IP note (50 words)</h3>
<div style="background:var(--bg2);border-radius:var(--radius-sm);padding:0.75rem 1rem;font-size:13px;color:var(--text2);line-height:1.6;font-style:italic;">
"40-year-old married gentleman presented with history of alcohol and tobacco dependence syndrome for the past 17 years. Presented with withdrawal hallucinosis, multimodal in nature. Pulse rate is 112/min. Coarse tremors present. Plan: Treatment_decision1. Once the patient is stable, discharge..."
</div>
</div>
<div>
<h3>Severity overlap zone</h3>
<p style="font-size:13px;">At severity score = 8 (IP median), both classes overlap heavily:</p>
<table>
<thead><tr><th>Score threshold</th><th>IP above (%)</th><th>OP above (%)</th></tr></thead>
<tbody>
<tr><td>≥ 5</td><td>71.9%</td><td>61.3%</td></tr>
<tr><td>≥ 10</td><td>33.4%</td><td>19.1%</td></tr>
<tr><td>≥ 15</td><td>12.0%</td><td>4.4%</td></tr>
<tr><td>≥ 20</td><td>3.6%</td><td>0.7%</td></tr>
</tbody>
</table>
</div>
</div>
</div>
<!-- SECTION 8: Substances -->
<h2>8 · Substance mention analysis</h2>
<div class="section">
<div class="grid-2">
<div>
<h3>Substance prevalence — all notes</h3>
<div class="legend"><span><span class="dot" style="background:#185FA5"></span>IP %</span><span><span class="dot" style="background:#0F6E56"></span>OP %</span></div>
<div class="chart-wrap-lg"><canvas id="substanceBar" role="img" aria-label="Grouped bar chart of substance prevalence for IP and OP patients">Alcohol nearly universal at 99%, tobacco 84%, benzodiazepines 11%, cannabis 8%</canvas></div>
</div>
<div>
<h3>What the data reveals</h3>
<table>
<thead><tr><th>Substance</th><th>All</th><th>IP%</th><th>OP%</th><th>IP ratio</th></tr></thead>
<tbody>
<tr><td>Alcohol</td><td>7,581</td><td>99.0%</td><td>99.6%</td><td>0.99×</td></tr>
<tr><td>Tobacco/nicotine</td><td>6,467</td><td>83.7%</td><td>85.3%</td><td>0.98×</td></tr>
<tr><td>Benzodiazepines</td><td>824</td><td>13.9%</td><td>9.2%</td><td><b>1.51×</b></td></tr>
<tr><td>Cannabis</td><td>623</td><td>13.3%</td><td>5.5%</td><td><b>2.42×</b></td></tr>
<tr><td>Stimulants</td><td>290</td><td>6.1%</td><td>2.6%</td><td><b>2.35×</b></td></tr>
<tr><td>Opioids</td><td>205</td><td>5.9%</td><td>1.0%</td><td><b>5.90×</b></td></tr>
<tr><td>Sedatives</td><td>89</td><td>2.1%</td><td>0.7%</td><td><b>3.00×</b></td></tr>
<tr><td>Inhalants</td><td>54</td><td>1.3%</td><td>0.4%</td><td><b>3.25×</b></td></tr>
</tbody>
</table>
<div class="insight" style="margin-top:0.75rem;"><p>Opioid mention is the single strongest individual substance predictor of IP admission (5.9× more common in IP). Cannabis (2.4×), stimulants (2.4×), inhalants (3.3×), and sedatives (3.0×) also strongly discriminate. Alcohol and tobacco are near-universal and thus uninformative for classification.</p></div>
</div>
</div>
</div>
<!-- SECTION 9: Duration & Quantity -->
<h2>9 · Duration of use &amp; quantity patterns</h2>
<div class="section">
<div class="grid-3">
<div class="metric"><div class="label">IP avg daily quantity</div><div class="value">15.4 units</div><div class="sub">median = 15, max = 72 units/day</div></div>
<div class="metric"><div class="label">OP avg daily quantity</div><div class="value">15.1 units</div><div class="sub">median = 12, max = 96 units/day</div></div>
<div class="metric"><div class="label">Quantity mentions extracted</div><div class="value">7,196</div><div class="sub">IP: 2,741 · OP: 4,455</div></div>
</div>
<hr>
<div class="grid-2">
<div>
<h3>Duration of use (months, from text)</h3>
<table>
<thead><tr><th>Metric</th><th>IP (n=495)</th><th>OP (n=842)</th></tr></thead>
<tbody>
<tr><td>Mean duration</td><td>93.2 months</td><td>126.2 months</td></tr>
<tr><td>Median duration</td><td>48 months (4y)</td><td>96 months (8y)</td></tr>
</tbody>
</table>
<div class="warning"><p>OP patients show longer documented durations of use (median 8y vs 4y for IP). This likely reflects that OP notes accumulate more longitudinal history, while IP notes focus on acute presentation. Duration alone is not a reliable IP predictor.</p></div>
</div>
<div>
<h3>Quantity (units/day) distribution</h3>
<div class="legend"><span><span class="dot" style="background:#185FA5"></span>IP</span><span><span class="dot" style="background:#0F6E56"></span>OP</span></div>
<div class="chart-wrap"><canvas id="qtyChart" role="img" aria-label="Bar chart of alcohol quantity in units per day for IP and OP">IP and OP similar mean quantity around 15 units per day</canvas></div>
</div>
</div>
</div>
<!-- SECTION 10: Co-use -->
<h2>10 · Multi-substance co-use</h2>
<div class="section">
<div class="grid-2">
<div>
<h3>Number of substances co-mentioned (per note)</h3>
<div class="legend"><span><span class="dot" style="background:#185FA5"></span>IP</span><span><span class="dot" style="background:#0F6E56"></span>OP</span></div>
<div class="chart-wrap"><canvas id="coUseChart" role="img" aria-label="Grouped bar showing number of substance mentions co-occurring">Most notes have 2 substances: IP 1580, OP 3644</canvas></div>
</div>
<div>
<h3>Top substance co-occurrence pairs</h3>
<table>
<thead><tr><th>Pair</th><th>Count</th></tr></thead>
<tbody>
<tr><td>Alcohol + Tobacco</td><td>6,462</td></tr>
<tr><td>Alcohol + Benzodiazepines</td><td>819</td></tr>
<tr><td>Benzodiazepines + Tobacco</td><td>716</td></tr>
<tr><td>Alcohol + Cannabis</td><td>622</td></tr>
<tr><td>Cannabis + Tobacco</td><td>605</td></tr>
<tr><td>Alcohol + Stimulants</td><td>289</td></tr>
<tr><td>Stimulants + Tobacco</td><td>262</td></tr>
<tr><td>Alcohol + Opioids</td><td>204</td></tr>
<tr><td>Opioids + Tobacco</td><td>192</td></tr>
<tr><td>Cannabis + Stimulants</td><td>136</td></tr>
</tbody>
</table>
</div>
</div>
<div class="insight"><p>IP patients show significantly higher rates of 4+ substance co-use: 8.5% of IP vs 2.7% of OP have 4 or more substances mentioned. This polysubstance pattern is a strong predictor of admission complexity.</p></div>
</div>
<!-- SECTION 11: Symptoms -->
<h2>11 · Symptom analysis &amp; co-occurrence</h2>
<div class="section">
<div class="grid-2">
<div>
<h3>Symptom prevalence across cohort</h3>
<div class="legend"><span><span class="dot" style="background:#185FA5"></span>IP %</span><span><span class="dot" style="background:#0F6E56"></span>OP %</span></div>
<div class="chart-wrap-lg" style="height:360px;"><canvas id="symptomBar" role="img" aria-label="Grouped bar chart of symptom prevalence by IP and OP">Withdrawal and craving most common; hallucinations and memory more elevated in IP</canvas></div>
</div>
<div>
<h3>Top symptom co-occurrences (all patients)</h3>
<table>
<thead><tr><th>Symptom pair</th><th>Count</th></tr></thead>
<tbody>
<tr><td>Craving + Withdrawal</td><td>5,459</td></tr>
<tr><td>Tremors + Withdrawal</td><td>4,001</td></tr>
<tr><td>Craving + Tremors</td><td>3,758</td></tr>
<tr><td>Sleep disturbance + Withdrawal</td><td>2,687</td></tr>
<tr><td>Seizures + Withdrawal</td><td>2,677</td></tr>
<tr><td>Craving + Sleep disturbance</td><td>2,598</td></tr>
<tr><td>Craving + Seizures</td><td>2,407</td></tr>
<tr><td>Sleep disturbance + Tremors</td><td>2,094</td></tr>
<tr><td>Seizures + Tremors</td><td>1,891</td></tr>
<tr><td>Irritability + Withdrawal</td><td>1,829</td></tr>
<tr><td>Craving + Irritability</td><td>1,784</td></tr>
<tr><td>Anxiety + Withdrawal</td><td>1,561</td></tr>
<tr><td>Anxiety + Craving</td><td>1,534</td></tr>
<tr><td>Depression + Withdrawal</td><td>1,282</td></tr>
</tbody>
</table>
</div>
</div>
</div>
<!-- SECTION 12: Predictive symptoms -->
<h2>12 · Which symptoms are predictive of IP admission?</h2>
<div class="section">
<div class="chart-wrap" style="height:320px;"><canvas id="symptomRatio" role="img" aria-label="Horizontal bar chart of IP-to-OP symptom ratio. Paranoia, memory, hallucinations lead.">Paranoia 1.78x, memory 1.67x, hallucinations 1.57x, nausea 1.44x</canvas></div>
<div class="grid-2" style="margin-top:1rem;">
<div class="insight"><p><b>Most predictive (IP-enriched):</b> Paranoia (1.78×), memory/blackout issues (1.67×), auditory/visual hallucinations (1.57×), and nausea/vomiting (1.44×) are the strongest individual symptom predictors of inpatient admission.</p></div>
<div class="success"><p><b>Near-universal (non-discriminating):</b> Withdrawal (85%), craving (79%), and tremors (58%) are so prevalent across both classes that they add little discriminative signal on their own. Their combinations matter more.</p></div>
</div>
</div>
<!-- SECTION 13–16: Temporal / Relapse -->
<h2>13–16 · Temporal patterns, relapse, &amp; event sequences</h2>
<div class="section">
<div class="grid-4" style="margin-bottom:1.25rem;">
<div class="metric"><div class="label">Avg relapse mentions</div><div class="value">2.19</div><div class="sub">σ = 3.54 — all patients</div></div>
<div class="metric" style="border-left:3px solid var(--blue);"><div class="label">IP relapse avg</div><div class="value" style="color:var(--blue)">2.85</div><div class="sub">σ = 4.40</div></div>
<div class="metric" style="border-left:3px solid var(--teal);"><div class="label">OP relapse avg</div><div class="value" style="color:var(--teal)">1.85</div><div class="sub">σ = 2.94</div></div>
<div class="metric"><div class="label">Full event sequences</div><div class="value">1,006</div><div class="sub">13.2% of all notes</div></div>
</div>
<div class="grid-2">
<div>
<h3>Relapse mention frequency (IP vs OP)</h3>
<div class="legend"><span><span class="dot" style="background:#185FA5"></span>IP</span><span><span class="dot" style="background:#0F6E56"></span>OP</span></div>
<div class="chart-wrap"><canvas id="relapseDist" role="img" aria-label="Grouped bar chart of relapse mention frequency for IP and OP">IP 2.85 avg mentions, OP 1.85 avg mentions</canvas></div>
</div>
<div>
<h3>Temporal pattern notes</h3>
<table>
<thead><tr><th>Pattern</th><th>IP</th><th>OP</th></tr></thead>
<tbody>
<tr><td>Notes with 0 relapses</td><td>47.7%</td><td>53.0%</td></tr>
<tr><td>Notes with 5+ relapses</td><td>22.0%</td><td>13.1%</td></tr>
<tr><td>Avg abstinence interval</td><td>6.1 days</td><td>6.0 days</td></tr>
<tr><td>Abstinence mentions (n)</td><td>1,195</td><td>1,404</td></tr>
<tr><td>Full sequence notes</td><td>~13%</td><td>~13%</td></tr>
</tbody>
</table>
<div class="insight" style="margin-top:0.75rem;"><p>IP notes average 54% more relapse mentions than OP notes (2.85 vs 1.85) and are 68% more likely to contain 5+ relapse mentions (22.0% vs 13.1%), consistent with more severe, cyclical SUD patterns requiring inpatient intervention.</p></div>
</div>
</div>
<hr>
<h3>Canonical event sequence identified (in 1,006 notes, 13.2%)</h3>
<div style="display:flex;align-items:center;gap:8px;flex-wrap:wrap;margin:0.75rem 0;">
<span class="badge badge-coral">Abstinence</span>
<span style="color:var(--text3);">→</span>
<span class="badge badge-amber">Relapse / Lapse</span>
<span style="color:var(--text3);">→</span>
<span class="badge badge-blue">Detoxification</span>
<span style="color:var(--text3);">→</span>
<span class="badge badge-green">Follow-up / Review</span>
</div>
<p style="font-size:13px;">The abstinence → relapse → detox → follow-up cycle is the dominant clinical trajectory in the dataset. Notes encoding the full cycle tend to be significantly longer (avg ~500+ words) and are more common in complex multi-visit IP cases.</p>
</div>
<!-- SECTION 17–18: Behavioral indicators -->
<h2>17–18 · Behavioral indicators &amp; classification potential</h2>
<div class="section">
<div class="grid-2">
<div>
<h3>Behavioral indicator prevalence (IP vs OP %)</h3>
<div class="legend"><span><span class="dot" style="background:#185FA5"></span>IP</span><span><span class="dot" style="background:#0F6E56"></span>OP</span></div>
<div class="chart-wrap-lg" style="height:320px;"><canvas id="behavioralBar" role="img" aria-label="Grouped bar chart of behavioral indicators for IP vs OP patients">Social withdrawal 2.0x, delusional thinking 1.68x, legal issues 1.17x elevated in IP</canvas></div>
</div>
<div>
<h3>Feature discriminability table</h3>
<table>
<thead><tr><th>Feature</th><th>IP%</th><th>OP%</th><th>Ratio</th></tr></thead>
<tbody>
<tr><td>Social withdrawal</td><td>4.2%</td><td>2.1%</td><td><b style="color:var(--blue)">2.00×</b></td></tr>
<tr><td>Delusional thinking</td><td>12.0%</td><td>7.2%</td><td><b style="color:var(--blue)">1.68×</b></td></tr>
<tr><td>Socio-occupational dysfunction</td><td>49.2%</td><td>40.4%</td><td>1.22×</td></tr>
<tr><td>Violence/aggression</td><td>10.3%</td><td>8.6%</td><td>1.20×</td></tr>
<tr><td>Legal issues</td><td>6.1%</td><td>5.2%</td><td>1.17×</td></tr>
<tr><td>Family discord</td><td>32.3%</td><td>34.2%</td><td>0.94×</td></tr>
<tr><td>Use despite harm</td><td>20.4%</td><td>22.1%</td><td>0.92×</td></tr>
<tr><td>Loss of control</td><td>42.6%</td><td>48.6%</td><td>0.88×</td></tr>
<tr><td>Tolerance</td><td>53.4%</td><td>61.3%</td><td>0.87×</td></tr>
</tbody>
</table>
<div class="warning"><p>Interestingly, standard dependence markers like "loss of control" and "tolerance" are more common in OP notes — possibly because OP clinicians document them more thoroughly in structured assessments, while IP notes focus on acute management.</p></div>
</div>
</div>
<hr>
<h3>Can these features alone classify IP vs OP?</h3>
<div class="grid-3">
<div class="metric" style="background:var(--teal-bg);"><div class="label" style="color:var(--teal);">Strong signal features</div><div class="value" style="color:var(--teal);font-size:16px;">Opioids, paranoia,<br>hallucinations, memory,<br>social withdrawal</div></div>
<div class="metric" style="background:var(--amber-bg);"><div class="label" style="color:var(--amber);">Moderate signal</div><div class="value" style="color:var(--amber);font-size:16px;">Multi-substance,<br>nausea/vomiting,<br>delusional thinking</div></div>
<div class="metric" style="background:var(--red-bg);"><div class="label" style="color:var(--red);">Weak/inverted signal</div><div class="value" style="color:var(--red);font-size:16px;">Alcohol, tobacco,<br>tolerance, LOC,<br>craving, withdrawal</div></div>
</div>
<p style="margin-top:1rem;font-size:14px;">Based on feature ratios alone, a rule-based classifier would achieve moderate performance. The strongest predictor combination is: <b>opioid mention + hallucinations + paranoia + high relapse count</b>. Lexical features alone likely achieve 65–72% accuracy; the graph structure (similarity edges) provides the key additional signal for GNN-based models.</p>
</div>
<!-- SECTION 19: Masking analysis -->
<h2>19 · Masking analysis — does it reduce leakage?</h2>
<div class="section">
<div class="grid-2">
<div>
<h3>Masked entity types (combined)</h3>
<div class="chart-wrap"><canvas id="maskPie" role="img" aria-label="Pie chart of masked entity types. Person most common, then company, address, dates, groups, languages.">Person 14444, company 8746, address 6414, dates 2748, groups 300, languages 225</canvas></div>
</div>
<div>
<h3>Masking statistics by class</h3>
<table>
<thead><tr><th>Metric</th><th>IP</th><th>OP</th></tr></thead>
<tbody>
<tr><td>Avg masked tokens / note</td><td>5.50</td><td>4.03</td></tr>
<tr><td>Total masked tokens</td><td>14,360</td><td>20,226</td></tr>
<tr><td>Notes with any masking</td><td>93.4%</td><td>92.1%</td></tr>
<tr><td>Treatment_decision tokens / note</td><td>5.42</td><td>3.16</td></tr>
<tr><td>Notes with Treatment_decision</td><td>90.8%</td><td>81.9%</td></tr>
</tbody>
</table>
<div class="grid-2" style="margin-top:0.75rem;gap:0.5rem;">
<div class="success"><p>Person, address, company, and date masking successfully removes patient/clinician identifiers — preventing memorisation of specific individuals or institutions as IP/OP labels.</p></div>
<div class="warning"><p><b>Leakage risk:</b> Treatment_decision tokens (e.g. Treatment_decision1–10) are 71% more frequent in IP notes on average. A model can trivially detect this pattern as a proxy for admission severity.</p></div>
</div>
</div>
</div>
<hr>
<h3>What the masking covers vs what it misses</h3>
<div class="grid-2">
<div>
<p style="font-size:13px;"><b>Covered (low leakage risk):</b></p>
<div class="tag-row">
<span class="badge badge-green">Person names</span>
<span class="badge badge-green">Addresses</span>
<span class="badge badge-green">Company/hospital names</span>
<span class="badge badge-green">Specific dates</span>
<span class="badge badge-green">Languages</span>
<span class="badge badge-green">Group identifiers</span>
</div>
</div>
<div>
<p style="font-size:13px;"><b>Not fully masked (residual leakage risk):</b></p>
<div class="tag-row">
<span class="badge badge-coral">Treatment decision count</span>
<span class="badge badge-coral">Note length</span>
<span class="badge badge-coral">Multi-substance count</span>
<span class="badge badge-coral">Admission-specific language</span>
<span class="badge badge-amber">Ward/unit references</span>
<span class="badge badge-amber">Discharge planning phrases</span>
</div>
</div>
</div>
<div class="insight"><p>The masking strategy is robust for PII/PHI removal. However, structural leakage remains: IP notes are longer, contain more Treatment_decision placeholders, and use admission-specific vocabulary (discharge, ward, detox unit). A model can learn these structural cues even without entity names. To fully eliminate leakage, Treatment_decision tokens should be masked uniformly, and note length normalisation should be considered.</p></div>
</div>
<!-- SECTION: Graph structure -->
<h2>Graph structure — edge weight &amp; connectivity</h2>
<div class="section">
<div class="grid-4" style="margin-bottom:1.25rem;">
<div class="metric"><div class="label">Total edges</div><div class="value">190,092</div><div class="sub">Both files combined</div></div>
<div class="metric"><div class="label">Edge weight range</div><div class="value">0.80–1.00</div><div class="sub">mean = 0.821</div></div>
<div class="metric"><div class="label">Mean node degree</div><div class="value">67.3</div><div class="sub">max = 776</div></div>
<div class="metric"><div class="label">Same-label edges</div><div class="value">62.6%</div><div class="sub">IP-IP: 6.9% · OP-OP: 55.7%</div></div>
</div>
<div class="grid-2">
<div>
<h3>Edge weight distribution</h3>
<div class="chart-wrap-sm"><canvas id="edgeWeight" role="img" aria-label="Bar chart of edge weight distribution. 91% between 0.80 and 0.85.">91% of edges 0.80-0.85, 8.5% 0.85-0.90</canvas></div>
</div>
<div>
<h3>Cross-label connectivity</h3>
<table>
<thead><tr><th>Edge type</th><th>Count</th><th>%</th></tr></thead>
<tbody>
<tr><td>OP — OP</td><td>105,847</td><td>55.7%</td></tr>
<tr><td>IP — OP (cross)</td><td>71,141</td><td>37.4%</td></tr>
<tr><td>IP — IP</td><td>13,104</td><td>6.9%</td></tr>
</tbody>
</table>
<div class="warning"><p>37.4% of edges are cross-label (IP ↔ OP). This high cross-label similarity is expected given that all patients are SUD cases with overlapping symptom language, but it creates a challenging low-homophily setting for GNN classifiers.</p></div>
</div>
</div>
</div>
<div style="text-align:center;color:var(--text3);font-size:12px;padding:2rem 0 1rem;">
Analysis performed on same_graph_test_bothmasked.json (2,640 nodes, 69,600 edges) and same_graph_train_bothmasked.json (4,988 nodes, 120,492 edges)
</div>
</div>
<script>
// Theme detection: the colour-scheme media query is sampled once, when the script runs.
const isDark = window.matchMedia('(prefers-color-scheme: dark)').matches;

// Grid-line colour and tick/legend text colour for the detected theme.
const gridColor = `rgba(${isDark ? '255,255,255' : '0,0,0'},0.07)`;
const textColor = isDark ? '#b4b2a9' : '#5f5e5a';

// Series colours for the IP and OP datasets (solid and 70%-alpha variants).
const ipColor = '#185FA5';
const opColor = '#0F6E56';
const ipColorLight = 'rgba(24,95,165,0.7)';
const opColorLight = 'rgba(15,110,86,0.7)';

// Global Chart.js defaults shared by every chart created below.
Object.assign(Chart.defaults.font, {
  family: '-apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif',
  size: 12
});
Chart.defaults.color = textColor;
/**
 * Build a fresh Chart.js options object with the defaults shared by the
 * cartesian charts on this page: responsive sizing, hidden legend, and
 * theme-coloured grid lines and tick labels on both axes.
 *
 * @param {string} [title] Optional chart title. Previously this argument was
 *   accepted but ignored; it is now shown through the title plugin when given.
 *   When omitted (as every existing caller does) the title stays hidden.
 * @returns {object} A new options object; callers may spread and override it.
 *   Note that a caller overriding `plugins` after spreading replaces the whole
 *   `plugins` sub-object, including the title entry built here.
 */
function baseOpts(title) {
  // Each axis gets its own object so the two scales never share a reference.
  const themedAxis = () => ({
    grid: { color: gridColor },
    ticks: { color: textColor }
  });
  const titleConfig = title
    ? { display: true, text: title, color: textColor }
    : { display: false };
  return {
    responsive: true,
    maintainAspectRatio: false,
    plugins: {
      legend: { display: false },
      title: titleConfig
    },
    scales: {
      x: themedAxis(),
      y: themedAxis()
    }
  };
}
// 1. Label pie — doughnut with one slice per label (Inpatient / Outpatient).
{
  const labelSlices = {
    data: [2609, 5019],
    backgroundColor: [ipColor, opColor],
    borderWidth: 2,
    // Slice borders take the page background colour of the active theme.
    borderColor: isDark ? '#1c1c1a' : '#fff'
  };
  const labelLegend = {
    display: true,
    position: 'bottom',
    labels: { color: textColor, padding: 16, boxWidth: 12 }
  };
  new Chart(document.getElementById('labelPie'), {
    type: 'doughnut',
    data: {
      labels: ['Inpatient (IP)', 'Outpatient (OP)'],
      datasets: [labelSlices]
    },
    options: {
      responsive: true,
      maintainAspectRatio: false,
      cutout: '60%',
      plugins: { legend: labelLegend }
    }
  });
}
// 2. Train/test split bar — IP and OP counts for the Train and Test categories.
{
  const splitAxis = () => ({
    grid: { color: gridColor },
    ticks: { color: textColor }
  });
  const splitOptions = {
    ...baseOpts(),
    responsive: true,
    maintainAspectRatio: false,
    // Replaces the plugins object from baseOpts so the legend is shown.
    plugins: {
      legend: {
        display: true,
        position: 'top',
        labels: { color: textColor, boxWidth: 12, padding: 12 }
      }
    },
    scales: { x: splitAxis(), y: splitAxis() }
  };
  new Chart(document.getElementById('splitBar'), {
    type: 'bar',
    data: {
      labels: ['Train', 'Test'],
      datasets: [
        { label: 'IP', data: [1676, 933], backgroundColor: ipColor, borderRadius: 4 },
        { label: 'OP', data: [3312, 1707], backgroundColor: opColor, borderRadius: 4 }
      ]
    },
    options: splitOptions
  });
}
// 3. Word count distribution (bucketed).
// wordLabels is declared at the top level because the IP-vs-OP word chart reuses it.
const wordLabels = [
  '0–50', '50–100', '100–200', '200–400',
  '400–700', '700–1000', '1000–2000', '2000+'
];
const wordAll = [125, 322, 1338, 3378, 1801, 452, 204, 8];
{
  const wordOptions = {
    ...baseOpts(),
    responsive: true,
    maintainAspectRatio: false,
    plugins: { legend: { display: false } },
    scales: {
      // Every bucket label is drawn (no auto-skip), rotated by at most 30 degrees.
      x: {
        grid: { color: gridColor },
        ticks: { color: textColor, autoSkip: false, maxRotation: 30 }
      },
      y: {
        grid: { color: gridColor },
        ticks: { color: textColor },
        title: { display: true, text: 'Number of notes', color: textColor }
      }
    }
  };
  new Chart(document.getElementById('wordDist'), {
    type: 'bar',
    data: {
      labels: wordLabels,
      datasets: [
        { label: 'Notes', data: wordAll, backgroundColor: '#888780', borderRadius: 4 }
      ]
    },
    options: wordOptions
  });
}
// Sentence distribution — bucketed counts, one bar per bucket.
const sentLabels = [
  '0–5', '5–15', '15–30', '30–60',
  '60–100', '100–150', '150–200', '200+'
];
const sentAll = [60, 420, 980, 2450, 2380, 1010, 270, 58];
{
  const sentOptions = {
    ...baseOpts(),
    responsive: true,
    maintainAspectRatio: false,
    plugins: { legend: { display: false } },
    scales: {
      x: {
        grid: { color: gridColor },
        ticks: { color: textColor, autoSkip: false, maxRotation: 30 }
      },
      y: {
        grid: { color: gridColor },
        ticks: { color: textColor },
        title: { display: true, text: 'Number of notes', color: textColor }
      }
    }
  };
  new Chart(document.getElementById('sentDist'), {
    type: 'bar',
    data: {
      labels: sentLabels,
      datasets: [
        { label: 'Notes', data: sentAll, backgroundColor: '#73726c', borderRadius: 4 }
      ]
    },
    options: sentOptions
  });
}
// 4. IP vs OP word count — same buckets as wordLabels, one dataset per label.
const ipWords = [50, 103, 379, 939, 734, 250, 149, 5];
const opWords = [75, 219, 959, 2439, 1067, 202, 55, 3];
{
  const ipopLegend = {
    display: true,
    position: 'top',
    labels: { color: textColor, boxWidth: 10, padding: 10 }
  };
  const ipopScales = {
    x: {
      grid: { color: gridColor },
      ticks: { color: textColor, autoSkip: false, maxRotation: 30 }
    },
    y: {
      grid: { color: gridColor },
      ticks: { color: textColor }
    }
  };
  new Chart(document.getElementById('ipopWords'), {
    type: 'bar',
    data: {
      labels: wordLabels,
      datasets: [
        { label: 'IP', data: ipWords, backgroundColor: ipColorLight, borderRadius: 3 },
        { label: 'OP', data: opWords, backgroundColor: opColorLight, borderRadius: 3 }
      ]
    },
    options: {
      responsive: true,
      maintainAspectRatio: false,
      plugins: { legend: ipopLegend },
      scales: ipopScales
    }
  });
}
// Severity distribution — IP and OP counts per score band.
{
  const severityAxis = () => ({
    grid: { color: gridColor },
    ticks: { color: textColor }
  });
  const severityData = {
    labels: ['<5', '5–9', '10–14', '15–19', '≥20'],
    datasets: [
      {
        label: 'IP',
        data: [733, 1004, 560, 219, 93],
        backgroundColor: ipColorLight,
        borderRadius: 3
      },
      {
        label: 'OP',
        data: [1940, 2122, 732, 186, 39],
        backgroundColor: opColorLight,
        borderRadius: 3
      }
    ]
  };
  new Chart(document.getElementById('severityDist'), {
    type: 'bar',
    data: severityData,
    options: {
      responsive: true,
      maintainAspectRatio: false,
      plugins: {
        legend: {
          display: true,
          position: 'top',
          labels: { color: textColor, boxWidth: 10, padding: 10 }
        }
      },
      scales: { x: severityAxis(), y: severityAxis() }
    }
  });
}
// 5. Ratio chart (IP/OP ratios for various features).
// ratioFeatures and ratioVals are index-aligned; they are zipped into
// [feature, ratio] pairs and ordered from the largest ratio to the smallest.
const ratioFeatures = [
  'Opioids', 'Social withdrawal', 'Inhalants', 'Sedatives', 'Cannabis', 'Paranoia',
  'Stimulants', 'Memory issues', 'Hallucinations', 'Delusional thinking',
  'Nausea/vomiting', 'Suicidal ideation', 'Irritability', 'Sleep disturbance',
  'Seizures', 'Depression', 'Benzodiazepines', 'Anxiety'
];
const ratioVals = [
  5.90, 2.00, 3.25, 3.00, 2.42, 1.78,
  2.35, 1.67, 1.57, 1.68,
  1.44, 1.25, 1.30, 1.18,
  1.13, 1.15, 1.51, 1.03
];
const sortedPairs = ratioFeatures
  .map((feature, idx) => [feature, ratioVals[idx]])
  .sort(([, left], [, right]) => right - left);
{
  // Three-step colour ramp: above 1.5, above 1.2, everything else.
  const ratioShade = ratio => {
    if (ratio > 1.5) return ipColor;
    if (ratio > 1.2) return '#378ADD';
    return '#B5D4F4';
  };
  new Chart(document.getElementById('ratioChart'), {
    type: 'bar',
    data: {
      labels: sortedPairs.map(([feature]) => feature),
      datasets: [{
        data: sortedPairs.map(([, ratio]) => ratio),
        backgroundColor: sortedPairs.map(([, ratio]) => ratioShade(ratio)),
        borderRadius: 4
      }]
    },
    options: {
      // Horizontal bars: categories run along the y axis.
      indexAxis: 'y',
      responsive: true,
      maintainAspectRatio: false,
      plugins: { legend: { display: false } },
      scales: {
        x: {
          grid: { color: gridColor },
          ticks: { color: textColor },
          min: 0,
          max: 6.5,
          title: { display: true, text: 'IP% ÷ OP% ratio (>1 = more in IP)', color: textColor }
        },
        y: {
          grid: { color: gridColor },
          ticks: { color: textColor }
        }
      }
    }
  });
}
// 8. Substance prevalence — IP % and OP % per substance (arrays are index-aligned).
const substances = [
  'Alcohol', 'Tobacco', 'Benzodiazepines', 'Cannabis',
  'Stimulants', 'Opioids', 'Sedatives', 'Inhalants'
];
const subIP = [99.0, 83.7, 13.9, 13.3, 6.1, 5.9, 2.1, 1.3];
const subOP = [99.6, 85.3, 9.2, 5.5, 2.6, 1.0, 0.7, 0.4];
{
  const asPercent = value => value + '%';
  new Chart(document.getElementById('substanceBar'), {
    type: 'bar',
    data: {
      labels: substances,
      datasets: [
        { label: 'IP %', data: subIP, backgroundColor: ipColor, borderRadius: 3 },
        { label: 'OP %', data: subOP, backgroundColor: opColor, borderRadius: 3 }
      ]
    },
    options: {
      responsive: true,
      maintainAspectRatio: false,
      plugins: {
        legend: {
          display: true,
          position: 'top',
          labels: { color: textColor, boxWidth: 10, padding: 10 }
        }
      },
      scales: {
        x: {
          grid: { color: gridColor },
          ticks: { color: textColor, autoSkip: false, maxRotation: 30 }
        },
        // The axis is capped at 110; tick values get a '%' suffix.
        y: {
          grid: { color: gridColor },
          ticks: { color: textColor, callback: asPercent },
          max: 110
        }
      }
    }
  });
}
// 9. Quantity chart — IP and OP counts per quantity range.
{
  const qtyAxis = () => ({
    grid: { color: gridColor },
    ticks: { color: textColor }
  });
  const qtyLegend = {
    display: true,
    position: 'top',
    labels: { color: textColor, boxWidth: 10, padding: 10 }
  };
  new Chart(document.getElementById('qtyChart'), {
    type: 'bar',
    data: {
      labels: ['1–5', '6–10', '11–15', '16–20', '21–30', '31–50', '50+'],
      datasets: [
        {
          label: 'IP',
          data: [180, 480, 780, 560, 450, 220, 71],
          backgroundColor: ipColorLight,
          borderRadius: 3
        },
        {
          label: 'OP',
          data: [390, 900, 1200, 940, 660, 280, 85],
          backgroundColor: opColorLight,
          borderRadius: 3
        }
      ]
    },
    options: {
      responsive: true,
      maintainAspectRatio: false,
      plugins: { legend: qtyLegend },
      scales: { x: qtyAxis(), y: qtyAxis() }
    }
  });
}
// 10. Co-use chart — IP and OP counts by number of substances co-mentioned.
{
  const coUseData = {
    labels: ['0', '1', '2', '3', '4', '5', '6', '7+'],
    datasets: [
      {
        label: 'IP',
        data: [19, 339, 1580, 440, 139, 59, 23, 10],
        backgroundColor: ipColor,
        borderRadius: 3
      },
      {
        label: 'OP',
        data: [17, 645, 3644, 574, 106, 25, 6, 2],
        backgroundColor: opColor,
        borderRadius: 3
      }
    ]
  };
  const coUseScales = {
    x: {
      grid: { color: gridColor },
      ticks: { color: textColor },
      title: { display: true, text: 'Substances co-mentioned', color: textColor }
    },
    y: {
      grid: { color: gridColor },
      ticks: { color: textColor }
    }
  };
  new Chart(document.getElementById('coUseChart'), {
    type: 'bar',
    data: coUseData,
    options: {
      responsive: true,
      maintainAspectRatio: false,
      plugins: {
        legend: {
          display: true,
          position: 'top',
          labels: { color: textColor, boxWidth: 10, padding: 10 }
        }
      },
      scales: coUseScales
    }
  });
}
// 11. Symptom bar — IP % and OP % per symptom.
// symptoms, symIP and symOP are index-aligned and stay at the top level
// because the symptom-ratio chart further down reads them as well.
const symptoms = [
  'Withdrawal', 'Craving', 'Tremors', 'Sleep disturbance', 'Seizures',
  'Irritability', 'Anxiety', 'Depression', 'Nausea/vomiting',
  'Suicidal ideation', 'Hallucinations', 'Memory issues', 'Paranoia'
];
const symIP = [82.8, 77.6, 58.0, 43.5, 41.5, 32.5, 23.2, 20.9, 21.7, 19.9, 21.5, 14.0, 7.9];
const symOP = [86.5, 79.8, 57.3, 36.8, 36.7, 25.1, 22.5, 18.1, 15.1, 15.8, 13.7, 8.3, 4.4];
{
  const withPercentSign = value => value + '%';
  const symptomSeries = [
    { label: 'IP %', data: symIP, backgroundColor: ipColor, borderRadius: 3 },
    { label: 'OP %', data: symOP, backgroundColor: opColor, borderRadius: 3 }
  ];
  new Chart(document.getElementById('symptomBar'), {
    type: 'bar',
    data: { labels: symptoms, datasets: symptomSeries },
    options: {
      responsive: true,
      maintainAspectRatio: false,
      plugins: {
        legend: {
          display: true,
          position: 'top',
          labels: { color: textColor, boxWidth: 10, padding: 10 }
        }
      },
      scales: {
        x: {
          grid: { color: gridColor },
          ticks: { color: textColor, autoSkip: false, maxRotation: 45 }
        },
        y: {
          grid: { color: gridColor },
          ticks: { color: textColor, callback: withPercentSign },
          max: 100
        }
      }
    }
  });
}
// 12. Symptom ratio (IP/OP).
// Each symptom is paired with symIP[i] / symOP[i]; pairs are ordered from the
// largest ratio to the smallest before plotting.
const symRatio = symptoms
  .map((name, idx) => [name, symIP[idx] / symOP[idx]])
  .sort(([, left], [, right]) => right - left);
{
  // Same three-step colour ramp as the feature ratio chart.
  const symShade = ratio => {
    if (ratio > 1.5) return ipColor;
    if (ratio > 1.2) return '#378ADD';
    return '#B5D4F4';
  };
  new Chart(document.getElementById('symptomRatio'), {
    type: 'bar',
    data: {
      labels: symRatio.map(([name]) => name),
      datasets: [{
        // Plotted values are rounded to two decimals; colours use the unrounded ratio.
        data: symRatio.map(([, ratio]) => Number(ratio.toFixed(2))),
        backgroundColor: symRatio.map(([, ratio]) => symShade(ratio)),
        borderRadius: 4
      }]
    },
    options: {
      indexAxis: 'y',
      responsive: true,
      maintainAspectRatio: false,
      plugins: { legend: { display: false } },
      scales: {
        x: {
          grid: { color: gridColor },
          ticks: { color: textColor },
          title: { display: true, text: 'IP/OP ratio (1.0 = equal prevalence)', color: textColor },
          min: 0.8,
          max: 2.0
        },
        y: {
          grid: { color: gridColor },
          ticks: { color: textColor }
        }
      }
    }
  });
}
// 13-16. Relapse distribution — IP and OP counts by relapse mentions per note.
{
  const relapseData = {
    labels: ['0', '1', '2', '3', '4', '5+'],
    datasets: [
      {
        label: 'IP',
        data: [1245, 114, 237, 320, 119, 574],
        backgroundColor: ipColor,
        borderRadius: 3
      },
      {
        label: 'OP',
        data: [2661, 278, 568, 652, 202, 658],
        backgroundColor: opColor,
        borderRadius: 3
      }
    ]
  };
  const relapseScales = {
    x: {
      grid: { color: gridColor },
      ticks: { color: textColor },
      title: { display: true, text: 'Relapse mentions per note', color: textColor }
    },
    y: {
      grid: { color: gridColor },
      ticks: { color: textColor }
    }
  };
  new Chart(document.getElementById('relapseDist'), {
    type: 'bar',
    data: relapseData,
    options: {
      responsive: true,
      maintainAspectRatio: false,
      plugins: {
        legend: {
          display: true,
          position: 'top',
          labels: { color: textColor, boxWidth: 10, padding: 10 }
        }
      },
      scales: relapseScales
    }
  });
}
// 17-18. Behavioral bar — IP % and OP % per behavioural feature.
// behFeat, bIP and bOP are index-aligned: position i of bIP/bOP belongs to behFeat[i].
// An older copy of this data (behFeatures / behIP / behOP) was never read by any
// chart and carried a duplicated 'Socio-occup. (IP)' column; it has been removed
// so that only the arrays actually plotted remain.
const behFeat = [
  'Tolerance', 'Loss of control', 'Socio-occup.', 'Family discord', 'Financial',
  'Use despite harm', 'Violence', 'Legal issues', 'Delusional', 'Social withdrawal'
];
const bIP = [53.4, 42.6, 49.2, 32.3, 20.2, 20.4, 10.3, 6.1, 12.0, 4.2];
const bOP = [61.3, 48.6, 40.4, 34.2, 21.4, 22.1, 8.6, 5.2, 7.2, 2.1];
new Chart(document.getElementById('behavioralBar'), {
  type: 'bar',
  data: {
    labels: behFeat,
    datasets: [
      { label: 'IP %', data: bIP, backgroundColor: ipColor, borderRadius: 3 },
      { label: 'OP %', data: bOP, backgroundColor: opColor, borderRadius: 3 }
    ]
  },
  options: {
    responsive: true,
    maintainAspectRatio: false,
    plugins: {
      legend: {
        display: true,
        position: 'top',
        labels: { color: textColor, boxWidth: 10, padding: 10 }
      }
    },
    scales: {
      // Every category label is drawn (no auto-skip), rotated by at most 40 degrees.
      x: {
        grid: { color: gridColor },
        ticks: { color: textColor, autoSkip: false, maxRotation: 40 }
      },
      // Tick values get a '%' suffix.
      y: {
        grid: { color: gridColor },
        ticks: { color: textColor, callback: v => v + '%' }
      }
    }
  }
});
// 19. Mask pie — doughnut of mask counts per category.
// Labels, data and backgroundColor are index-aligned.
new Chart(document.getElementById('maskPie'), {
  type: 'doughnut',
  data: {
    labels: [
      'Person (14,444)', 'Company (8,746)', 'Address (6,414)',
      'Dates (2,748)', 'Groups (300)', 'Languages (225)'
    ],
    datasets: [{
      data: [14444, 8746, 6414, 2748, 300, 225],
      // Each slice needs its own colour. The fifth entry used to be '#0F6E56',
      // which is the same value as opColor (second entry), so the "Company" and
      // "Groups" slices and legend swatches were indistinguishable.
      backgroundColor: [ipColor, opColor, '#534AB7', '#854F0B', '#993C1D', '#888780'],
      borderWidth: 2,
      // Slice borders take the page background colour of the active theme.
      borderColor: isDark ? '#1c1c1a' : '#fff'
    }]
  },
  options: {
    responsive: true,
    maintainAspectRatio: false,
    cutout: '55%',
    plugins: {
      legend: {
        display: true,
        position: 'bottom',
        labels: { color: textColor, padding: 8, font: { size: 11 }, boxWidth: 10 }
      }
    }
  }
});
// Edge weight bar — edge counts per weight bucket, drawn on a logarithmic y axis.
{
  const weightBuckets = ['0.80–0.85', '0.85–0.90', '0.90–0.95', '0.95–1.00'];
  const weightSeries = {
    label: 'Edges',
    data: [173373, 16183, 530, 6],
    // One colour per bucket.
    backgroundColor: ['#185FA5', '#378ADD', '#85B7EB', '#B5D4F4'],
    borderRadius: 4
  };
  new Chart(document.getElementById('edgeWeight'), {
    type: 'bar',
    data: { labels: weightBuckets, datasets: [weightSeries] },
    options: {
      responsive: true,
      maintainAspectRatio: false,
      plugins: { legend: { display: false } },
      scales: {
        x: {
          grid: { color: gridColor },
          ticks: { color: textColor }
        },
        y: {
          type: 'logarithmic',
          grid: { color: gridColor },
          ticks: { color: textColor },
          title: { display: true, text: 'Edge count (log)', color: textColor }
        }
      }
    }
  });
}
</script>
</body>
</html>