Spaces:

IFMedTechdemo
/

ECG

Paused

App Files Files Community

IFMedTechdemo committed on Jan 13

Commit

e3b4744

verified ·

1 Parent(s): 9c45578

Upload folder using huggingface_hub

Browse files

Files changed (12) hide show

.gitattributes +5 -0
app.py +161 -0
dataset.py +157 -0
examples/40689238.dat +3 -0
examples/43522917.dat +3 -0
examples/45227415.dat +3 -0
examples/46642833.dat +3 -0
examples/49036311.dat +3 -0
labels_refined.py +105 -0
model.py +93 -0
requirements.txt +6 -0
resnet_advanced.pth +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+examples/40689238.dat filter=lfs diff=lfs merge=lfs -text
+examples/43522917.dat filter=lfs diff=lfs merge=lfs -text
+examples/45227415.dat filter=lfs diff=lfs merge=lfs -text
+examples/46642833.dat filter=lfs diff=lfs merge=lfs -text
+examples/49036311.dat filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,161 @@

+import gradio as gr
+import torch
+import torch.nn.functional as F
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+import glob
+from labels_refined import get_refined_labels, CLASSES
+from model import ResNet1d
+from dataset import MIMICECGDataset
+# --- Configuration ---
+# HF Space configuration: Data is local
+DATA_DIR = "./examples"
+MODEL_PATH = "resnet_advanced.pth"
+DEVICE = torch.device("cpu") # Spaces usually CPU unless GPU requested
+# --- Load Resources ---
+print("Loading Model...")
+model = ResNet1d(num_classes=5).to(DEVICE)
+try:
+    state_dict = torch.load(MODEL_PATH, map_location=DEVICE, weights_only=True)
+except:
+    state_dict = torch.load(MODEL_PATH, map_location=DEVICE)
+model.load_state_dict(state_dict)
+model.eval()
+# --- Pre-defined Metadata for Examples ---
+# Hardcoded to avoid uploading the sensitive/huge patient CSV
+example_metadata = {
+    "40689238": {
+        "diagnosis": "Sinus Rhythm (Normal)",
+        "text": "Sinus rhythm\nNormal ECG"
+    },
+    "46642833": {
+        "diagnosis": "Atrial Fibrillation",
+        "text": "Atrial fibrillation\nRapid ventricular response"
+    },
+    "49036311": {
+        "diagnosis": "Sinus Tachycardia",
+        "text": "Sinus tachycardia\nPossible Left Atrial Enlargement"
+    },
+    "43522917": {
+        "diagnosis": "Sinus Bradycardia",
+        "text": "Sinus bradycardia\nOtherwise normal"
+    },
+    "45227415": {
+        "diagnosis": "Ventricular Tachycardia (Rare)",
+        "text": "Ventricular tachycardia\nUrgent attention required"
+    }
+}
+def load_signal(path):
+    # Reusing logic from dataset.py
+    if not os.path.exists(path):
+        return None
+    gain = 200.0
+    with open(path, 'rb') as f:
+        # File is raw int16 binary
+        raw_data = np.fromfile(f, dtype=np.int16)
+    n_leads = 12
+    n_samples = 5000
+    expected_size = n_leads * n_samples
+    if raw_data.size < expected_size:
+        padded = np.zeros(expected_size, dtype=np.int16)
+        padded[:raw_data.size] = raw_data
+        raw_data = padded
+    else:
+        raw_data = raw_data[:expected_size]
+    signal = raw_data.reshape((n_samples, n_leads)).T
+    signal = signal.astype(np.float32) / gain
+    return signal
+def plot_ecg(signal, title="12-Lead ECG"):
+    """Generates a matplotlib figure for the 12-lead ECG"""
+    leads = ['I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6']
+    fig, axes = plt.subplots(12, 1, figsize=(10, 20), sharex=True)
+    plt.subplots_adjust(hspace=0.2)
+    for i in range(12):
+        axes[i].plot(signal[i], color='k', linewidth=0.8)
+        axes[i].set_ylabel(leads[i], rotation=0, labelpad=20, fontsize=10, fontweight='bold')
+        axes[i].spines['top'].set_visible(False)
+        axes[i].spines['right'].set_visible(False)
+        axes[i].spines['bottom'].set_visible(False if i < 11 else True)
+        axes[i].spines['left'].set_visible(True)
+        axes[i].grid(True, linestyle='--', alpha=0.5)
+    axes[11].set_xlabel("Samples (500Hz)", fontsize=12)
+    fig.suptitle(title, fontsize=16, y=0.90)
+    return fig
+def predict_ecg(study_id):
+    # Path is local in examples/
+    path = os.path.join(DATA_DIR, f"{study_id}.dat")
+    if not os.path.exists(path):
+        return None, f"File not found for study {study_id}", {}
+    # Load Signal
+    signal = load_signal(path)
+    if signal is None:
+        return None, "Error loading signal", {}
+    # Generate Plot
+    fig = plot_ecg(signal, title=f"Study {study_id}")
+    # Inference
+    tensor_sig = torch.from_numpy(signal).float().unsqueeze(0).to(DEVICE) # (1, 12, 5000)
+    with torch.no_grad():
+        logits = model(tensor_sig)
+        probs = torch.sigmoid(logits).cpu().numpy()[0]
+    # Format Results
+    results = {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))}
+    # Get True Text
+    full_text = example_metadata.get(study_id, {}).get("text", "Unknown")
+    return fig, results, full_text
+# --- Gradio UI ---
+examples = [[k, v["diagnosis"]] for k, v in example_metadata.items()]
+example_ids = [k for k in example_metadata.keys()]
+with gr.Blocks(title="ECG Arrhythmia Classifier") as demo:
+    gr.Markdown("# 🫀 AI ECG Arrhythmia Classifier")
+    gr.Markdown("Select a study ID from the examples below to analyze the 12-lead ECG.")
+    with gr.Row():
+        with gr.Column(scale=1):
+            # Input
+            study_input = gr.Dropdown(choices=example_ids, label="Select Example Study ID", value=example_ids[0])
+            # Info
+            gr.Markdown("### Example Descriptions")
+            gr.DataFrame(headers=["Study ID", "Diagnosis"], value=examples, interactive=False)
+            analyze_btn = gr.Button("Analyze ECG", variant="primary")
+        with gr.Column(scale=2):
+            # Output
+            plot_output = gr.Plot(label="12-Lead ECG Visualization")
+            label_output = gr.Label(label="AI Predictions")
+            text_output = gr.Textbox(label="Original Clinical Report (Ground Truth context)", lines=5)
+    analyze_btn.click(
+        fn=predict_ecg,
+        inputs=[study_input],
+        outputs=[plot_output, label_output, text_output]
+    )
+if __name__ == "__main__":
+    demo.launch()

dataset.py ADDED Viewed

	@@ -0,0 +1,157 @@

+import torch
+from torch.utils.data import Dataset
+import pandas as pd
+import numpy as np
+import os
+class MIMICECGDataset(Dataset):
+    """
+    PyTorch Dataset for MIMIC-IV-ECG.
+    """
+    def __init__(self, df, data_dir, transform=False, label_func=None):
+        """
+        Args:
+            df (pd.DataFrame): Dataframe with subject_id, study_id, and report columns.
+            data_dir (str): Root directory of the dataset (containing the 'files' folder).
+            transform (callable, optional): Optional transform to be applied on a sample.
+            label_func (callable, optional): Custom function to extract labels from a row.
+        """
+        self.df = df
+        self.data_dir = data_dir
+        self.transform = transform
+        self.label_func = label_func
+        # MIMIC-ECG Constants
+        self.n_leads = 12
+        self.n_samples = 5000
+        self.fs = 500
+        self.gain = 200.0 # Standard gain in ADU/mV
+        # Define the target classes we want to detect
+        # These keys will be searched in the report columns
+        self.class_mapping = {
+            'Normally filtered': 0, # Not a diagnosis, but often present
+            'Sinus rhythm': 0,
+            'Atrial fibrillation': 1,
+            'Sinus tachycardia': 2,
+            'Sinus bradycardia': 3,
+            'Ventricular tachycardia': 4,
+            # Add more as needed
+        }
+        self.num_classes = 5 # For now
+    def __len__(self):
+        return len(self.df)
+    def __getitem__(self, idx):
+        if torch.is_tensor(idx):
+            idx = idx.tolist()
+        row = self.df.iloc[idx]
+        subj_id = str(row['subject_id'])
+        study_id = str(row['study_id'])
+        # Construct path: files/p{XXX}/p{subject_id}/s{study_id}/{study_id}.dat
+        subdir = f"p{subj_id[:4]}"
+        # Ensure we handle the folder structure correctly.
+        # Based on exploration: data_dir/files/p100/p10000032/s40689238/40689238.dat
+        file_path = os.path.join(self.data_dir, 'files', subdir, f"p{subj_id}", f"s{study_id}", f"{study_id}.dat")
+        # 1. Load Signal
+        signal = self.load_signal_numpy(file_path)
+        # 2. Get Labels
+        if self.label_func:
+            # Need to pass text to label_func, or row?
+            # get_refined_labels expects text. Let's extract text here or let func handle row.
+            # Best to let func handle text so it's pure.
+            cols = [c for c in self.df.columns if 'report_' in c]
+            full_text = ' '.join([str(row[c]) for c in cols])
+            labels = self.label_func(full_text)
+        else:
+            labels = self.get_labels(row)
+        # 3. Return sample
+        # Signal shape: (12, 5000)
+        sample = {
+            'signal': signal,
+            'labels': labels,
+            'study_id': study_id
+        }
+        return sample
+    def load_signal_numpy(self, path):
+        """
+        Reads the binary .dat file using numpy.
+        Returns a torch tensor of shape (12, 5000).
+        """
+        # Return zeros if file is missing (to avoid crashing training loop on missing files)
+        if not os.path.exists(path):
+            return torch.zeros((self.n_leads, self.n_samples), dtype=torch.float32)
+        try:
+            # Read binary file as 16-bit integers
+            raw_data = np.fromfile(path, dtype=np.int16)
+            # Check size
+            expected_size = self.n_leads * self.n_samples
+            if raw_data.size != expected_size:
+                # Handle truncated or wrong-sized files by padding or cutting
+                if raw_data.size < expected_size:
+                    padded = np.zeros(expected_size, dtype=np.int16)
+                    padded[:raw_data.size] = raw_data
+                    raw_data = padded
+                else:
+                    raw_data = raw_data[:expected_size]
+            # Reshape to (Samples, Leads) then Transpose to (Leads, Samples)
+            # stored as (samples, leads) interleaved? Usually yes in WFDB format 16
+            # Actually, standard WFDB '16' format is often interleaved.
+            # Let's assume interleaved (s1L1, s1L2... s1L12, s2L1...)
+            signal = raw_data.reshape((self.n_samples, self.n_leads)).T
+            # Normalize to mV
+            signal = signal.astype(np.float32) / self.gain
+            return torch.from_numpy(signal)
+        except Exception as e:
+            # print(f"Error loading {path}: {e}")
+            return torch.zeros((self.n_leads, self.n_samples), dtype=torch.float32)
+    def get_labels(self, row):
+        """
+        Extracts labels from report columns.
+        Returns a multi-hot tensor of shape (num_classes).
+        """
+        # Combine all report text
+        cols = [c for c in self.df.columns if 'report_' in c]
+        full_text = ' '.join([str(row[c]) for c in cols]).lower()
+        # Create label vector
+        label_vec = torch.zeros(self.num_classes, dtype=torch.float32)
+        # Simple string matching
+        # 0: Sinus Rhythm (Normal-ish)
+        if 'sinus rhythm' in full_text:
+            label_vec[0] = 1.0
+        # 1: Atrial Fibrillation
+        if 'atrial fibrillation' in full_text:
+            label_vec[1] = 1.0
+        # 2: Tachycardia
+        if 'sinus tachycardia' in full_text:
+            label_vec[2] = 1.0
+        # 3: Bradycardia
+        if 'sinus bradycardia' in full_text:
+            label_vec[3] = 1.0
+        # 4: VTach
+        if 'ventricular tachycardia' in full_text:
+            label_vec[4] = 1.0
+        return label_vec

examples/40689238.dat ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d18f4de5faa9ab7cbe5f54d5ea4d5dddd4b57b80f5879dcc25f2e8f08d5d1c43
+size 120000

examples/43522917.dat ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:60bc453a34bbff747774ca629224b4da8ac7dd4f3033ff19ab12345a6ef71cad
+size 120000

examples/45227415.dat ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e5d101d965a4fd40eee1507ec288682802579bbd78a14f4c0e4f52f62ef8bbcb
+size 120000

examples/46642833.dat ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:adc5fab3f766fc765e739ea660f1cfbd6f3ed39e1aa6218847e06d4cbe0f233b
+size 120000

examples/49036311.dat ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fea3620d35a355fce80e7e5ee9b63356f87d031e72ec3ead304f209b1b1698eb
+size 120000

labels_refined.py ADDED Viewed

	@@ -0,0 +1,105 @@

+import re
+import torch
+import numpy as np
+# Define classes
+CLASSES = [
+    'Sinus Rhythm',        # 0
+    'Atrial Fibrillation', # 1
+    'Sinus Tachycardia',   # 2
+    'Sinus Bradycardia',   # 3
+    'Ventricular Tachycardia' # 4
+]
+def get_refined_labels(text):
+    """
+    Parses ECG report text to extract diagnostic labels using Regex with negation handling.
+    Args:
+        text (str): Combined text from report columns.
+    Returns:
+        torch.Tensor: Multi-hot encoded label vector.
+    """
+    text = text.lower()
+    labels = torch.zeros(len(CLASSES), dtype=torch.float32)
+    # ---------------------------------------------------------
+    # Helper: Check for positive mention (ignoring negations)
+    # ---------------------------------------------------------
+    def has_condition(patterns, exclusion_patterns=None):
+        if exclusion_patterns:
+            for excl in exclusion_patterns:
+                if re.search(excl, text):
+                    return False
+        for pat in patterns:
+            # Check for negation preceding the match
+            # finding all matches
+            matches = re.finditer(pat, text)
+            for match in matches:
+                start_idx = match.start()
+                # Look at the window before the match (e.g., 20 chars)
+                context_before = text[max(0, start_idx-25):start_idx]
+                # Negation triggers
+                negations = ['no ', 'not ', 'rule out ', 'denies ', 'absence of ', 'free of ']
+                if any(neg in context_before for neg in negations):
+                    continue # This match is negated
+                return True # Found a positive, non-negated match
+        return False
+    # ---------------------------------------------------------
+    # Class 0: Sinus Rhythm
+    # ---------------------------------------------------------
+    # "Sinus rhythm" is often the default, but we should check for it explicitly.
+    if has_condition([r'sinus rhythm']):
+        labels[0] = 1.0
+    # ---------------------------------------------------------
+    # Class 1: Atrial Fibrillation
+    # ---------------------------------------------------------
+    # Synonyms: AFib, A-fib, Atrial Fib
+    if has_condition([r'atrial fibrillation', r'afib', r'a-fib', r'atrial fib']):
+        labels[1] = 1.0
+    # ---------------------------------------------------------
+    # Class 2: Sinus Tachycardia
+    # ---------------------------------------------------------
+    if has_condition([r'sinus tachycardia']):
+        labels[2] = 1.0
+    # ---------------------------------------------------------
+    # Class 3: Sinus Bradycardia
+    # ---------------------------------------------------------
+    if has_condition([r'sinus bradycardia']):
+        labels[3] = 1.0
+    # ---------------------------------------------------------
+    # Class 4: Ventricular Tachycardia
+    # ---------------------------------------------------------
+    # Synonyms: VTach, V-Tach, VT
+    # Be careful with "VT" matching random text
+    if has_condition([r'ventricular tachycardia', r'vtach', r'\bvt\b', r'v-tach']):
+        labels[4] = 1.0
+    return labels
+if __name__ == "__main__":
+    # Test cases
+    test_sentences = [
+        ("Normal sinus rhythm", [1, 0, 0, 0, 0]),
+        ("Atrial fibrillation with rapid ventricular response", [0, 1, 0, 0, 0]),
+        ("No atrial fibrillation detected", [0, 0, 0, 0, 0]),
+        ("Sinus tachycardia", [0, 0, 1, 0, 0]),
+        ("Rule out ventricular tachycardia", [0, 0, 0, 0, 0]),
+        ("Patient has history of afib", [0, 1, 0, 0, 0]), # History might be ambiguous, but usually valid for label
+        ("Sinus bradycardia observed", [0, 0, 0, 1, 0])
+    ]
+    print("Running Regex Label Tests...")
+    for txt, expected in test_sentences:
+        res = get_refined_labels(txt)
+        match = torch.all(res == torch.tensor(expected)).item()
+        print(f"'{txt}' -> {res.numpy()} [{'✅' if match else '❌'}]")

model.py ADDED Viewed

	@@ -0,0 +1,93 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+class ResNetBlock(nn.Module):
+    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
+        super(ResNetBlock, self).__init__()
+        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=7, stride=stride, padding=3, bias=False)
+        self.bn1 = nn.BatchNorm1d(out_channels)
+        self.relu = nn.ReLU(inplace=True)
+        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=7, stride=1, padding=3, bias=False)
+        self.bn2 = nn.BatchNorm1d(out_channels)
+        self.downsample = downsample
+    def forward(self, x):
+        identity = x
+        if self.downsample is not None:
+            identity = self.downsample(x)
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+        out = self.conv2(out)
+        out = self.bn2(out)
+        out += identity
+        out = self.relu(out)
+        return out
+class ResNet1d(nn.Module):
+    """
+    ResNet-1D for ECG Classification.
+    Adapted from 'Time Series Classification from Scratch with Deep Neural Networks: A Strong Baseline' (Wang et al. 2017)
+    """
+    def __init__(self, num_classes=5):
+        super(ResNet1d, self).__init__()
+        self.inplanes = 64
+        # Initial: 12 leads -> 64 channels
+        self.conv1 = nn.Conv1d(12, 64, kernel_size=15, stride=2, padding=7, bias=False)
+        self.bn1 = nn.BatchNorm1d(64)
+        self.relu = nn.ReLU(inplace=True)
+        self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
+        # Layers
+        self.layer1 = self._make_layer(64, 2, stride=1)
+        self.layer2 = self._make_layer(128, 2, stride=2)
+        self.layer3 = self._make_layer(256, 2, stride=2)
+        self.layer4 = self._make_layer(512, 2, stride=2)
+        self.avgpool = nn.AdaptiveAvgPool1d(1)
+        self.fc = nn.Linear(512, num_classes)
+    def _make_layer(self, planes, blocks, stride=1):
+        downsample = None
+        if stride != 1 or self.inplanes != planes:
+            downsample = nn.Sequential(
+                nn.Conv1d(self.inplanes, planes, kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm1d(planes),
+            )
+        layers = []
+        layers.append(ResNetBlock(self.inplanes, planes, stride, downsample))
+        self.inplanes = planes
+        for _ in range(1, blocks):
+            layers.append(ResNetBlock(self.inplanes, planes))
+        return nn.Sequential(*layers)
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        x = self.avgpool(x)
+        x = x.view(x.size(0), -1)
+        x = self.fc(x)
+        return x
+if __name__ == "__main__":
+    # Test
+    model = ResNet1d(num_classes=5)
+    dummy = torch.randn(2, 12, 5000)
+    out = model(dummy)
+    print(f"Input: {dummy.shape}")
+    print(f"Output: {out.shape}")

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+torch
+pandas
+numpy
+matplotlib
+gradio
+scipy

resnet_advanced.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:22f939e89d1fe5528a24f4e4894e507643f39307e52b3b57420fcb6820db9d50
+size 35039598