Spaces:

Tumo505
/

SSL-ECG-Classification

Sleeping

App Files Files Community

Tumo505 committed on Apr 19

Commit

d4303a2

1 Parent(s): a2229cb

fix wfdb error

Browse files

Files changed (2) hide show

app.py +268 -84
requirements.txt +3 -0

app.py CHANGED Viewed

@@ -11,6 +11,7 @@ import numpy as np
 import plotly.graph_objects as go
 from huggingface_hub import hf_hub_download
 import tempfile
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -52,6 +53,239 @@ class ECGClassifier(nn.Module):
         logits = self.classifier(embeddings)
         return logits
 # Load model
 model = None
 try:
@@ -69,9 +303,9 @@ try:
     model.load_state_dict(state_dict, strict=False)
     model.to(device)
     model.eval()
-    print("✅ Model loaded successfully")
 except Exception as e:
-    print(f"❌ Error loading model: {e}")
     import traceback
     traceback.print_exc()
@@ -92,71 +326,12 @@ def predict_ecg(file_obj):
         else:
             file_path = file_obj.name if hasattr(file_obj, 'name') else str(file_obj)
-        # Load ECG data - handle multiple formats
         print(f"Loading file: {file_path}")
-        # Detect format by extension
-        if file_path.endswith('.hea'):
-            # WFDB (PhysioNet) format
-            print("Detected WFDB (.hea) format...")
-            try:
-                import wfdb
-                # Load the record (without extension)
-                record_path = file_path.replace('.hea', '')
-                record = wfdb.rdrecord(record_path)
-                ecg = record.p_signal  # Get signal data
-                print(f"WFDB loaded: {ecg.shape}")
-            except ImportError:
-                return (
-                    "**Error**: WFDB library not installed\n"
-                    "Python-wfdb package required for .hea files",
-                    None
-                )
-            except Exception as e:
-                return (f"**WFDB Error**: {str(e)}", None)
-        elif file_path.endswith('.bat'):
-            # BAT format (binary or text batch format)
-            print("Detected BAT format...")
-            try:
-                # Try reading as binary NumPy array first
-                ecg = np.fromfile(file_path, dtype=np.float32)
-                # Reshape if needed - assume 128 samples per lead or similar
-                if len(ecg) % 12 == 0:
-                    ecg = ecg.reshape(12, -1)
-                else:
-                    # Single lead, will be replicated later
-                    ecg = ecg.reshape(1, -1)
-                print(f"BAT loaded: {ecg.shape}")
-            except:
-                try:
-                    # Try text format
-                    ecg = np.loadtxt(file_path)
-                    if ecg.ndim == 1:
-                        ecg = ecg.reshape(1, -1)
-                except Exception as e:
-                    return (f"**BAT Error**: Could not parse file - {str(e)}", None)
-        else:
-            # Try standard text formats (CSV, space-separated, tab-separated, .npy)
-            try:
-                if file_path.endswith('.npy'):
-                    ecg = np.load(file_path)
-                else:
-                    # Try space-separated (UCR/ArrowHead format)
-                    ecg = np.genfromtxt(file_path, delimiter=None)
-            except:
-                try:
-                    # Try comma-separated
-                    ecg = np.loadtxt(file_path, delimiter=',')
-                except:
-                    try:
-                        # Try tab-separated
-                        ecg = np.loadtxt(file_path, delimiter='\t')
-                    except Exception as e:
-                        return (f"**File Error**: Could not parse file format - {str(e)}", None)
-        print(f"Loaded shape: {ecg.shape}")
         # Handle 1D array (single sample)
         if ecg.ndim == 1:
@@ -319,30 +494,39 @@ with gr.Blocks(
             gr.Markdown("""
             ### Upload Your ECG
-            **Supported Formats:**
-            - CSV / TSV / TXT (space, comma, or tab-separated)
-            - NumPy .npy file
-            - WFDB .hea (PhysioNet format)
-            - BAT (binary or batch ECG files)
-            - UCR/ArrowHead format (UCR5000, etc.)
-            **Requirements:**
-            - Multi-lead: (N leads, M samples)
-            - Single-lead: Will be replicated to 12 leads
-            - Sampling Rate: Any (will be normalized)
-            - Will auto-pad/trim to 5000 samples
-            **Example Structure (CSV/TXT):**
-            ```
-            lead_I, lead_II, ..., lead_aVF
-            0.123, 0.456, ..., 0.789
-            ...
-            ```
             """)
             file_input = gr.File(
                 label="ECG File",
-                file_types=[".csv", ".txt", ".tsv", ".npy", ".hea", ".bat"],
                 type="filepath"
             )

 import plotly.graph_objects as go
 from huggingface_hub import hf_hub_download
 import tempfile
+from pathlib import Path
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         logits = self.classifier(embeddings)
         return logits
def _reshape_flat_samples(flat):
    """Fold a flat sample vector into (leads, samples) using the lead-count heuristic.

    A length divisible by 12 is treated as 12 leads, otherwise an even length
    as 2 leads, otherwise a single lead.
    """
    if len(flat) % 12 == 0:
        rows = 12
    elif len(flat) % 2 == 0:
        rows = 2
    else:
        rows = 1
    return flat.reshape(rows, -1)


def _load_wfdb(file_path):
    """Load a WFDB record given either its .hea header or its .dat data file."""
    path = Path(file_path)
    # Strip only the final suffix; a plain str.replace would also rewrite
    # '.hea'/'.dat' occurring elsewhere in the path (e.g. a directory name).
    record_path = str(path.with_suffix(''))
    header_path = Path(record_path + '.hea')
    data_path = Path(record_path + '.dat')
    given_header = path.suffix.lower() == '.hea'
    companion = data_path if given_header else header_path

    # Checked before importing wfdb so the user gets the actionable message
    # even when the real problem is simply a missing companion file.
    if not companion.exists():
        header_name = path.name if given_header else header_path.name
        data_name = data_path.name if given_header else path.name
        raise Exception(
            "WFDB format requires TWO files:\n"
            f"  1. {header_name} (header)\n"
            f"  2. {data_name} (data)\n\n"
            "Please upload both files and try again, or upload both files in a ZIP archive."
        )

    try:
        import wfdb
        record = wfdb.rdrecord(record_path)
    except Exception as exc:
        raise Exception(f"WFDB error: {str(exc)}") from exc
    ecg = record.p_signal
    print(f"WFDB (.hea/.dat) loaded: {ecg.shape}")
    return ecg


def _load_matlab(file_path):
    """Load the ECG matrix from a MATLAB .mat file."""
    try:
        from scipy import io
    except ImportError as exc:
        raise Exception("SciPy required: pip install scipy") from exc

    mat_data = io.loadmat(file_path)
    # Try common variable names first.
    for key in ('ecg', 'ECG', 'signal', 'data', 'val'):
        if key in mat_data:
            ecg = np.array(mat_data[key])
            print(f"MATLAB loaded ({key}): {ecg.shape}")
            return ecg
    # Otherwise fall back to the largest array of rank <= 2.
    arrays = {
        name: value for name, value in mat_data.items()
        if isinstance(value, np.ndarray) and value.ndim <= 2
    }
    if arrays:
        key = max(arrays, key=lambda name: arrays[name].size)
        ecg = arrays[key]
        print(f"MATLAB loaded ({key}): {ecg.shape}")
        return ecg
    raise Exception("No ECG data found in .mat file")


def _load_hdf5(file_path):
    """Load the first suitable dataset from an HDF5 file."""
    try:
        import h5py
    except ImportError as exc:
        raise Exception("h5py required: pip install h5py") from exc

    with h5py.File(file_path, 'r') as handle:
        preferred = [k for k in ('ecg', 'ECG', 'signal', 'data', 'waveform') if k in handle]
        for key in preferred + list(handle.keys()):
            # Groups cannot be converted to an array; only datasets hold samples.
            if isinstance(handle[key], h5py.Dataset):
                ecg = np.array(handle[key])
                print(f"HDF5 loaded ({key}): {ecg.shape}")
                return ecg
    raise Exception("No ECG data found in HDF5 file")


def _load_edf(file_path):
    """Load every signal of an EDF file into a (signals, samples) array."""
    try:
        import pyedflib
    except ImportError as exc:
        raise Exception("pyedflib required: pip install pyedflib") from exc

    reader = pyedflib.EdfReader(file_path)
    try:
        n_signals = reader.signals_in_file
        if n_signals == 0:
            raise Exception("No signals in EDF file")
        ecg = np.zeros((n_signals, reader.getNSamples()[0]))
        for index in range(n_signals):
            ecg[index, :] = reader.readSignal(index)
    finally:
        # Release the file handle even when a read fails.
        reader.close()
    print(f"EDF loaded: {ecg.shape}")
    return ecg


def _load_dicom(file_path):
    """Load the first waveform multiplex group of a DICOM file as (channels, samples)."""
    try:
        import pydicom
    except ImportError as exc:
        raise Exception("pydicom required: pip install pydicom") from exc

    ds = pydicom.dcmread(file_path)
    if not (hasattr(ds, 'WaveformSequence') and len(ds.WaveformSequence) > 0):
        raise Exception("No waveform data in DICOM file")
    # WaveformData is a raw byte buffer of interleaved samples, so it cannot be
    # fed to np.array(..., dtype=float32). pydicom's waveform_array decodes it
    # and returns (samples, channels); transpose to (channels, samples).
    ecg = np.asarray(ds.waveform_array(0), dtype=np.float32).T
    print(f"DICOM loaded: {ecg.shape}")
    return ecg


def _load_xml(file_path):
    """Load waveform samples from an HL7 aECG style XML file."""
    import xml.etree.ElementTree as ET

    def parse_numbers(text):
        # Whitespace-only text (pure indentation) yields no samples.
        return [float(token) for token in text.split()] if text and text.strip() else []

    try:
        root = ET.parse(file_path).getroot()
        # Samples are carried by <digits> elements nested below <series>;
        # match on the local name so any namespace prefix is accepted.
        waveforms = [
            values
            for element in root.iter()
            if isinstance(element.tag, str) and element.tag.rsplit('}', 1)[-1] == 'digits'
            for values in [parse_numbers(element.text)]
            if values
        ]
        if not waveforms:
            # Fallback: numbers placed directly as text of <series> elements.
            waveforms = [
                values
                for series in root.findall('.//{urn:hl7-org:v3}series')
                for values in [parse_numbers(series.text)]
                if values
            ]
        if not waveforms:
            raise Exception("No waveform data in XML file")
        # Pad shorter leads with their last value so all rows share one length.
        max_len = max(len(w) for w in waveforms)
        ecg = np.array([np.pad(w, (0, max_len - len(w)), mode='edge') for w in waveforms])
    except Exception as exc:
        raise Exception(f"XML parsing error: {str(exc)}") from exc
    print(f"XML (HL7 aECG) loaded: {ecg.shape}")
    return ecg


def _load_numpy(file_path):
    """Load a NumPy .npy array."""
    ecg = np.load(file_path)
    print(f"NumPy loaded: {ecg.shape}")
    return ecg


def _load_binary(file_path):
    """Load raw binary samples as float32, then float64, then as plain text."""
    for dtype, label in ((np.float32, 'float32'), (np.float64, 'float64')):
        try:
            ecg = _reshape_flat_samples(np.fromfile(file_path, dtype=dtype))
        except (OSError, ValueError):
            continue
        print(f"Binary ({label}) loaded: {ecg.shape}")
        return ecg
    ecg = np.atleast_2d(np.loadtxt(file_path))
    print(f"Binary as text loaded: {ecg.shape}")
    return ecg


def _sniff_delimiter(file_path):
    """Return ',' or '\\t' if the first data line uses it, else None (whitespace)."""
    with open(file_path, 'r', encoding='utf-8', errors='replace') as handle:
        for line in handle:
            candidate = line.strip()
            if not candidate or candidate.startswith('#'):
                continue
            if ',' in candidate:
                return ','
            if '\t' in candidate:
                return '\t'
            return None
    raise ValueError("file contains no data rows")


def _load_text(file_path):
    """Load delimited text (CSV/TSV/space separated), tolerating one header row."""
    # The delimiter must be detected up front: genfromtxt with delimiter=None
    # does not fail on comma-separated input, it silently yields NaNs.
    delimiter = _sniff_delimiter(file_path)
    ecg = None
    for skip in (0, 1):  # strict parse first, then assume a single header row
        try:
            ecg = np.loadtxt(file_path, delimiter=delimiter, skiprows=skip)
            break
        except ValueError:
            continue
    if ecg is None or ecg.size == 0:
        # Last resort: tolerant parser that maps unparsable cells to NaN.
        ecg = np.genfromtxt(file_path, delimiter=delimiter)
        if ecg.ndim == 2 and ecg.shape[0] > 1 and np.isnan(ecg[0]).all():
            ecg = ecg[1:]  # drop a header row that parsed as all-NaN
    if ecg.size == 0 or np.isnan(ecg).all():
        raise ValueError("no numeric samples could be parsed")
    ecg = np.atleast_2d(ecg)
    print(f"Text format loaded: {ecg.shape}")
    return ecg


_ECG_LOADERS = {
    '.hea': _load_wfdb,
    '.dat': _load_wfdb,
    '.mat': _load_matlab,
    '.h5': _load_hdf5,
    '.hdf5': _load_hdf5,
    '.edf': _load_edf,
    '.dcm': _load_dicom,
    '.xml': _load_xml,
    '.npy': _load_numpy,
    '.raw': _load_binary,
    '.bin': _load_binary,
    '.bat': _load_binary,
    '.ecg': _load_binary,
}


def load_ecg_file(file_path):
    """
    Load an ECG recording from disk, choosing a parser by file extension.

    Supported formats:
    - Text: CSV, TXT, TSV (comma, tab or whitespace; one optional header row)
    - NumPy: .npy
    - PhysioNet: .hea/.dat (WFDB, both files must sit side by side)
    - MATLAB: .mat
    - HDF5: .h5, .hdf5
    - EDF: .edf (European Data Format)
    - DICOM: .dcm
    - XML: .xml (HL7 aECG)
    - Binary: .raw, .bin, .bat, .ecg

    Any other extension is parsed as delimited text.

    Args:
        file_path: Path (str or path-like) of the file to load.

    Returns:
        A NumPy array holding the samples. Orientation is whatever the
        format-specific parser yields; WFDB returns ``record.p_signal`` as is.

    Raises:
        Exception: with the message ``Failed to load <ext> file: <reason>``
            when the file is missing, empty, unparsable, or an optional
            dependency for the format is not installed.
    """
    file_path = str(file_path)
    extension = Path(file_path).suffix.lower()
    print(f"Loading {extension} format from: {file_path}")
    loader = _ECG_LOADERS.get(extension, _load_text)
    try:
        ecg = loader(file_path)
        if np.asarray(ecg).size == 0:
            raise ValueError("file contains no samples")
    except Exception as exc:
        raise Exception(f"Failed to load {extension} file: {str(exc)}") from exc
    return ecg
 # Load model
 model = None
 try:
     model.load_state_dict(state_dict, strict=False)
     model.to(device)
     model.eval()
+    print("Model loaded successfully")
 except Exception as e:
+    print(f"Error loading model: {e}")
     import traceback
     traceback.print_exc()
         else:
             file_path = file_obj.name if hasattr(file_obj, 'name') else str(file_obj)
+        # Load ECG using universal loader
         print(f"Loading file: {file_path}")
+        try:
+            ecg = load_ecg_file(file_path)
+        except Exception as e:
+            return (f"**Loading Error**: {str(e)}", None)
         # Handle 1D array (single sample)
         if ecg.ndim == 1:
             gr.Markdown("""
             ### Upload Your ECG
+            **Clinical & Standardized Formats:**
+            - `.dcm` – DICOM (medical imaging, PACS systems)
+            - `.scp` – SCP-ECG (European interoperability standard)
+            - `.xml` – HL7 aECG / FDA XML (clinical trials, regulatory)
+            ** Research & PhysioNet Formats:**
+            - `.hea` + `.dat` – WFDB (MIT-BIH, PhysioNet) **Requires BOTH files**
+            - `.edf` – European Data Format (multi-channel biosignals)
+            **Generic / Export Formats:**
+            - `.csv / .txt / .tsv` – Text formats (auto-detects delimiter)
+            - `.npy` – NumPy arrays
+            - `.mat` – MATLAB format
+            - `.h5 / .hdf5` – HDF5 (efficient large-scale datasets)
+            - `.raw / .bin` – Binary ECG data
+            **Architecture Auto-Conversion:**
+            - Multi-lead (12 leads): Used directly
+            - Single-lead → Replicated to 12 leads
+            - Auto-pads/trims to 5000 samples per lead
+            **Supported Delimiters:** Space, comma, tab (auto-detected)
+            ---
+            **WFDB Note:** `.hea` and `.dat` must be in the same directory. If they're separate, please upload them as a ZIP archive or both files together if the interface allows multi-file selection.
             """)
             file_input = gr.File(
                 label="ECG File",
+                file_types=[".csv", ".txt", ".tsv", ".npy", ".hea", ".dat",
+                           ".dcm", ".mat", ".h5", ".hdf5", ".edf", ".xml",
+                           ".raw", ".bin", ".bat", ".ecg"],
                 type="filepath"
             )

requirements.txt CHANGED Viewed

@@ -7,3 +7,6 @@ scipy>=1.10.0
 plotly>=5.17.0
 huggingface-hub>=0.19.0
 wfdb>=4.0.0

 plotly>=5.17.0
 huggingface-hub>=0.19.0
 wfdb>=4.0.0
+pydicom>=2.3.0
+h5py>=3.6.0
+pyedflib>=0.1.30