Tumo505 committed on
Commit
d4303a2
·
1 Parent(s): a2229cb

fix wfdb error

Browse files
Files changed (2) hide show
  1. app.py +268 -84
  2. requirements.txt +3 -0
app.py CHANGED
@@ -11,6 +11,7 @@ import numpy as np
11
  import plotly.graph_objects as go
12
  from huggingface_hub import hf_hub_download
13
  import tempfile
 
14
 
15
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
16
 
@@ -52,6 +53,239 @@ class ECGClassifier(nn.Module):
52
  logits = self.classifier(embeddings)
53
  return logits
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  # Load model
56
  model = None
57
  try:
@@ -69,9 +303,9 @@ try:
69
  model.load_state_dict(state_dict, strict=False)
70
  model.to(device)
71
  model.eval()
72
- print("✅ Model loaded successfully")
73
  except Exception as e:
74
- print(f"❌ Error loading model: {e}")
75
  import traceback
76
  traceback.print_exc()
77
 
@@ -92,71 +326,12 @@ def predict_ecg(file_obj):
92
  else:
93
  file_path = file_obj.name if hasattr(file_obj, 'name') else str(file_obj)
94
 
95
- # Load ECG data - handle multiple formats
96
  print(f"Loading file: {file_path}")
97
-
98
- # Detect format by extension
99
- if file_path.endswith('.hea'):
100
- # WFDB (PhysioNet) format
101
- print("Detected WFDB (.hea) format...")
102
- try:
103
- import wfdb
104
- # Load the record (without extension)
105
- record_path = file_path.replace('.hea', '')
106
- record = wfdb.rdrecord(record_path)
107
- ecg = record.p_signal # Get signal data
108
- print(f"WFDB loaded: {ecg.shape}")
109
- except ImportError:
110
- return (
111
- "**Error**: WFDB library not installed\n"
112
- "Python-wfdb package required for .hea files",
113
- None
114
- )
115
- except Exception as e:
116
- return (f"**WFDB Error**: {str(e)}", None)
117
-
118
- elif file_path.endswith('.bat'):
119
- # BAT format (binary or text batch format)
120
- print("Detected BAT format...")
121
- try:
122
- # Try reading as binary NumPy array first
123
- ecg = np.fromfile(file_path, dtype=np.float32)
124
- # Reshape if needed - assume 128 samples per lead or similar
125
- if len(ecg) % 12 == 0:
126
- ecg = ecg.reshape(12, -1)
127
- else:
128
- # Single lead, will be replicated later
129
- ecg = ecg.reshape(1, -1)
130
- print(f"BAT loaded: {ecg.shape}")
131
- except:
132
- try:
133
- # Try text format
134
- ecg = np.loadtxt(file_path)
135
- if ecg.ndim == 1:
136
- ecg = ecg.reshape(1, -1)
137
- except Exception as e:
138
- return (f"**BAT Error**: Could not parse file - {str(e)}", None)
139
-
140
- else:
141
- # Try standard text formats (CSV, space-separated, tab-separated, .npy)
142
- try:
143
- if file_path.endswith('.npy'):
144
- ecg = np.load(file_path)
145
- else:
146
- # Try space-separated (UCR/ArrowHead format)
147
- ecg = np.genfromtxt(file_path, delimiter=None)
148
- except:
149
- try:
150
- # Try comma-separated
151
- ecg = np.loadtxt(file_path, delimiter=',')
152
- except:
153
- try:
154
- # Try tab-separated
155
- ecg = np.loadtxt(file_path, delimiter='\t')
156
- except Exception as e:
157
- return (f"**File Error**: Could not parse file format - {str(e)}", None)
158
-
159
- print(f"Loaded shape: {ecg.shape}")
160
 
161
  # Handle 1D array (single sample)
162
  if ecg.ndim == 1:
@@ -319,30 +494,39 @@ with gr.Blocks(
319
  gr.Markdown("""
320
  ### Upload Your ECG
321
 
322
- **Supported Formats:**
323
- - CSV / TSV / TXT (space, comma, or tab-separated)
324
- - NumPy .npy file
325
- - WFDB .hea (PhysioNet format)
326
- - BAT (binary or batch ECG files)
327
- - UCR/ArrowHead format (UCR5000, etc.)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
 
329
- **Requirements:**
330
- - Multi-lead: (N leads, M samples)
331
- - Single-lead: Will be replicated to 12 leads
332
- - Sampling Rate: Any (will be normalized)
333
- - Will auto-pad/trim to 5000 samples
334
 
335
- **Example Structure (CSV/TXT):**
336
- ```
337
- lead_I, lead_II, ..., lead_aVF
338
- 0.123, 0.456, ..., 0.789
339
- ...
340
- ```
341
  """)
342
 
343
  file_input = gr.File(
344
  label="ECG File",
345
- file_types=[".csv", ".txt", ".tsv", ".npy", ".hea", ".bat"],
 
 
346
  type="filepath"
347
  )
348
 
 
11
  import plotly.graph_objects as go
12
  from huggingface_hub import hf_hub_download
13
  import tempfile
14
+ from pathlib import Path
15
 
16
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
17
 
 
53
  logits = self.classifier(embeddings)
54
  return logits
55
 
56
# Delimiters tried, in order, when parsing text ECG files (None = any whitespace).
_TEXT_DELIMITERS = (None, ',', '\t', ';')


def _count_header_lines(file_path):
    """Return how many leading lines to skip so parsing starts at numeric data.

    Blank and '#' comment lines are passed over; the first content line is
    treated as a header only when none of its tokens parses as a float.
    Returns 0 when the first content line already looks like data (or the
    file has no content at all).
    """
    with open(file_path, 'r', errors='replace') as handle:
        for index, line in enumerate(handle):
            content = line.strip()
            if not content or content.startswith('#'):
                continue
            for separator in ',;\t':
                content = content.replace(separator, ' ')
            for token in content.split():
                try:
                    float(token)
                except ValueError:
                    continue
                return 0  # at least one numeric token: this is a data row
            return index + 1  # skip everything up to and including the header
    return 0


def _load_text_ecg(file_path, lenient=True):
    """Parse a delimited text file into a 2-D float array.

    np.loadtxt is used first because, unlike np.genfromtxt, it raises on a
    token it cannot convert; that is what makes delimiter detection work
    (genfromtxt would silently return NaN for a comma-separated row split on
    whitespace). When ``lenient`` is true and every strict attempt fails,
    np.genfromtxt is tried so files with missing fields still load, with NaN
    in the gaps; the delimiter that yields the most finite values wins.

    Raises:
        ValueError: if no delimiter produces any numeric data.
    """
    skip = _count_header_lines(file_path)
    last_error = None

    for delimiter in _TEXT_DELIMITERS:
        try:
            data = np.loadtxt(file_path, delimiter=delimiter, skiprows=skip)
        except ValueError as err:
            last_error = err
            continue
        if data.size:
            return np.atleast_2d(data)

    if lenient:
        best, best_finite = None, 0
        for delimiter in _TEXT_DELIMITERS:
            try:
                data = np.genfromtxt(file_path, delimiter=delimiter, skip_header=skip)
            except ValueError as err:
                last_error = err
                continue
            finite = int(np.isfinite(data).sum())
            if finite > best_finite:
                best, best_finite = data, finite
        if best is not None:
            return np.atleast_2d(best)

    raise ValueError("no numeric data could be parsed from the file") from last_error


def _looks_like_text(file_path, probe_size=4096):
    """Return True when the first bytes of the file are printable ASCII text."""
    with open(file_path, 'rb') as handle:
        head = handle.read(probe_size)
    if not head:
        return False
    try:
        text = head.decode('ascii')
    except UnicodeDecodeError:
        return False
    return all(ch.isprintable() or ch in '\t\r\n' for ch in text)


def _load_binary_ecg(file_path):
    """Load a .raw/.bin/.bat/.ecg file that holds either text or raw float32.

    np.fromfile never fails on content, so binary decoding cannot be used to
    detect a text file; the file is sniffed first and parsed strictly as text
    when it looks like text. Otherwise it is read as float32 and reshaped with
    the same heuristic as before: 12 rows if the sample count divides by 12,
    else 2 rows if even, else a single row.

    Raises:
        ValueError: if the file contains no samples.
    """
    if _looks_like_text(file_path):
        try:
            return _load_text_ecg(file_path, lenient=False)
        except ValueError:
            pass  # printable but not a numeric table: fall back to binary

    flat = np.fromfile(file_path, dtype=np.float32)
    if flat.size == 0:
        raise ValueError("file contains no data")
    if flat.size % 12 == 0:
        rows = 12
    elif flat.size % 2 == 0:
        rows = 2
    else:
        rows = 1
    return flat.reshape(rows, -1)


def _load_wfdb(file_path):
    """Load a WFDB record given the path of its .hea or .dat file.

    The record name is derived with Path.with_suffix so only the final
    extension is stripped (str.replace would also strip '.hea'/'.dat'
    occurring elsewhere in the path). Both files must sit side by side.
    """
    path = Path(file_path)
    header_path = path.with_suffix('.hea')
    data_path = path.with_suffix('.dat')
    if not (header_path.exists() and data_path.exists()):
        raise FileNotFoundError(
            "WFDB format requires TWO files:\n"
            f"  1. {header_path.name} (header)\n"
            f"  2. {data_path.name} (data)\n\n"
            "Please upload both files and try again, or upload both files in a ZIP archive."
        )
    try:
        import wfdb
    except ImportError as err:
        raise ImportError("wfdb required: pip install wfdb") from err
    try:
        record = wfdb.rdrecord(str(path.with_suffix('')))
    except Exception as err:
        raise RuntimeError(f"WFDB error: {err}") from err
    return record.p_signal


def _load_mat(file_path):
    """Load the ECG array from a MATLAB .mat file.

    Common variable names are tried first; otherwise the largest array with
    at most two dimensions is used.
    """
    try:
        from scipy import io
    except ImportError as err:
        raise ImportError("SciPy required: pip install scipy") from err
    mat_data = io.loadmat(file_path)
    for key in ('ecg', 'ECG', 'signal', 'data', 'val'):
        if key in mat_data:
            print(f"MATLAB variable used: {key}")
            return np.array(mat_data[key])
    arrays = {
        name: value for name, value in mat_data.items()
        if isinstance(value, np.ndarray) and value.ndim <= 2
    }
    if not arrays:
        raise ValueError("No ECG data found in .mat file")
    key = max(arrays, key=lambda name: arrays[name].size)
    print(f"MATLAB variable used: {key}")
    return arrays[key]


def _load_hdf5(file_path):
    """Load the ECG array from an HDF5 file.

    Common dataset names are tried first, then the first top-level dataset.
    Groups are skipped because converting a group to an array does not yield
    signal data.
    """
    try:
        import h5py
    except ImportError as err:
        raise ImportError("h5py required: pip install h5py") from err
    with h5py.File(file_path, 'r') as h5_file:
        candidates = ['ecg', 'ECG', 'signal', 'data', 'waveform'] + list(h5_file.keys())
        for key in candidates:
            if key in h5_file and isinstance(h5_file[key], h5py.Dataset):
                print(f"HDF5 dataset used: {key}")
                return np.array(h5_file[key])
    raise ValueError("No ECG data found in HDF5 file")


def _load_edf(file_path):
    """Load every signal of an EDF file into a (signals, samples) array."""
    try:
        import pyedflib
    except ImportError as err:
        raise ImportError("pyedflib required: pip install pyedflib") from err
    reader = pyedflib.EdfReader(file_path)
    try:
        n_signals = reader.signals_in_file
        if n_signals == 0:
            raise ValueError("No signals in EDF file")
        ecg = np.zeros((n_signals, reader.getNSamples()[0]))
        for index in range(n_signals):
            ecg[index, :] = reader.readSignal(index)
    finally:
        # Release the file handle even when reading a signal fails.
        reader.close()
    return ecg


def _load_dicom(file_path):
    """Load the first waveform multiplex group of a DICOM file.

    WaveformData is a packed byte buffer, so it is decoded with pydicom's
    Dataset.waveform_array (pydicom >= 2.1) rather than np.array(). That
    method returns (samples, channels); the result is transposed to
    (channels, samples), the layout the previous reshape aimed for.
    """
    try:
        import pydicom
    except ImportError as err:
        raise ImportError("pydicom required: pip install pydicom") from err
    dataset = pydicom.dcmread(file_path)
    if not (hasattr(dataset, 'WaveformSequence') and len(dataset.WaveformSequence) > 0):
        raise ValueError("No waveform data in DICOM file")
    samples = dataset.waveform_array(0)
    return np.asarray(samples, dtype=np.float32).T


def _xml_numeric_rows(elements):
    """Return one list of floats per element that has non-blank text."""
    rows = []
    for element in elements:
        text = element.text
        if text:
            values = [float(token) for token in text.split()]
            if values:  # whitespace-only text carries no samples
                rows.append(values)
    return rows


def _load_xml(file_path):
    """Load waveforms from an HL7 aECG style XML file.

    Text directly inside <series> elements is read first (original
    behaviour). If that yields nothing, <digits> elements are read instead.
    NOTE(review): aECG normally nests samples in <digits>; confirm against
    real exports, and note that scale/origin are not applied.
    Shorter rows are edge-padded to the longest row.
    """
    try:
        import xml.etree.ElementTree as ET
        root = ET.parse(file_path).getroot()
        namespace = '{urn:hl7-org:v3}'
        waveforms = _xml_numeric_rows(root.findall(f'.//{namespace}series'))
        if not waveforms:
            waveforms = _xml_numeric_rows(root.findall(f'.//{namespace}digits'))
        if not waveforms:
            raise ValueError("No waveform data in XML file")
        max_len = max(len(row) for row in waveforms)
        return np.array(
            [np.pad(row, (0, max_len - len(row)), mode='edge') for row in waveforms]
        )
    except Exception as err:
        raise ValueError(f"XML parsing error: {err}") from err


def _load_npy(file_path):
    """Load a NumPy .npy array (pickled object arrays are not allowed)."""
    return np.load(file_path)


# Extension -> (label used in the log line, loader). Anything not listed is
# treated as delimited text.
_ECG_LOADERS = {
    '.hea': ("WFDB (.hea/.dat)", _load_wfdb),
    '.dat': ("WFDB (.hea/.dat)", _load_wfdb),
    '.mat': ("MATLAB", _load_mat),
    '.h5': ("HDF5", _load_hdf5),
    '.hdf5': ("HDF5", _load_hdf5),
    '.edf': ("EDF", _load_edf),
    '.dcm': ("DICOM", _load_dicom),
    '.xml': ("XML (HL7 aECG)", _load_xml),
    '.npy': ("NumPy", _load_npy),
    '.raw': ("Binary", _load_binary_ecg),
    '.bin': ("Binary", _load_binary_ecg),
    '.bat': ("Binary", _load_binary_ecg),
    '.ecg': ("Binary", _load_binary_ecg),
}


def load_ecg_file(file_path):
    """
    Load an ECG recording from disk, choosing a parser by file extension.

    Supported formats:
    - Text: CSV, TXT, TSV (whitespace, comma, tab or semicolon; optional header row)
    - NumPy: .npy
    - PhysioNet: .hea/.dat (WFDB, both files required side by side)
    - MATLAB: .mat
    - HDF5: .h5, .hdf5
    - EDF: .edf (European Data Format)
    - DICOM: .dcm
    - XML: .xml (HL7 aECG)
    - Binary: .raw, .bin, .bat, .ecg (float32, or text content)

    Args:
        file_path: Path (str or path-like) of the uploaded file. The
            extension is matched case-insensitively; unknown extensions are
            parsed as delimited text.

    Returns:
        numpy.ndarray with the signal data. Orientation depends on the
        source format and is not normalised here.

    Raises:
        Exception: with the message "Failed to load <ext> file: <reason>"
            for any failure; the underlying error is chained as __cause__.
    """
    file_path = str(file_path)
    extension = Path(file_path).suffix.lower()

    print(f"Loading {extension} format from: {file_path}")

    label, loader = _ECG_LOADERS.get(extension, ("Text format", _load_text_ecg))
    try:
        ecg = loader(file_path)
    except Exception as e:
        raise Exception(f"Failed to load {extension} file: {str(e)}") from e

    print(f"{label} loaded: {ecg.shape}")
    return ecg
288
+
289
  # Load model
290
  model = None
291
  try:
 
303
  model.load_state_dict(state_dict, strict=False)
304
  model.to(device)
305
  model.eval()
306
+ print("Model loaded successfully")
307
  except Exception as e:
308
+ print(f"Error loading model: {e}")
309
  import traceback
310
  traceback.print_exc()
311
 
 
326
  else:
327
  file_path = file_obj.name if hasattr(file_obj, 'name') else str(file_obj)
328
 
329
+ # Load ECG using universal loader
330
  print(f"Loading file: {file_path}")
331
+ try:
332
+ ecg = load_ecg_file(file_path)
333
+ except Exception as e:
334
+ return (f"**Loading Error**: {str(e)}", None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
 
336
  # Handle 1D array (single sample)
337
  if ecg.ndim == 1:
 
494
  gr.Markdown("""
495
  ### Upload Your ECG
496
 
497
+ **Clinical & Standardized Formats:**
498
+ - `.dcm` – DICOM (medical imaging, PACS systems)
499
+ - `.scp` – SCP-ECG (European interoperability standard)
500
+ - `.xml` – HL7 aECG / FDA XML (clinical trials, regulatory)
501
+
502
+ **Research & PhysioNet Formats:**
503
+ - `.hea` + `.dat` – WFDB (MIT-BIH, PhysioNet) **Requires BOTH files**
504
+ - `.edf` – European Data Format (multi-channel biosignals)
505
+
506
+ **Generic / Export Formats:**
507
+ - `.csv / .txt / .tsv` – Text formats (auto-detects delimiter)
508
+ - `.npy` – NumPy arrays
509
+ - `.mat` – MATLAB format
510
+ - `.h5 / .hdf5` – HDF5 (efficient large-scale datasets)
511
+ - `.raw / .bin` – Binary ECG data
512
+
513
+ **Architecture Auto-Conversion:**
514
+ - Multi-lead (12 leads): Used directly
515
+ - Single-lead → Replicated to 12 leads
516
+ - Auto-pads/trims to 5000 samples per lead
517
+
518
+ **Supported Delimiters:** Space, comma, tab (auto-detected)
519
 
520
+ ---
 
 
 
 
521
 
522
+ **WFDB Note:** `.hea` and `.dat` must be in the same directory. If they're separate, please upload them as a ZIP archive or both files together if the interface allows multi-file selection.
 
 
 
 
 
523
  """)
524
 
525
  file_input = gr.File(
526
  label="ECG File",
527
+ file_types=[".csv", ".txt", ".tsv", ".npy", ".hea", ".dat",
528
+ ".dcm", ".mat", ".h5", ".hdf5", ".edf", ".xml",
529
+ ".raw", ".bin", ".bat", ".ecg"],
530
  type="filepath"
531
  )
532
 
requirements.txt CHANGED
@@ -7,3 +7,6 @@ scipy>=1.10.0
7
  plotly>=5.17.0
8
  huggingface-hub>=0.19.0
9
  wfdb>=4.0.0
 
 
 
 
7
  plotly>=5.17.0
8
  huggingface-hub>=0.19.0
9
  wfdb>=4.0.0
10
+ pydicom>=2.3.0
11
+ h5py>=3.6.0
12
+ pyedflib>=0.1.30