Tumo505 committed on
Commit
e184b08
·
1 Parent(s): 6fcc7e5

Handle space-separated and univariate ECG files (UCR format)

Browse files
Files changed (1) hide show
  1. app.py +59 -26
app.py CHANGED
@@ -92,40 +92,73 @@ def predict_ecg(file_obj):
92
  else:
93
  file_path = file_obj.name if hasattr(file_obj, 'name') else str(file_obj)
94
 
95
- # Load ECG data
96
- if file_path.endswith(('.csv', '.txt')):
97
- ecg = np.loadtxt(file_path, delimiter=',')
98
- elif file_path.endswith('.npy'):
99
- ecg = np.load(file_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  else:
101
- ecg = np.genfromtxt(file_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
  # Validation
104
- if ecg.ndim != 2:
105
  return (
106
- "**Invalid Format**\n"
107
- f"Expected 2D array, got shape {ecg.shape}\n"
108
- "Expected: (12 leads, N samples)",
109
  None
110
  )
111
 
112
- # Handle transposition
113
- if ecg.shape[0] != 12:
114
- if ecg.shape[1] == 12:
115
- ecg = ecg.T
116
  else:
117
- return (
118
- "**Invalid Dimensions**\n"
119
- f"Got shape {ecg.shape}, expected (12, N)\n"
120
- "Ensure file has 12 leads (rows) × N samples (columns)",
121
- None
122
- )
123
-
124
- # Resize to 5000 samples
125
- if ecg.shape[1] < 5000:
126
- ecg = np.pad(ecg, ((0, 0), (0, 5000 - ecg.shape[1])), mode='edge')
127
- else:
128
- ecg = ecg[:, :5000]
129
 
130
  # Normalize each lead independently
131
  ecg = (ecg - ecg.mean(axis=1, keepdims=True)) / (ecg.std(axis=1, keepdims=True) + 1e-8)
 
92
  else:
93
  file_path = file_obj.name if hasattr(file_obj, 'name') else str(file_obj)
94
 
95
+ # Load ECG data - handle multiple formats
96
+ print(f"Loading file: {file_path}")
97
+
98
+ try:
99
+ # Try space-separated (UCR/ArrowHead format)
100
+ ecg = np.genfromtxt(file_path, delimiter=None)
101
+ except:
102
+ try:
103
+ # Try comma-separated
104
+ ecg = np.loadtxt(file_path, delimiter=',')
105
+ except:
106
+ # Try tab-separated
107
+ ecg = np.loadtxt(file_path, delimiter='\t')
108
+
109
+ print(f"Loaded shape: {ecg.shape}")
110
+
111
+ # Handle 1D array (single sample)
112
+ if ecg.ndim == 1:
113
+ ecg = ecg.reshape(1, -1)
114
+
115
+ # Check if first column is class label (UCR format)
116
+ # If so, extract just the time series values
117
+ if ecg.shape[1] > 5000: # More than likely samples
118
+ print("Detected class label in first column, removing it...")
119
+ ecg = ecg[:, 1:] # Remove first column (class label)
120
+
121
+ # Now ecg should be 2D: (num_samples, num_values)
122
+ # We need (12, 5000) for our model
123
+
124
+ # If single sample, use it
125
+ if ecg.shape[0] == 1:
126
+ values = ecg[0, :]
127
  else:
128
+ # Use first sample if multiple
129
+ values = ecg[0, :]
130
+
131
+ print(f"Time series values shape: {values.shape}")
132
+
133
+ # Handle single-lead data (repeat 12 times for compatibility)
134
+ if len(values) < 5000:
135
+ print(f"Padding: {len(values)} values → 5000")
136
+ values = np.pad(values, (0, 5000 - len(values)), mode='edge')
137
+ elif len(values) > 5000:
138
+ print(f"Trimming: {len(values)} values → 5000")
139
+ values = values[:5000]
140
+
141
+ # Reshape as (1 lead, 5000 samples) then replicate to 12 leads
142
+ print("Replicating single lead to 12 leads for model compatibility...")
143
+ ecg = np.tile(values, (12, 1))
144
+
145
+ print(f"Final shape: {ecg.shape}")
146
 
147
  # Validation
148
+ if ecg.ndim != 2 or ecg.shape[0] != 12 or ecg.shape[1] != 5000:
149
  return (
150
+ f"**Shape Error**\n"
151
+ f"Final shape: {ecg.shape}, expected (12, 5000)\n"
152
+ "File format not supported.",
153
  None
154
  )
155
 
156
+ # Resize to 5000 samples (already done in loading, but ensure consistency)
157
+ if ecg.shape[1] != 5000:
158
+ if ecg.shape[1] < 5000:
159
+ ecg = np.pad(ecg, ((0, 0), (0, 5000 - ecg.shape[1])), mode='edge')
160
  else:
161
+ ecg = ecg[:, :5000]
 
 
 
 
 
 
 
 
 
 
 
162
 
163
  # Normalize each lead independently
164
  ecg = (ecg - ecg.mean(axis=1, keepdims=True)) / (ecg.std(axis=1, keepdims=True) + 1e-8)