Spaces:
Sleeping
Sleeping
Handle space-separated and univariate ECG files (UCR format)
Browse files
app.py
CHANGED
|
@@ -92,40 +92,73 @@ def predict_ecg(file_obj):
|
|
| 92 |
else:
|
| 93 |
file_path = file_obj.name if hasattr(file_obj, 'name') else str(file_obj)
|
| 94 |
|
| 95 |
-
# Load ECG data
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
else:
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
# Validation
|
| 104 |
-
if ecg.ndim != 2:
|
| 105 |
return (
|
| 106 |
-
"**
|
| 107 |
-
f"
|
| 108 |
-
"
|
| 109 |
None
|
| 110 |
)
|
| 111 |
|
| 112 |
-
#
|
| 113 |
-
if ecg.shape[
|
| 114 |
-
if ecg.shape[1]
|
| 115 |
-
ecg = ecg.
|
| 116 |
else:
|
| 117 |
-
|
| 118 |
-
"**Invalid Dimensions**\n"
|
| 119 |
-
f"Got shape {ecg.shape}, expected (12, N)\n"
|
| 120 |
-
"Ensure file has 12 leads (rows) × N samples (columns)",
|
| 121 |
-
None
|
| 122 |
-
)
|
| 123 |
-
|
| 124 |
-
# Resize to 5000 samples
|
| 125 |
-
if ecg.shape[1] < 5000:
|
| 126 |
-
ecg = np.pad(ecg, ((0, 0), (0, 5000 - ecg.shape[1])), mode='edge')
|
| 127 |
-
else:
|
| 128 |
-
ecg = ecg[:, :5000]
|
| 129 |
|
| 130 |
# Normalize each lead independently
|
| 131 |
ecg = (ecg - ecg.mean(axis=1, keepdims=True)) / (ecg.std(axis=1, keepdims=True) + 1e-8)
|
|
|
|
| 92 |
else:
|
| 93 |
file_path = file_obj.name if hasattr(file_obj, 'name') else str(file_obj)
|
| 94 |
|
| 95 |
+
# Load ECG data - handle multiple formats
|
| 96 |
+
print(f"Loading file: {file_path}")
|
| 97 |
+
|
| 98 |
+
try:
|
| 99 |
+
# Try space-separated (UCR/ArrowHead format)
|
| 100 |
+
ecg = np.genfromtxt(file_path, delimiter=None)
|
| 101 |
+
except:
|
| 102 |
+
try:
|
| 103 |
+
# Try comma-separated
|
| 104 |
+
ecg = np.loadtxt(file_path, delimiter=',')
|
| 105 |
+
except:
|
| 106 |
+
# Try tab-separated
|
| 107 |
+
ecg = np.loadtxt(file_path, delimiter='\t')
|
| 108 |
+
|
| 109 |
+
print(f"Loaded shape: {ecg.shape}")
|
| 110 |
+
|
| 111 |
+
# Handle 1D array (single sample)
|
| 112 |
+
if ecg.ndim == 1:
|
| 113 |
+
ecg = ecg.reshape(1, -1)
|
| 114 |
+
|
| 115 |
+
# Check if first column is class label (UCR format)
|
| 116 |
+
# If so, extract just the time series values
|
| 117 |
+
if ecg.shape[1] > 5000: # More than likely samples
|
| 118 |
+
print("Detected class label in first column, removing it...")
|
| 119 |
+
ecg = ecg[:, 1:] # Remove first column (class label)
|
| 120 |
+
|
| 121 |
+
# Now ecg should be 2D: (num_samples, num_values)
|
| 122 |
+
# We need (12, 5000) for our model
|
| 123 |
+
|
| 124 |
+
# If single sample, use it
|
| 125 |
+
if ecg.shape[0] == 1:
|
| 126 |
+
values = ecg[0, :]
|
| 127 |
else:
|
| 128 |
+
# Use first sample if multiple
|
| 129 |
+
values = ecg[0, :]
|
| 130 |
+
|
| 131 |
+
print(f"Time series values shape: {values.shape}")
|
| 132 |
+
|
| 133 |
+
# Pad or trim the series to exactly 5000 values (replication to 12 leads happens further down)
|
| 134 |
+
if len(values) < 5000:
|
| 135 |
+
print(f"Padding: {len(values)} values → 5000")
|
| 136 |
+
values = np.pad(values, (0, 5000 - len(values)), mode='edge')
|
| 137 |
+
elif len(values) > 5000:
|
| 138 |
+
print(f"Trimming: {len(values)} values → 5000")
|
| 139 |
+
values = values[:5000]
|
| 140 |
+
|
| 141 |
+
# Repeat the single 5000-value lead as 12 identical rows, giving shape (12, 5000)
|
| 142 |
+
print("Replicating single lead to 12 leads for model compatibility...")
|
| 143 |
+
ecg = np.tile(values, (12, 1))
|
| 144 |
+
|
| 145 |
+
print(f"Final shape: {ecg.shape}")
|
| 146 |
|
| 147 |
# Validation
|
| 148 |
+
if ecg.ndim != 2 or ecg.shape[0] != 12 or ecg.shape[1] != 5000:
|
| 149 |
return (
|
| 150 |
+
f"**Shape Error**\n"
|
| 151 |
+
f"Final shape: {ecg.shape}, expected (12, 5000)\n"
|
| 152 |
+
"File format not supported.",
|
| 153 |
None
|
| 154 |
)
|
| 155 |
|
| 156 |
+
# Resize to 5000 samples (already done in loading, but ensure consistency)
|
| 157 |
+
if ecg.shape[1] != 5000:
|
| 158 |
+
if ecg.shape[1] < 5000:
|
| 159 |
+
ecg = np.pad(ecg, ((0, 0), (0, 5000 - ecg.shape[1])), mode='edge')
|
| 160 |
else:
|
| 161 |
+
ecg = ecg[:, :5000]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
# Normalize each lead independently
|
| 164 |
ecg = (ecg - ecg.mean(axis=1, keepdims=True)) / (ecg.std(axis=1, keepdims=True) + 1e-8)
|