Spaces:

Tumo505
/

SSL-ECG-Classification

Sleeping

App Files Files Community

Tumo505 committed on Apr 19

Commit

e184b08

1 Parent(s): 6fcc7e5

Handle space-separated and univariate ECG files (UCR format)

Browse files

Files changed (1) hide show

app.py +59 -26

app.py CHANGED Viewed

@@ -92,40 +92,73 @@ def predict_ecg(file_obj):
         else:
             file_path = file_obj.name if hasattr(file_obj, 'name') else str(file_obj)
-        # Load ECG data
-        if file_path.endswith(('.csv', '.txt')):
-            ecg = np.loadtxt(file_path, delimiter=',')
-        elif file_path.endswith('.npy'):
-            ecg = np.load(file_path)
         else:
-            ecg = np.genfromtxt(file_path)
         # Validation
-        if ecg.ndim != 2:
             return (
-                "**Invalid Format**\n"
-                f"Expected 2D array, got shape {ecg.shape}\n"
-                "Expected: (12 leads, N samples)",
                 None
             )
-        # Handle transposition
-        if ecg.shape[0] != 12:
-            if ecg.shape[1] == 12:
-                ecg = ecg.T
             else:
-                return (
-                    "**Invalid Dimensions**\n"
-                    f"Got shape {ecg.shape}, expected (12, N)\n"
-                    "Ensure file has 12 leads (rows) × N samples (columns)",
-                    None
-                )
-        # Resize to 5000 samples
-        if ecg.shape[1] < 5000:
-            ecg = np.pad(ecg, ((0, 0), (0, 5000 - ecg.shape[1])), mode='edge')
-        else:
-            ecg = ecg[:, :5000]
         # Normalize each lead independently
         ecg = (ecg - ecg.mean(axis=1, keepdims=True)) / (ecg.std(axis=1, keepdims=True) + 1e-8)

         else:
             file_path = file_obj.name if hasattr(file_obj, 'name') else str(file_obj)
+        # Load ECG data - handle multiple formats
+        print(f"Loading file: {file_path}")
+        try:
+            # Try space-separated (UCR/ArrowHead format)
+            ecg = np.genfromtxt(file_path, delimiter=None)
+        except:
+            try:
+                # Try comma-separated
+                ecg = np.loadtxt(file_path, delimiter=',')
+            except:
+                # Try tab-separated
+                ecg = np.loadtxt(file_path, delimiter='\t')
+        print(f"Loaded shape: {ecg.shape}")
+        # Handle 1D array (single sample)
+        if ecg.ndim == 1:
+            ecg = ecg.reshape(1, -1)
+        # Check if first column is class label (UCR format)
+        # If so, extract just the time series values
+        if ecg.shape[1] > 5000:  # More than likely samples
+            print("Detected class label in first column, removing it...")
+            ecg = ecg[:, 1:]  # Remove first column (class label)
+        # Now ecg should be 2D: (num_samples, num_values)
+        # We need (12, 5000) for our model
+        # If single sample, use it
+        if ecg.shape[0] == 1:
+            values = ecg[0, :]
         else:
+            # Use first sample if multiple
+            values = ecg[0, :]
+        print(f"Time series values shape: {values.shape}")
+        # Pad or trim the series to exactly 5000 samples (12-lead replication happens below)
+        if len(values) < 5000:
+            print(f"Padding: {len(values)} values → 5000")
+            values = np.pad(values, (0, 5000 - len(values)), mode='edge')
+        elif len(values) > 5000:
+            print(f"Trimming: {len(values)} values → 5000")
+            values = values[:5000]
+        # Reshape as (1 lead, 5000 samples) then replicate to 12 leads
+        print("Replicating single lead to 12 leads for model compatibility...")
+        ecg = np.tile(values, (12, 1))
+        print(f"Final shape: {ecg.shape}")
         # Validation
+        if ecg.ndim != 2 or ecg.shape[0] != 12 or ecg.shape[1] != 5000:
             return (
+                f"**Shape Error**\n"
+                f"Final shape: {ecg.shape}, expected (12, 5000)\n"
+                "File format not supported.",
                 None
             )
+        # Resize to 5000 samples (already done in loading, but ensure consistency)
+        if ecg.shape[1] != 5000:
+            if ecg.shape[1] < 5000:
+                ecg = np.pad(ecg, ((0, 0), (0, 5000 - ecg.shape[1])), mode='edge')
             else:
+                ecg = ecg[:, :5000]
         # Normalize each lead independently
         ecg = (ecg - ecg.mean(axis=1, keepdims=True)) / (ecg.std(axis=1, keepdims=True) + 1e-8)