Spaces:

MCP-1st-Birthday
/

HDF5-NetCDF-MCP

Sleeping

App Files Files Community

JG1310 committed on Nov 30, 2025

Commit

0ec3cf1

verified ·

1 Parent(s): 22d7108

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -16

app.py CHANGED Viewed

@@ -1538,7 +1538,14 @@ def create_scatter_plot(file_path: str, x_dataset_path: str, y_dataset_path: str
         Tuple of (status dict, path to saved HTML file)
     """
     import plotly.graph_objects as go
-    from scipy import stats
     try:
         if not file_path:
@@ -1555,9 +1562,10 @@ def create_scatter_plot(file_path: str, x_dataset_path: str, y_dataset_path: str
         f, file_type = open_file_with_fallback(file_path)
         try:
-            # Get X dataset
             if file_type == "HDF5":
                 x_var = f[x_dataset_path]
                 y_var = f[y_dataset_path]
             else:
                 x_var = f.variables[x_dataset_path]
@@ -1575,7 +1583,7 @@ def create_scatter_plot(file_path: str, x_dataset_path: str, y_dataset_path: str
                     error_result["suggested_slice_x"] = x_safety["suggested_slice"]
                 return error_result, None
-            # Check memory safety for Y
             y_safety = check_memory_safety(y_var.shape, y_var.dtype, y_slice_str, memory_limit_mb)
             if not y_safety["safe"]:
                 error_result = {
@@ -1587,26 +1595,21 @@ def create_scatter_plot(file_path: str, x_dataset_path: str, y_dataset_path: str
                     error_result["suggested_slice_y"] = y_safety["suggested_slice"]
                 return error_result, None
-            # Read X data
             if x_slice_str and x_slice_str.strip():
                 x_idx = parse_slice(x_slice_str)
-                x_data = x_var[x_idx]
             else:
-                x_data = x_var[:]
-            # Read Y data
             if y_slice_str and y_slice_str.strip():
                 y_idx = parse_slice(y_slice_str)
-                y_data = y_var[y_idx]
             else:
-                y_data = y_var[:]
-            # Convert to numpy and flatten
-            if not isinstance(x_data, np.ndarray):
-                x_data = np.array(x_data)
-            if not isinstance(y_data, np.ndarray):
-                y_data = np.array(y_data)
             x_data = x_data.flatten()
             y_data = y_data.flatten()
@@ -1616,6 +1619,8 @@ def create_scatter_plot(file_path: str, x_dataset_path: str, y_dataset_path: str
                     "error": f"X and Y data length mismatch after slicing: {len(x_data)} vs {len(y_data)}",
                     "x_slice": x_slice_str if x_slice_str else "no slice",
                     "y_slice": y_slice_str if y_slice_str else "no slice",
                     "suggestion": "Adjust slices to produce equal-length arrays",
                     "status": "failed"
                 }, None
@@ -1623,7 +1628,13 @@ def create_scatter_plot(file_path: str, x_dataset_path: str, y_dataset_path: str
         finally:
             f.close()
-        # Calculate correlation
         correlation = float(np.corrcoef(x_data, y_data)[0, 1])
         # Linear regression

         Tuple of (status dict, path to saved HTML file)
     """
     import plotly.graph_objects as go
+    try:
+        from scipy import stats
+    except ImportError:
+        return {
+            "error": "scipy library not available. Install with: pip install scipy",
+            "status": "failed"
+        }, None
     try:
         if not file_path:
         f, file_type = open_file_with_fallback(file_path)
         try:
+            # Get X and Y datasets (may be the same variable)
             if file_type == "HDF5":
                 x_var = f[x_dataset_path]
+                # Get Y separately even if same path (avoid reference issues)
                 y_var = f[y_dataset_path]
             else:
                 x_var = f.variables[x_dataset_path]
                     error_result["suggested_slice_x"] = x_safety["suggested_slice"]
                 return error_result, None
+            # Check memory safety for Y (use same variable shape if same dataset)
             y_safety = check_memory_safety(y_var.shape, y_var.dtype, y_slice_str, memory_limit_mb)
             if not y_safety["safe"]:
                 error_result = {
                     error_result["suggested_slice_y"] = y_safety["suggested_slice"]
                 return error_result, None
+            # Read X data - parse slice and read
             if x_slice_str and x_slice_str.strip():
                 x_idx = parse_slice(x_slice_str)
+                x_data = np.array(x_var[x_idx])  # Force copy
             else:
+                x_data = np.array(x_var[:])  # Force copy
+            # Read Y data - parse slice and read
             if y_slice_str and y_slice_str.strip():
                 y_idx = parse_slice(y_slice_str)
+                y_data = np.array(y_var[y_idx])  # Force copy
             else:
+                y_data = np.array(y_var[:])  # Force copy
+            # Flatten both arrays
             x_data = x_data.flatten()
             y_data = y_data.flatten()
                     "error": f"X and Y data length mismatch after slicing: {len(x_data)} vs {len(y_data)}",
                     "x_slice": x_slice_str if x_slice_str else "no slice",
                     "y_slice": y_slice_str if y_slice_str else "no slice",
+                    "x_shape_after_slice": x_data.shape,
+                    "y_shape_after_slice": y_data.shape,
                     "suggestion": "Adjust slices to produce equal-length arrays",
                     "status": "failed"
                 }, None
         finally:
             f.close()
+        # Calculate correlation (check for valid data)
+        if len(x_data) < 2:
+            return {
+                "error": f"Not enough data points for correlation: {len(x_data)} points (need at least 2)",
+                "status": "failed"
+            }, None
         correlation = float(np.corrcoef(x_data, y_data)[0, 1])
         # Linear regression