Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1538,7 +1538,14 @@ def create_scatter_plot(file_path: str, x_dataset_path: str, y_dataset_path: str
|
|
| 1538 |
Tuple of (status dict, path to saved HTML file)
|
| 1539 |
"""
|
| 1540 |
import plotly.graph_objects as go
|
| 1541 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1542 |
|
| 1543 |
try:
|
| 1544 |
if not file_path:
|
|
@@ -1555,9 +1562,10 @@ def create_scatter_plot(file_path: str, x_dataset_path: str, y_dataset_path: str
|
|
| 1555 |
f, file_type = open_file_with_fallback(file_path)
|
| 1556 |
|
| 1557 |
try:
|
| 1558 |
-
# Get X
|
| 1559 |
if file_type == "HDF5":
|
| 1560 |
x_var = f[x_dataset_path]
|
|
|
|
| 1561 |
y_var = f[y_dataset_path]
|
| 1562 |
else:
|
| 1563 |
x_var = f.variables[x_dataset_path]
|
|
@@ -1575,7 +1583,7 @@ def create_scatter_plot(file_path: str, x_dataset_path: str, y_dataset_path: str
|
|
| 1575 |
error_result["suggested_slice_x"] = x_safety["suggested_slice"]
|
| 1576 |
return error_result, None
|
| 1577 |
|
| 1578 |
-
# Check memory safety for Y
|
| 1579 |
y_safety = check_memory_safety(y_var.shape, y_var.dtype, y_slice_str, memory_limit_mb)
|
| 1580 |
if not y_safety["safe"]:
|
| 1581 |
error_result = {
|
|
@@ -1587,26 +1595,21 @@ def create_scatter_plot(file_path: str, x_dataset_path: str, y_dataset_path: str
|
|
| 1587 |
error_result["suggested_slice_y"] = y_safety["suggested_slice"]
|
| 1588 |
return error_result, None
|
| 1589 |
|
| 1590 |
-
# Read X data
|
| 1591 |
if x_slice_str and x_slice_str.strip():
|
| 1592 |
x_idx = parse_slice(x_slice_str)
|
| 1593 |
-
x_data = x_var[x_idx]
|
| 1594 |
else:
|
| 1595 |
-
x_data = x_var[:]
|
| 1596 |
|
| 1597 |
-
# Read Y data
|
| 1598 |
if y_slice_str and y_slice_str.strip():
|
| 1599 |
y_idx = parse_slice(y_slice_str)
|
| 1600 |
-
y_data = y_var[y_idx]
|
| 1601 |
else:
|
| 1602 |
-
y_data = y_var[:]
|
| 1603 |
-
|
| 1604 |
-
# Convert to numpy and flatten
|
| 1605 |
-
if not isinstance(x_data, np.ndarray):
|
| 1606 |
-
x_data = np.array(x_data)
|
| 1607 |
-
if not isinstance(y_data, np.ndarray):
|
| 1608 |
-
y_data = np.array(y_data)
|
| 1609 |
|
|
|
|
| 1610 |
x_data = x_data.flatten()
|
| 1611 |
y_data = y_data.flatten()
|
| 1612 |
|
|
@@ -1616,6 +1619,8 @@ def create_scatter_plot(file_path: str, x_dataset_path: str, y_dataset_path: str
|
|
| 1616 |
"error": f"X and Y data length mismatch after slicing: {len(x_data)} vs {len(y_data)}",
|
| 1617 |
"x_slice": x_slice_str if x_slice_str else "no slice",
|
| 1618 |
"y_slice": y_slice_str if y_slice_str else "no slice",
|
|
|
|
|
|
|
| 1619 |
"suggestion": "Adjust slices to produce equal-length arrays",
|
| 1620 |
"status": "failed"
|
| 1621 |
}, None
|
|
@@ -1623,7 +1628,13 @@ def create_scatter_plot(file_path: str, x_dataset_path: str, y_dataset_path: str
|
|
| 1623 |
finally:
|
| 1624 |
f.close()
|
| 1625 |
|
| 1626 |
-
# Calculate correlation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1627 |
correlation = float(np.corrcoef(x_data, y_data)[0, 1])
|
| 1628 |
|
| 1629 |
# Linear regression
|
|
|
|
| 1538 |
Tuple of (status dict, path to saved HTML file)
|
| 1539 |
"""
|
| 1540 |
import plotly.graph_objects as go
|
| 1541 |
+
|
| 1542 |
+
try:
|
| 1543 |
+
from scipy import stats
|
| 1544 |
+
except ImportError:
|
| 1545 |
+
return {
|
| 1546 |
+
"error": "scipy library not available. Install with: pip install scipy",
|
| 1547 |
+
"status": "failed"
|
| 1548 |
+
}, None
|
| 1549 |
|
| 1550 |
try:
|
| 1551 |
if not file_path:
|
|
|
|
| 1562 |
f, file_type = open_file_with_fallback(file_path)
|
| 1563 |
|
| 1564 |
try:
|
| 1565 |
+
# Get X and Y datasets (may be the same variable)
|
| 1566 |
if file_type == "HDF5":
|
| 1567 |
x_var = f[x_dataset_path]
|
| 1568 |
+
# Get Y separately even if same path (avoid reference issues)
|
| 1569 |
y_var = f[y_dataset_path]
|
| 1570 |
else:
|
| 1571 |
x_var = f.variables[x_dataset_path]
|
|
|
|
| 1583 |
error_result["suggested_slice_x"] = x_safety["suggested_slice"]
|
| 1584 |
return error_result, None
|
| 1585 |
|
| 1586 |
+
# Check memory safety for Y (use same variable shape if same dataset)
|
| 1587 |
y_safety = check_memory_safety(y_var.shape, y_var.dtype, y_slice_str, memory_limit_mb)
|
| 1588 |
if not y_safety["safe"]:
|
| 1589 |
error_result = {
|
|
|
|
| 1595 |
error_result["suggested_slice_y"] = y_safety["suggested_slice"]
|
| 1596 |
return error_result, None
|
| 1597 |
|
| 1598 |
+
# Read X data - parse slice and read
|
| 1599 |
if x_slice_str and x_slice_str.strip():
|
| 1600 |
x_idx = parse_slice(x_slice_str)
|
| 1601 |
+
x_data = np.array(x_var[x_idx]) # Force copy
|
| 1602 |
else:
|
| 1603 |
+
x_data = np.array(x_var[:]) # Force copy
|
| 1604 |
|
| 1605 |
+
# Read Y data - parse slice and read
|
| 1606 |
if y_slice_str and y_slice_str.strip():
|
| 1607 |
y_idx = parse_slice(y_slice_str)
|
| 1608 |
+
y_data = np.array(y_var[y_idx]) # Force copy
|
| 1609 |
else:
|
| 1610 |
+
y_data = np.array(y_var[:]) # Force copy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1611 |
|
| 1612 |
+
# Flatten both arrays
|
| 1613 |
x_data = x_data.flatten()
|
| 1614 |
y_data = y_data.flatten()
|
| 1615 |
|
|
|
|
| 1619 |
"error": f"X and Y data length mismatch after slicing: {len(x_data)} vs {len(y_data)}",
|
| 1620 |
"x_slice": x_slice_str if x_slice_str else "no slice",
|
| 1621 |
"y_slice": y_slice_str if y_slice_str else "no slice",
|
| 1622 |
+
"x_shape_after_slice": x_data.shape,
|
| 1623 |
+
"y_shape_after_slice": y_data.shape,
|
| 1624 |
"suggestion": "Adjust slices to produce equal-length arrays",
|
| 1625 |
"status": "failed"
|
| 1626 |
}, None
|
|
|
|
| 1628 |
finally:
|
| 1629 |
f.close()
|
| 1630 |
|
| 1631 |
+
# Calculate correlation (check for valid data)
|
| 1632 |
+
if len(x_data) < 2:
|
| 1633 |
+
return {
|
| 1634 |
+
"error": f"Not enough data points for correlation: {len(x_data)} points (need at least 2)",
|
| 1635 |
+
"status": "failed"
|
| 1636 |
+
}, None
|
| 1637 |
+
|
| 1638 |
correlation = float(np.corrcoef(x_data, y_data)[0, 1])
|
| 1639 |
|
| 1640 |
# Linear regression
|