JG1310 committed on
Commit
0ec3cf1
·
verified ·
1 Parent(s): 22d7108

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -16
app.py CHANGED
@@ -1538,7 +1538,14 @@ def create_scatter_plot(file_path: str, x_dataset_path: str, y_dataset_path: str
1538
  Tuple of (status dict, path to saved HTML file)
1539
  """
1540
  import plotly.graph_objects as go
1541
- from scipy import stats
 
 
 
 
 
 
 
1542
 
1543
  try:
1544
  if not file_path:
@@ -1555,9 +1562,10 @@ def create_scatter_plot(file_path: str, x_dataset_path: str, y_dataset_path: str
1555
  f, file_type = open_file_with_fallback(file_path)
1556
 
1557
  try:
1558
- # Get X dataset
1559
  if file_type == "HDF5":
1560
  x_var = f[x_dataset_path]
 
1561
  y_var = f[y_dataset_path]
1562
  else:
1563
  x_var = f.variables[x_dataset_path]
@@ -1575,7 +1583,7 @@ def create_scatter_plot(file_path: str, x_dataset_path: str, y_dataset_path: str
1575
  error_result["suggested_slice_x"] = x_safety["suggested_slice"]
1576
  return error_result, None
1577
 
1578
- # Check memory safety for Y
1579
  y_safety = check_memory_safety(y_var.shape, y_var.dtype, y_slice_str, memory_limit_mb)
1580
  if not y_safety["safe"]:
1581
  error_result = {
@@ -1587,26 +1595,21 @@ def create_scatter_plot(file_path: str, x_dataset_path: str, y_dataset_path: str
1587
  error_result["suggested_slice_y"] = y_safety["suggested_slice"]
1588
  return error_result, None
1589
 
1590
- # Read X data
1591
  if x_slice_str and x_slice_str.strip():
1592
  x_idx = parse_slice(x_slice_str)
1593
- x_data = x_var[x_idx]
1594
  else:
1595
- x_data = x_var[:]
1596
 
1597
- # Read Y data
1598
  if y_slice_str and y_slice_str.strip():
1599
  y_idx = parse_slice(y_slice_str)
1600
- y_data = y_var[y_idx]
1601
  else:
1602
- y_data = y_var[:]
1603
-
1604
- # Convert to numpy and flatten
1605
- if not isinstance(x_data, np.ndarray):
1606
- x_data = np.array(x_data)
1607
- if not isinstance(y_data, np.ndarray):
1608
- y_data = np.array(y_data)
1609
 
 
1610
  x_data = x_data.flatten()
1611
  y_data = y_data.flatten()
1612
 
@@ -1616,6 +1619,8 @@ def create_scatter_plot(file_path: str, x_dataset_path: str, y_dataset_path: str
1616
  "error": f"X and Y data length mismatch after slicing: {len(x_data)} vs {len(y_data)}",
1617
  "x_slice": x_slice_str if x_slice_str else "no slice",
1618
  "y_slice": y_slice_str if y_slice_str else "no slice",
 
 
1619
  "suggestion": "Adjust slices to produce equal-length arrays",
1620
  "status": "failed"
1621
  }, None
@@ -1623,7 +1628,13 @@ def create_scatter_plot(file_path: str, x_dataset_path: str, y_dataset_path: str
1623
  finally:
1624
  f.close()
1625
 
1626
- # Calculate correlation
 
 
 
 
 
 
1627
  correlation = float(np.corrcoef(x_data, y_data)[0, 1])
1628
 
1629
  # Linear regression
 
1538
  Tuple of (status dict, path to saved HTML file)
1539
  """
1540
  import plotly.graph_objects as go
1541
+
1542
+ try:
1543
+ from scipy import stats
1544
+ except ImportError:
1545
+ return {
1546
+ "error": "scipy library not available. Install with: pip install scipy",
1547
+ "status": "failed"
1548
+ }, None
1549
 
1550
  try:
1551
  if not file_path:
 
1562
  f, file_type = open_file_with_fallback(file_path)
1563
 
1564
  try:
1565
+ # Get X and Y datasets (may be the same variable)
1566
  if file_type == "HDF5":
1567
  x_var = f[x_dataset_path]
1568
+ # Get Y separately even if same path (avoid reference issues)
1569
  y_var = f[y_dataset_path]
1570
  else:
1571
  x_var = f.variables[x_dataset_path]
 
1583
  error_result["suggested_slice_x"] = x_safety["suggested_slice"]
1584
  return error_result, None
1585
 
1586
+ # Check memory safety for Y (use same variable shape if same dataset)
1587
  y_safety = check_memory_safety(y_var.shape, y_var.dtype, y_slice_str, memory_limit_mb)
1588
  if not y_safety["safe"]:
1589
  error_result = {
 
1595
  error_result["suggested_slice_y"] = y_safety["suggested_slice"]
1596
  return error_result, None
1597
 
1598
+ # Read X data - parse slice and read
1599
  if x_slice_str and x_slice_str.strip():
1600
  x_idx = parse_slice(x_slice_str)
1601
+ x_data = np.array(x_var[x_idx]) # Force copy
1602
  else:
1603
+ x_data = np.array(x_var[:]) # Force copy
1604
 
1605
+ # Read Y data - parse slice and read
1606
  if y_slice_str and y_slice_str.strip():
1607
  y_idx = parse_slice(y_slice_str)
1608
+ y_data = np.array(y_var[y_idx]) # Force copy
1609
  else:
1610
+ y_data = np.array(y_var[:]) # Force copy
 
 
 
 
 
 
1611
 
1612
+ # Flatten both arrays
1613
  x_data = x_data.flatten()
1614
  y_data = y_data.flatten()
1615
 
 
1619
  "error": f"X and Y data length mismatch after slicing: {len(x_data)} vs {len(y_data)}",
1620
  "x_slice": x_slice_str if x_slice_str else "no slice",
1621
  "y_slice": y_slice_str if y_slice_str else "no slice",
1622
+ "x_shape_after_slice": x_data.shape,
1623
+ "y_shape_after_slice": y_data.shape,
1624
  "suggestion": "Adjust slices to produce equal-length arrays",
1625
  "status": "failed"
1626
  }, None
 
1628
  finally:
1629
  f.close()
1630
 
1631
+ # Calculate correlation (check for valid data)
1632
+ if len(x_data) < 2:
1633
+ return {
1634
+ "error": f"Not enough data points for correlation: {len(x_data)} points (need at least 2)",
1635
+ "status": "failed"
1636
+ }, None
1637
+
1638
  correlation = float(np.corrcoef(x_data, y_data)[0, 1])
1639
 
1640
  # Linear regression