Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| HDF5/NetCDF MCP Server - Gradio Implementation | |
| A Model Context Protocol server for reading and analyzing HDF5 and NetCDF scientific data files. | |
| """ | |
import json
import tempfile
import traceback
from pathlib import Path
from typing import Dict, Any, Optional, Tuple, List
from urllib.parse import urlparse

import gradio as gr
import h5py
import netCDF4 as nc
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import requests
| # Try to import h5netcdf for better NetCDF4 compatibility | |
| try: | |
| import h5netcdf | |
| HAS_H5NETCDF = True | |
| except ImportError: | |
| HAS_H5NETCDF = False | |
| # ============================================ | |
| # UTILITY FUNCTIONS (NOT EXPOSED AS MCP TOOLS) | |
| # ============================================ | |
def parse_slice(slice_str: str):
    """Convert a numpy-style slice string (e.g. '0:10, :, 5') into slice objects.

    Returns a single slice/int when the string has one component, a tuple of
    slices/ints when it has several, and slice(None) (select everything) for
    an empty or blank string.
    """
    if not slice_str or not slice_str.strip():
        return slice(None)

    def _component(text: str):
        # A bare integer selects a single index; anything with ':' is a range.
        if ':' not in text:
            return int(text)
        fields = text.split(':')
        # Pad to three fields so start/stop/step unpack uniformly; extra
        # fields beyond the third are ignored (same as the ':'-split original).
        fields += [''] * (3 - len(fields))
        start, stop, step = (int(f) if f.strip() else None for f in fields[:3])
        return slice(start, stop, step)

    components = [_component(piece.strip()) for piece in slice_str.split(',')]
    return components[0] if len(components) == 1 else tuple(components)
def open_file_with_fallback(file_path: str) -> Tuple[Any, str]:
    """
    Open an HDF5/NetCDF file, trying multiple readers for maximum compatibility.

    .h5/.hdf5 files go straight to h5py. .nc/.nc4/.netcdf files are attempted
    with netCDF4 first, then h5netcdf (when installed), then raw h5py.

    Returns:
        (file_object, file_type) tuple where file_type is "HDF5", "NetCDF",
        or "NetCDF_h5netcdf"

    Raises:
        ValueError: if the extension is unsupported or every reader fails.
    """
    path = Path(file_path)
    suffix = path.suffix.lower()

    if suffix in ('.h5', '.hdf5'):
        # Plain HDF5 — no fallback chain needed.
        return h5py.File(path, 'r'), "HDF5"

    if suffix not in ('.nc', '.nc4', '.netcdf'):
        raise ValueError(f"Unsupported file type: {suffix}")

    # TIER 1: netCDF4 offers the richest NetCDF support.
    try:
        return nc.Dataset(path, 'r'), "NetCDF"
    except Exception as nc_error:
        # TIER 2: h5netcdf copes with some NetCDF4 files netCDF4 rejects.
        if HAS_H5NETCDF:
            try:
                return h5netcdf.File(path, 'r'), "NetCDF_h5netcdf"
            except Exception:
                pass
        # TIER 3: raw h5py as a last resort (NetCDF4 is HDF5 underneath).
        try:
            return h5py.File(path, 'r'), "HDF5"
        except Exception as h5_error:
            raise ValueError(f"Cannot open file. netCDF4 error: {nc_error}, h5py error: {h5_error}")
def get_file_structure(file_path: str) -> Dict[str, Any]:
    """
    Get the structure of an HDF5/NetCDF file.

    Args:
        file_path: Path to the HDF5/NetCDF file.

    Returns:
        On success: {"structure": {...}, "datasets": [...], "success": True},
        where "datasets" is a dropdown-friendly list of display strings
        ("path (shape, dtype)").
        On failure: {"error": ..., "traceback": ..., "success": False}.
    """
    try:
        f, file_type = open_file_with_fallback(file_path)
        # FIX: previously the handle was only closed on the success path, so
        # an exception while walking the structure leaked the open file.
        try:
            structure = {"file_type": file_type}
            datasets = []
            if file_type == "HDF5":
                # Walk the full HDF5 hierarchy, collecting every dataset.
                dataset_list = []

                def visitor(name, obj):
                    if isinstance(obj, h5py.Dataset):
                        dataset_list.append({
                            "path": "/" + name,
                            "shape": obj.shape,
                            "dtype": str(obj.dtype)
                        })

                f.visititems(visitor)
                structure["datasets"] = dataset_list
                # Dropdown-friendly display strings
                for ds in dataset_list:
                    datasets.append(f"{ds['path']} ({ds['shape']}, {ds['dtype']})")
            else:
                # NetCDF: record dimensions plus per-variable metadata.
                structure["dimensions"] = {name: len(dim) for name, dim in f.dimensions.items()}
                var_list = []
                for name, var in f.variables.items():
                    var_list.append({
                        "name": name,
                        "dimensions": var.dimensions,
                        "shape": var.shape,
                        "dtype": str(var.dtype)
                    })
                structure["variables"] = var_list
                # Dropdown-friendly display strings
                for var in var_list:
                    datasets.append(f"{var['name']} ({var['shape']}, {var['dtype']})")
        finally:
            f.close()
        return {
            "structure": structure,
            "datasets": datasets,
            "success": True
        }
    except Exception as e:
        return {
            "error": str(e),
            "traceback": traceback.format_exc(),
            "success": False
        }
def get_var_attrs_dict(var, file_type: str) -> dict:
    """Return a variable's attributes as a plain dict, for any backend.

    Unknown backends and any attribute-access failure yield an empty dict.
    """
    readers = {
        "HDF5": lambda v: dict(v.attrs),
        "NetCDF": lambda v: {k: v.getncattr(k) for k in v.ncattrs()},
        "NetCDF_h5netcdf": lambda v: dict(v.attrs),
    }
    try:
        reader = readers.get(file_type)
        return reader(var) if reader is not None else {}
    except Exception:
        # Best-effort: attribute metadata is never worth failing over.
        return {}
# ============================================
# MEMORY SAFETY LIMITS (OVERRIDABLE)
# ============================================
# Size thresholds for automatic memory checks (in MB). Consumed by
# check_memory_safety(); callers can bypass the tiers entirely by passing
# an explicit memory_limit_mb.
SAFE_LOAD_MB = 100  # Green light - load without warning
WARNING_LOAD_MB = 500  # Yellow - load but warn
ERROR_LOAD_MB = 2000  # Red - refuse unless user overrides
def check_memory_safety(data_shape: tuple, dtype, slice_str: Optional[str] = None,
                        memory_limit_mb: Optional[float] = None) -> Dict[str, Any]:
    """
    Check if loading data is safe based on size thresholds.

    Args:
        data_shape: Full shape of the dataset
        dtype: Data type of the dataset
        slice_str: Optional slice string to compute result shape
        memory_limit_mb: Optional custom memory limit. If None, uses tiered defaults.

    Returns:
        Dictionary with:
        - safe: bool - whether it's safe to proceed
        - estimated_mb: float - estimated memory usage
        - warning: str (optional) - warning message
        - error: str (optional) - error message
        - suggested_slice: str (optional) - suggested slice if too large
    """
    # FIX: treat None, "" and whitespace uniformly as "no slice given".
    # Callers such as read_dataset pass "" by default, so the previous
    # `slice_str is None` tests never fired and suggested_slice was
    # silently omitted from the too-large error responses.
    no_slice = slice_str is None or not slice_str.strip()

    # Compute result shape
    if no_slice:
        result_shape = data_shape
    else:
        try:
            slices = parse_slice(slice_str)
            result_shape = compute_slice_shape(data_shape, slices)
        except Exception as e:
            return {"safe": False, "error": f"Invalid slice: {str(e)}"}

    # Estimate the memory footprint of the (possibly sliced) result
    result_elements = np.prod(result_shape)
    bytes_per_element = np.dtype(dtype).itemsize
    estimated_mb = result_elements * bytes_per_element / (1024 * 1024)
    result = {
        "estimated_mb": round(estimated_mb, 2),
        "result_shape": list(result_shape),
        "full_shape": list(data_shape)
    }

    # Use custom limit if provided (bypasses the tiered defaults entirely)
    if memory_limit_mb is not None:
        if estimated_mb <= memory_limit_mb:
            result["safe"] = True
            result["info"] = f"Within custom limit of {memory_limit_mb} MB"
        else:
            result["safe"] = False
            result["error"] = f"Exceeds custom limit of {memory_limit_mb} MB (estimated: {estimated_mb:.1f} MB)"
            if no_slice:
                result["suggested_slice"] = suggest_reasonable_slice(data_shape, dtype, target_mb=memory_limit_mb)
        return result

    # Use tiered defaults
    if estimated_mb < SAFE_LOAD_MB:
        result["safe"] = True
        result["info"] = "Safe to load"
    elif estimated_mb < WARNING_LOAD_MB:
        result["safe"] = True
        result["warning"] = f"Loading {estimated_mb:.1f} MB. Consider slicing for faster operations."
    elif estimated_mb < ERROR_LOAD_MB:
        result["safe"] = False
        result["error"] = f"Dataset too large ({estimated_mb:.1f} MB). Please use slicing."
        if no_slice:
            result["suggested_slice"] = suggest_reasonable_slice(data_shape, dtype)
    else:
        result["safe"] = False
        result["error"] = f"Dataset extremely large ({estimated_mb:.1f} MB). Maximum recommended: {ERROR_LOAD_MB} MB."
        result["suggested_slice"] = suggest_reasonable_slice(data_shape, dtype)
    return result
def compute_slice_shape(full_shape: tuple, slices) -> tuple:
    """Compute the resulting shape after applying slices.

    Integer indices remove their dimension; slice objects keep the dimension
    with the resulting length. Dimensions beyond the supplied slices are
    kept whole.
    """
    if not isinstance(slices, tuple):
        slices = (slices,)
    # Pad with full-dimension slices so every dimension has an entry
    if len(slices) < len(full_shape):
        slices = slices + (slice(None),) * (len(full_shape) - len(slices))
    result_shape = []
    for dim_size, s in zip(full_shape, slices):
        if isinstance(s, int):
            # Single index - dimension is removed
            continue
        if isinstance(s, slice):
            # FIX: slice.indices() normalizes negative start/stop, clamps
            # out-of-range bounds to the dimension size, and handles negative
            # steps — the previous hand-rolled arithmetic miscounted all three
            # (e.g. slice(0, 100) on a length-10 axis reported 100 elements).
            result_shape.append(len(range(*s.indices(dim_size))))
    return tuple(result_shape)
def suggest_reasonable_slice(full_shape: tuple, dtype, target_mb: float = 100) -> str:
    """Build a slice string that keeps estimated memory near target_mb.

    Only the largest dimension is trimmed to '0:N'; every other dimension
    is kept whole (':').
    """
    element_bytes = np.dtype(dtype).itemsize
    budget_elements = int(target_mb * 1024 * 1024 / element_bytes)

    # Trim only the largest dimension, leave the rest intact.
    largest = np.argmax(full_shape)
    remaining = np.prod([size for axis, size in enumerate(full_shape) if axis != largest])

    if remaining > 0:
        # How many entries of the largest axis fit in the element budget
        keep = min(max(1, int(budget_elements / remaining)), full_shape[largest])
    else:
        # Degenerate shape (a zero-sized axis): fall back to a small fixed cut
        keep = min(10, full_shape[largest])

    pieces = [f"0:{keep}" if axis == largest else ":" for axis in range(len(full_shape))]
    return ", ".join(pieces)
| # ============================================ | |
| # MCP TOOL FUNCTIONS (STATELESS - ALWAYS TAKE file_path) | |
| # ============================================ | |
def download_file(url: str) -> Dict[str, Any]:
    """
    Download a HDF5/NetCDF file from a URL and parse its structure.

    **CRITICAL**: This tool downloads the file AND parses its structure. You do NOT need to
    call list_structure() after downloading - the structure is already in the result.

    Args:
        url: Direct URL to the HDF5 (.h5, .hdf5) or NetCDF (.nc, .nc4) file

    Returns:
        Dictionary with these keys:
        - file_path (str): Path to downloaded file - SAVE THIS, you need it for all other tools
        - structure (dict): Complete file structure (file_type, dimensions, variables/datasets)
        - datasets (list): Available dataset names with shapes/types
        - filename (str): Original filename
        - size_mb (float): File size in megabytes
        - status (str): Success/error message

    **Return value structure**:
        {
            "file_path": "/tmp/tmpXXX.nc",   # <- Use this in other tools
            "structure": {
                "file_type": "NetCDF",
                "dimensions": {"time": 100, "lat": 90, "lon": 180},
                "variables": [{"name": "data", "shape": [100, 90, 180], ...}]
            },
            "datasets": ["data ((100, 90, 180), float32)", ...],
            "status": "Successfully downloaded..."
        }

    **Usage pattern**:
        1. Call download_file(url)
        2. Extract file_path from result: file_path = result["file_path"]
        3. Use file_path in other tools: compute_statistics(file_path, "dataset_name")

    **Common mistake**: Don't call list_structure() after download_file() - the structure
    is already in the download_file result, so that's redundant.
    """
    try:
        if not url:
            return {"error": "Please provide a URL", "status": "failed"}
        # Download with a browser-like User-Agent: some data servers reject
        # the default requests UA.
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, stream=True, timeout=60, headers=headers, allow_redirects=True)
        response.raise_for_status()
        # An HTML response usually means a landing/login page, not the file
        content_type = response.headers.get('content-type', '').lower()
        if 'text/html' in content_type:
            return {"error": "URL returned HTML instead of a file", "status": "failed"}
        # FIX: derive filename/extension from the URL *path* only, so query
        # strings (e.g. "?download=1") no longer corrupt the suffix.
        url_path = Path(urlparse(url).path)
        extension = url_path.suffix if url_path.suffix else '.nc'
        # Stream the body to a temp file in 8 KB chunks
        with tempfile.NamedTemporaryFile(delete=False, suffix=extension, mode='wb') as tmp_file:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    tmp_file.write(chunk)
            tmp_path = tmp_file.name
        # Get file size
        file_size_mb = Path(tmp_path).stat().st_size / (1024 * 1024)
        # Parse the structure up-front so callers never need a follow-up
        # list_structure() call.
        structure_result = get_file_structure(tmp_path)
        if not structure_result.get("success"):
            return {
                "error": structure_result.get('error', 'Unknown error'),
                "status": "failed"
            }
        filename = url_path.name
        return {
            "file_path": tmp_path,
            "filename": filename,
            "size_mb": round(file_size_mb, 2),
            "structure": structure_result["structure"],
            "datasets": structure_result["datasets"],
            # FIX: previously reported the literal text "(unknown)" instead
            # of the downloaded filename.
            "status": f"Successfully downloaded {filename} ({file_size_mb:.2f} MB)"
        }
    except Exception as e:
        return {
            "error": str(e),
            "traceback": traceback.format_exc(),
            "status": "failed"
        }
def read_dataset(file_path: str, dataset_path: str, slice_str: str = "",
                 memory_limit_mb: Optional[float] = None) -> Dict[str, Any]:
    """
    Read data from a specific dataset/variable.

    **WHEN TO USE THIS**:
    - When you need the actual data values for visualization or export
    - When compute_statistics() doesn't provide enough information
    - When you need to inspect specific data points

    **WHEN NOT TO USE THIS**:
    - If you only need statistics (min, max, mean, etc.) -> use compute_statistics() instead
    - If the dataset is > 10,000 elements -> use compute_statistics(), export_data(), or visualization tools

    **Token efficiency note**: Large datasets are truncated to prevent wasting tokens. If you see
    "serialized": false in the result, the tool is telling you to use a different approach.

    Args:
        file_path: Path to the HDF5/NetCDF file
        dataset_path: Path to the dataset within the file (e.g., 'variable_name', '/group/data')
        slice_str: Optional numpy-style slice (e.g., '0:10, :, 5' = first 10 in dim 0, all of dim 1, index 5 of dim 2)
        memory_limit_mb: Optional custom memory limit in MB. If not specified, uses tiered
                         defaults (100 MB safe, 500 MB warning, 2000 MB max). Set higher to
                         override defaults if you have more memory available.

    Returns:
        Dictionary containing:
        - data (list): Actual data values (only if < 10,000 elements; otherwise see alternatives)
        - shape (list): Shape of the data
        - dtype (str): Data type
        - size_mb (float): Memory size
        - num_elements (int): Total elements
        - serialized (bool): True if full data included, False if truncated
        - alternatives (list): Suggested alternative tools if data was truncated

    **Return value structure (small dataset)**:
        {
            "data": [[1.2, 3.4], [5.6, 7.8]],   # <- Full data array
            "shape": [2, 2],
            "dtype": "float32",
            "serialized": true
        }

    **Return value structure (large dataset)**:
        {
            "serialized": false,   # <- Data NOT included
            "preview_first_100": [1.2, 3.4, ...],
            "alternatives": [
                "Use compute_statistics() to get summary stats",
                "Use export_data() to save to file",
                "Use create_histogram() to visualize distribution"
            ],
            "shape": [1000, 90, 180],
            "num_elements": 16200000
        }
    """
    try:
        if not file_path:
            return {"error": "Please provide a file path"}
        if not dataset_path:
            return {"error": "Please specify a dataset path"}
        # Extract clean path if it has shape/dtype info (dropdown entries look
        # like "name ((shape), dtype)")
        dataset_path = dataset_path.split(" (")[0] if " (" in dataset_path else dataset_path
        # Open file
        f, file_type = open_file_with_fallback(file_path)
        try:
            # Get dataset/variable (HDF5 uses plain item access, NetCDF a
            # .variables mapping)
            if file_type == "HDF5":
                var = f[dataset_path]
            else:
                var = f.variables[dataset_path]
            # Check memory safety before loading
            safety_check = check_memory_safety(var.shape, var.dtype, slice_str, memory_limit_mb)
            if not safety_check["safe"]:
                # Return error with clear override instructions
                # NOTE(review): if check_memory_safety returned an "Invalid
                # slice" error it carries no estimated_mb/result_shape keys;
                # the KeyError below is then caught by the outer handler and
                # reported as a generic read failure — confirm acceptable.
                error_result = {
                    "error": safety_check["error"],
                    "estimated_size_mb": safety_check["estimated_mb"],
                    "full_shape": safety_check["full_shape"],
                    "result_shape": safety_check["result_shape"],
                    "override_option": f"To load this data anyway, set memory_limit_mb={int(safety_check['estimated_mb'] * 1.2)} or higher",
                    "default_limits": "Default limits: 100 MB (safe), 500 MB (warning), 2000 MB (max)"
                }
                if "suggested_slice" in safety_check:
                    error_result["suggested_slice"] = safety_check["suggested_slice"]
                    error_result["alternative"] = f"Or use slicing: slice_str='{safety_check['suggested_slice']}'"
                return error_result
            # Read data with optional slicing
            if slice_str and slice_str.strip():
                idx = parse_slice(slice_str)
                data = var[idx]
            else:
                data = var[:]
            # Convert to numpy array (NetCDF may hand back masked arrays or
            # scalars; np.ndarray gives a uniform interface below)
            if not isinstance(data, np.ndarray):
                data = np.array(data)
            result = {
                "shape": list(data.shape),
                "dtype": str(data.dtype),
                "size_mb": round(data.nbytes / (1024 * 1024), 2),
                "num_elements": int(data.size)
            }
            # Limit serialization to avoid excessive token usage in LLM context
            # The LLM sees the full JSON response and large arrays waste tokens
            MAX_SERIALIZE_ELEMENTS = 10000
            if data.size > MAX_SERIALIZE_ELEMENTS:
                result["serialized"] = False
                result["note"] = f"Dataset has {data.size:,} elements. Only preview returned to save tokens."
                result["token_saving_info"] = f"Full data would use ~{data.size * 10} tokens. Use export_data or create_histogram for large datasets."
                # Provide preview: first and last 100 elements of the
                # flattened array
                flat_data = data.flatten()
                result["preview_first_100"] = flat_data[:100].tolist()
                result["preview_last_100"] = flat_data[-100:].tolist()
                result["preview_shape_info"] = f"Showing first/last 100 of {data.size:,} elements"
                # Suggest alternatives
                result["alternatives"] = [
                    f"Use compute_statistics() to get summary stats without loading all data",
                    f"Use export_data() to save to file for external analysis",
                    f"Use create_histogram() to visualize distribution",
                    f"Use slicing to load smaller subset: slice_str='{suggest_reasonable_slice(data.shape, data.dtype, target_mb=50)}'"
                ]
            else:
                result["serialized"] = True
                result["data"] = data.tolist()
            # Add warning if present (the 100-500 MB "yellow" tier loads but
            # warns)
            if "warning" in safety_check:
                result["warning"] = safety_check["warning"]
            return result
        finally:
            f.close()
    except Exception as e:
        return {"error": f"Failed to read dataset: {str(e)}", "traceback": traceback.format_exc()}
def get_dataset_preview(file_path: str, dataset_path: str, n_preview: int = 10) -> Dict[str, Any]:
    """
    Get a safe preview of a dataset without loading the entire array.

    **USE CASE**: Quick peek at data values without loading large arrays.

    **WHEN TO USE THIS**:
    - You want to see a few sample values to understand the data
    - You need to verify data looks reasonable before processing
    - You want to check data format/structure with minimal memory use
    - Dataset is large but you only need to see representative values

    **WHEN NOT TO USE THIS**:
    - You need statistics -> use compute_statistics() instead (more efficient)
    - You need all the data -> use read_dataset() for small datasets or export_data() for large ones
    - You want to visualize -> use create_histogram(), create_line_plot(), etc. instead

    **PREVIEW BEHAVIOR**:
    - Only loads the requested elements, so it is safe for any dataset size
    - 1D arrays: returns the first n_preview elements (and the last n_preview
      when the array is longer than that)
    - Multi-dimensional arrays: samples along the first axis at index 0 of
      every other axis

    Args:
        file_path: Path to the HDF5/NetCDF file
        dataset_path: Path to the dataset within the file
        n_preview: Number of elements to preview (default: 10)

    Returns:
        Dictionary that always contains:
        - dataset_path (str): Cleaned dataset path that was read
        - shape (list): Full shape of the dataset
        - dtype (str): Data type
        - size_mb (float): Total memory size in MB
        - num_elements (int): Total number of elements
        Plus preview values depending on dimensionality:
        - empty dataset: preview_data ([]) and a note
        - scalar: preview_data (the single value)
        - 1D: first_n (and last_n when longer than n_preview)
        - N-D: first_n_elements and a note describing the sampling

    **Return value structure (1D dataset)**:
        {
            "dataset_path": "temperature",
            "shape": [1750],
            "dtype": "float32",
            "size_mb": 0.01,
            "num_elements": 1750,
            "first_n": [15.3, 14.2, 16.8, ...],
            "last_n": [14.9, 15.7, 16.1, ...]
        }

    **Example workflow - Quick data check**:
        # Preview data to verify it looks reasonable
        preview = get_dataset_preview(file_path, "temperature", n_preview=20)
        sample_values = preview.get("first_n", preview.get("first_n_elements", []))
        # Check if values are in expected range
        if all(-50 < v < 50 for v in sample_values):
            # Looks good, now get full statistics
            stats = compute_statistics(file_path, "temperature")
    """
    try:
        if not file_path:
            return {"error": "Please provide a file path"}
        if not dataset_path:
            return {"error": "Please specify a dataset path"}
        # Extract clean path (dropdown entries carry a " (shape, dtype)" suffix)
        dataset_path = dataset_path.split(" (")[0] if " (" in dataset_path else dataset_path
        # Open file
        f, file_type = open_file_with_fallback(file_path)
        try:
            # Get dataset/variable
            if file_type == "HDF5":
                var = f[dataset_path]
            else:
                var = f.variables[dataset_path]
            shape = var.shape
            dtype = var.dtype
            # Scalars have an empty shape; treat them as one element
            num_elements = int(np.prod(shape)) if len(shape) > 0 else 1
            size_mb = num_elements * dtype.itemsize / (1024**2)
            result = {
                "dataset_path": dataset_path,
                "shape": list(shape),
                "dtype": str(dtype),
                "size_mb": round(size_mb, 2),
                "num_elements": num_elements
            }
            # Get preview data
            if num_elements == 0:
                result["preview_data"] = []
                result["note"] = "Dataset is empty"
            elif len(shape) == 0:
                # Scalar: read with the empty-tuple index, unwrap numpy scalars
                val = var[()]
                result["preview_data"] = val.item() if isinstance(val, np.generic) else val
            elif len(shape) == 1:
                # 1D array: head (and tail when longer than n_preview)
                n = min(n_preview, shape[0])
                result["first_n"] = var[:n].tolist()
                if shape[0] > n:
                    result["last_n"] = var[-n:].tolist()
            else:
                # Multi-dimensional: slice along axis 0, pinned to index 0 on
                # every other axis, so only n_preview elements are read
                sample_slice = [slice(0, min(n_preview, shape[0]))] + [0] * (len(shape) - 1)
                sample_data = var[tuple(sample_slice)]
                result["first_n_elements"] = np.array(sample_data).flatten()[:n_preview].tolist()
                result["note"] = f"Showing first {min(n_preview, len(result['first_n_elements']))} of {num_elements} elements"
            return result
        finally:
            f.close()
    except Exception as e:
        return {"error": f"Failed to preview dataset: {str(e)}", "traceback": traceback.format_exc()}
def get_attributes(file_path: str, path: str = "") -> Dict[str, Any]:
    """
    Get metadata attributes for a file, group, or dataset.

    **USE CASE**: Understanding what variables mean, their units, descriptions, etc.

    **COMMON ATTRIBUTES TO LOOK FOR**:
    - units: Physical units of the data (e.g., "kelvin", "meters", "days since 1800-01-01")
    - long_name: Human-readable description of the variable
    - standard_name: CF-convention standard name
    - scale_factor / add_offset: Linear transformation parameters for packed data
    - _FillValue: Value representing missing/invalid data
    - valid_range / valid_min / valid_max: Valid data bounds

    **WHEN TO USE THIS**:
    - Before analyzing data: check units and understand what the values represent
    - To find file-level metadata: leave path empty
    - To understand coordinate variables: especially for time (to convert indices to dates)
    - To check for data quality flags or processing history

    Args:
        file_path: Path to the HDF5/NetCDF file
        path: Path to dataset/group (empty string "" for file-level attributes)

    Returns:
        Dictionary with keys:
        - attributes (dict): All metadata attributes as key-value pairs
        - path (str): Path that was queried ("root" if file-level)

    **Example workflow - Understanding time coordinate**:
        time_attrs = get_attributes(file_path, "time")
        units = time_attrs["attributes"]["units"]  # e.g., "days since 1800-01-01"
    """
    try:
        if not file_path:
            return {"error": "Please provide a file path"}
        # Dropdown entries carry a " (shape, dtype)" suffix; strip it
        path = path.split(" (")[0] if path and " (" in path else path
        f, file_type = open_file_with_fallback(file_path)
        try:
            # Resolve the target object: file root, HDF5 node, or NetCDF variable
            if not path:
                target = f
            elif file_type == "HDF5":
                target = f[path]
            else:
                target = f.variables[path]
            # Pull the attribute mapping in whatever form the backend exposes
            if file_type in ("HDF5", "NetCDF_h5netcdf"):
                attrs = dict(target.attrs)
            elif file_type == "NetCDF":
                attrs = {k: target.getncattr(k) for k in target.ncattrs()}
            else:
                attrs = {}
            # numpy scalars -> native Python so the result is JSON-friendly
            attrs = {k: (v.item() if isinstance(v, np.generic) else v)
                     for k, v in attrs.items()}
            return {"attributes": attrs, "path": path if path else "root"}
        finally:
            f.close()
    except Exception as e:
        return {"error": f"Failed to get attributes: {str(e)}", "traceback": traceback.format_exc()}
def get_dimensions(file_path: str, dataset_path: str = "") -> Dict[str, Any]:
    """
    Get dimension information for a dataset or file.

    **USE CASE**: Understanding data structure before slicing or analyzing.

    **WHEN TO USE THIS**:
    - To see what dimensions a dataset has and their sizes
    - To understand how to construct slice strings
    - For NetCDF files with empty dataset_path: get ALL dimensions in the file

    **DO NOT use this if**:
    - You just downloaded a file - the structure is already in download_file() result
    - You already ran list_structure() - the dimensions are in that result

    Args:
        file_path: Path to the HDF5/NetCDF file
        dataset_path: Path to dataset (empty "" for NetCDF shows all dimensions, required for HDF5)

    Returns:
        For a specific HDF5 dataset: {"shape": [...], "ndim": N}
        For a specific NetCDF variable: {"dimensions": (...), "shape": [...]}
        For NetCDF with empty path: {"dimensions": {name: size, ...}}

    **Example workflow - Planning a slice**:
        dims = get_dimensions(file_path, "temperature")
        # dims["shape"] = [1750, 90, 180]
        # To get first 10 time steps, all lat, all lon: slice_str = "0:10, :, :"
    """
    try:
        if not file_path:
            return {"error": "Please provide a file path"}
        # Dropdown entries carry a " (shape, dtype)" suffix; strip it
        dataset_path = dataset_path.split(" (")[0] if dataset_path and " (" in dataset_path else dataset_path
        f, file_type = open_file_with_fallback(file_path)
        try:
            if file_type != "HDF5":
                # NetCDF: either one variable's dims or the whole file's
                if dataset_path:
                    var = f.variables[dataset_path]
                    return {
                        "dimensions": var.dimensions,
                        "shape": list(var.shape)
                    }
                return {
                    "dimensions": {name: len(dim) for name, dim in f.dimensions.items()}
                }
            # HDF5 has no named dimensions, so a dataset path is mandatory
            if not dataset_path:
                return {"error": "dataset_path required for HDF5 files"}
            shape = f[dataset_path].shape
            return {"shape": list(shape), "ndim": len(shape)}
        finally:
            f.close()
    except Exception as e:
        return {"error": f"Failed to get dimensions: {str(e)}", "traceback": traceback.format_exc()}
def list_structure(file_path: str) -> Dict[str, Any]:
    """
    List the complete hierarchical structure of the file.

    **WHEN TO USE THIS**:
    - You need to browse available datasets/variables
    - You don't know the names of variables in the file
    - You need to see the complete file organization

    **WHEN NOT TO USE THIS**:
    - Right after download_file() -> structure is already in that result
    - You already know the dataset name -> just use it directly in other tools
    - File has > 500 datasets -> output will be truncated to save tokens

    **TOKEN EFFICIENCY NOTE**: For files with many datasets, this function truncates
    the output to avoid wasting tokens. If you know specific dataset names, use them
    directly instead of browsing.

    Args:
        file_path: Path to the HDF5/NetCDF file

    Returns:
        Dictionary containing:
        - file_type (str): "HDF5" or "NetCDF"
        - dimensions (dict): For NetCDF, dimensions and their sizes
        - variables (list): For NetCDF, list of variables with metadata
        - datasets (list): For HDF5, list of datasets with metadata
        - token_warning (str): If file has 101-500 items, a usage hint
        - token_saving_info (dict): If file has > 500 items, explanation of truncation

    **Common mistake**: Calling this after download_file() is redundant - you already
    have the structure from download_file()["structure"].
    """
    try:
        if not file_path:
            return {"error": "Please provide a file path"}
        # Get the full structure
        result = get_file_structure(file_path)
        if not result.get("success"):
            return {"error": result.get("error", "Unknown error")}
        structure = result["structure"]
        # Count total items (HDF5 stores "datasets", NetCDF "variables")
        if structure.get("file_type") == "HDF5":
            total_items = len(structure.get("datasets", []))
            item_type = "datasets"
        else:  # NetCDF
            total_items = len(structure.get("variables", []))
            item_type = "variables"
        # Token-saving thresholds
        # (FIX: the previous version also defined an unused TRUNCATE_LIMIT
        # equal to WARNING_LIMIT, and hard-coded the 100-item cut in several
        # places.)
        SAFE_LIMIT = 100     # <= this: return the full list silently
        WARNING_LIMIT = 500  # <= this: return the full list plus a warning
        SHOW_COUNT = 100     # above WARNING_LIMIT: truncate to this many
        if total_items <= SAFE_LIMIT:
            # Small file - return everything
            return structure
        if total_items <= WARNING_LIMIT:
            # Medium file - return everything but warn about token usage
            structure["token_warning"] = f"File has {total_items} {item_type}. Consider using specific dataset names instead of browsing full structure."
            return structure
        # Large file - truncate to save tokens
        if structure.get("file_type") == "HDF5":
            structure["datasets"] = structure.get("datasets", [])[:SHOW_COUNT]
            structure["datasets_truncated"] = True
            structure["total_datasets"] = total_items
        else:  # NetCDF
            structure["variables"] = structure.get("variables", [])[:SHOW_COUNT]
            structure["variables_truncated"] = True
            structure["total_variables"] = total_items
        structure["showing_first"] = SHOW_COUNT
        structure["token_saving_info"] = {
            "message": f"File has {total_items} {item_type}. Only showing first {SHOW_COUNT} to save tokens.",
            "estimated_tokens_saved": f"~{(total_items - SHOW_COUNT) * 50:,} tokens",
            "full_token_cost": f"Full list would use ~{total_items * 50:,} tokens",
            "alternatives": [
                "If you know the dataset name, use read_dataset() or get_attributes() directly",
                "Use file metadata or documentation to identify dataset names",
                "Search for specific patterns if available in your workflow"
            ]
        }
        return structure
    except Exception as e:
        return {"error": f"Failed to list structure: {str(e)}", "traceback": traceback.format_exc()}
def compute_statistics(file_path: str, dataset_path: str, slice_str: str = "",
                       memory_limit_mb: Optional[float] = None) -> Dict[str, Any]:
    """
    Compute basic statistics for a numeric dataset.
    **PREFERRED TOOL**: Use this instead of read_dataset() when you only need statistics.
    This is more efficient and avoids loading full arrays into memory unnecessarily.
    **Common use cases**:
    - Comparing values between time periods (e.g., "compare mean in period A vs period B")
    - Finding data ranges (min/max)
    - Data quality checks (checking for NaN values)
    - Quick data exploration before visualization
    Args:
        file_path: Path to the HDF5/NetCDF file
        dataset_path: Path to the dataset within the file
        slice_str: Optional numpy-style slice (e.g., "100:200, :, :" for rows 100-200)
        memory_limit_mb: Optional custom memory limit in MB. If not specified, uses tiered
                         defaults (100 MB safe, 500 MB warning, 2000 MB max). Set higher to
                         override defaults if you have more memory available.
    Returns:
        Dictionary with a single "statistics" key whose nested dict contains:
        - min, max, mean, std, median (float): NaN-aware statistics
        - shape (list): Shape of the data analyzed
        - dtype (str): Data type
        - size_mb (float): Memory size in MB
        - num_elements (int): Total number of elements
        - nan_count (int): Number of NaN values (floating point data only)
        - nan_fraction (float): Fraction of values that are NaN (floating point data only)
        A top-level "warning" key is added when the memory-safety check flagged the read.
        On failure, a dict with an "error" key is returned instead.
    **Return value structure**:
        {
          "statistics": {
            "min": -5.2,
            "max": 42.1,
            "mean": 15.3,  # <- Access directly with result["statistics"]["mean"]
            "std": 8.7,
            "median": 14.2,
            "shape": [12, 90, 180],
            "num_elements": 194400,
            "nan_count": 0
          }
        }
    **Example workflow - Comparing time periods**:
        # INEFFICIENT (don't do this):
        result1 = read_dataset(file, "var", "0:12, :, :")    # loads full data
        result2 = read_dataset(file, "var", "100:112, :, :") # loads full data
        # then manually calculate means from the data arrays
        # EFFICIENT (do this):
        stats1 = compute_statistics(file, "var", "0:12, :, :")    # computes stats directly
        stats2 = compute_statistics(file, "var", "100:112, :, :") # computes stats directly
        difference = stats2["statistics"]["mean"] - stats1["statistics"]["mean"]
    """
    try:
        if not file_path:
            return {"error": "Please provide a file path"}
        if not dataset_path:
            return {"error": "Please specify a dataset path"}
        # UI dropdown entries look like "path (shape info)" - keep only the path part
        dataset_path = dataset_path.split(" (")[0] if " (" in dataset_path else dataset_path
        # Open file
        f, file_type = open_file_with_fallback(file_path)
        try:
            # Get dataset/variable
            if file_type == "HDF5":
                var = f[dataset_path]
            else:
                var = f.variables[dataset_path]
            # Check memory safety before loading
            safety_check = check_memory_safety(var.shape, var.dtype, slice_str, memory_limit_mb)
            if not safety_check["safe"]:
                # Return error with clear override instructions
                error_result = {
                    "error": safety_check["error"],
                    "estimated_size_mb": safety_check["estimated_mb"],
                    "full_shape": safety_check["full_shape"],
                    "result_shape": safety_check["result_shape"],
                    "override_option": f"To compute statistics anyway, set memory_limit_mb={int(safety_check['estimated_mb'] * 1.2)} or higher",
                    "default_limits": "Default limits: 100 MB (safe), 500 MB (warning), 2000 MB (max)"
                }
                if "suggested_slice" in safety_check:
                    error_result["suggested_slice"] = safety_check["suggested_slice"]
                    error_result["alternative"] = f"Or use slicing: slice_str='{safety_check['suggested_slice']}'"
                return error_result
            # Read data with optional slicing
            if slice_str and slice_str.strip():
                idx = parse_slice(slice_str)
                data = var[idx]
            else:
                data = var[:]
            # Convert to numpy array
            if not isinstance(data, np.ndarray):
                data = np.array(data)
            # Check if numeric
            if not np.issubdtype(data.dtype, np.number):
                return {"error": f"Dataset is not numeric (dtype: {data.dtype})"}
            # Guard against empty selections: np.nanmin/np.nanmax raise ValueError on
            # zero-size arrays, which would otherwise surface as an unhelpful traceback.
            if data.size == 0:
                return {"error": "Selected data is empty (0 elements); adjust slice_str"}
            # Compute statistics (nan-aware variants skip NaN values)
            stats = {
                "min": float(np.nanmin(data)),
                "max": float(np.nanmax(data)),
                "mean": float(np.nanmean(data)),
                "std": float(np.nanstd(data)),
                "median": float(np.nanmedian(data)),
                "shape": list(data.shape),
                "dtype": str(data.dtype),
                "size_mb": round(data.nbytes / (1024 * 1024), 2),
                "num_elements": int(data.size)
            }
            # Add NaN info if floating point
            if np.issubdtype(data.dtype, np.floating):
                num_nan = int(np.sum(np.isnan(data)))
                stats["nan_count"] = num_nan
                stats["nan_fraction"] = float(num_nan / data.size) if data.size > 0 else 0.0
            result = {"statistics": stats}
            # Add warning if present
            if "warning" in safety_check:
                result["warning"] = safety_check["warning"]
            return result
        finally:
            f.close()
    except Exception as e:
        return {"error": f"Failed to compute statistics: {str(e)}", "traceback": traceback.format_exc()}
def export_data(file_path: str, dataset_path: str, format: str = "csv",
                slice_str: str = "", max_rows: int = 100000,
                memory_limit_mb: Optional[float] = None) -> Dict[str, Any]:
    """
    Export dataset to CSV or JSON format for external analysis.
    **WHEN TO USE THIS**:
    - You need to analyze data in external tools (Excel, Python, R, etc.)
    - You want to save processed/sliced data
    - Dataset is too large to view in chat but small enough to export
    **WHEN NOT TO USE THIS**:
    - Just to check a few values -> use get_dataset_preview() instead
    - Just need statistics -> use compute_statistics() instead
    - Want to visualize -> use create_histogram(), create_line_plot(), etc. instead
    **FORMAT NOTES**:
    - CSV: Best for tabular data, has row limits (default 100,000)
    - JSON: Better for hierarchical data or very large datasets
    Args:
        file_path: Path to the HDF5/NetCDF file
        dataset_path: Path to the dataset within the file
        format: Export format - "csv" or "json" (case-insensitive)
        slice_str: Optional numpy-style slice to export subset
        max_rows: Maximum rows for CSV export (safety limit)
        memory_limit_mb: Optional custom memory limit in MB. If not specified, uses tiered
                         defaults (100 MB safe, 500 MB warning, 2000 MB max).
    Returns:
        Dictionary with keys:
        - status (str): "success" or error
        - file_path (str): Path to exported file (downloadable)
        - format (str): Export format used
        - size_mb (float): Size of exported file
        - shape (list): Shape of exported data
        - total_elements (int): Total elements exported
        On failure, a dict with an "error" key is returned instead.
    **Return value structure**:
        {
          "status": "success",
          "file_path": "/tmp/exported_data.csv",  # <- Download this file
          "format": "CSV",
          "size_mb": 1.5,
          "shape": [1000, 90],
          "total_elements": 90000
        }
    """
    try:
        if not file_path:
            return {"error": "Please provide a file path"}
        if not dataset_path:
            return {"error": "Please specify a dataset path"}
        # Validate format up front so we fail fast before opening or reading any data
        format = format.lower()
        if format not in ("csv", "json"):
            return {"error": f"Unsupported format: {format}"}
        # UI dropdown entries look like "path (shape info)" - keep only the path part
        dataset_path = dataset_path.split(" (")[0] if " (" in dataset_path else dataset_path
        # Open file
        f, file_type = open_file_with_fallback(file_path)
        try:
            # Get dataset/variable
            if file_type == "HDF5":
                var = f[dataset_path]
            else:
                var = f.variables[dataset_path]
            # Check memory safety before loading
            safety_check = check_memory_safety(var.shape, var.dtype, slice_str, memory_limit_mb)
            if not safety_check["safe"]:
                # Return error with clear override instructions
                error_result = {
                    "error": safety_check["error"],
                    "estimated_size_mb": safety_check["estimated_mb"],
                    "full_shape": safety_check["full_shape"],
                    "result_shape": safety_check["result_shape"],
                    "override_option": f"To export anyway, set memory_limit_mb={int(safety_check['estimated_mb'] * 1.2)} or higher",
                    "default_limits": "Default limits: 100 MB (safe), 500 MB (warning), 2000 MB (max)"
                }
                if "suggested_slice" in safety_check:
                    error_result["suggested_slice"] = safety_check["suggested_slice"]
                    error_result["alternative"] = f"Or use slicing: slice_str='{safety_check['suggested_slice']}'"
                return error_result
            # Read data with optional slicing
            if slice_str and slice_str.strip():
                idx = parse_slice(slice_str)
                data = var[idx]
            else:
                data = var[:]
            # Convert to numpy array
            if not isinstance(data, np.ndarray):
                data = np.array(data)
            # Get metadata
            metadata = get_var_attrs_dict(var, file_type)
            # Check size limits for CSV
            total_elements = int(np.prod(data.shape))
            if total_elements > max_rows and format == "csv":
                return {"error": f"Dataset too large for CSV ({total_elements} elements). Maximum is {max_rows}. Use slicing or JSON format."}
            # Create export file
            if format == "csv":
                # Shape the data into a DataFrame according to its dimensionality
                if data.ndim == 1:
                    df = pd.DataFrame({
                        'index': np.arange(len(data)),
                        'value': data
                    })
                elif data.ndim == 2:
                    df = pd.DataFrame(data)
                    df.columns = [f'col_{i}' for i in range(data.shape[1])]
                else:
                    # Flatten with multi-index: one column per dimension plus a value column
                    indices = np.array(np.unravel_index(np.arange(data.size), data.shape)).T
                    df = pd.DataFrame(indices, columns=[f'dim_{i}' for i in range(data.ndim)])
                    df['value'] = data.flatten()
                # Apply row limit
                if len(df) > max_rows:
                    df = df.head(max_rows)
                # NamedTemporaryFile(delete=False) replaces the deprecated, race-prone
                # tempfile.mktemp: the file is created atomically and kept after close.
                with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f_out:
                    tmp_path = f_out.name
                    # Write metadata as '#' comment lines before the CSV body
                    f_out.write(f"# Dataset: {dataset_path}\n")
                    f_out.write(f"# Shape: {data.shape}\n")
                    f_out.write(f"# Dtype: {data.dtype}\n")
                    if metadata:
                        f_out.write("# Metadata:\n")
                        for key, value in metadata.items():
                            f_out.write(f"# {key}: {value}\n")
                    f_out.write("\n")
                    df.to_csv(f_out, index=False)
            else:  # json (validated above)
                export_obj = {
                    "dataset": dataset_path,
                    "shape": list(data.shape),
                    "dtype": str(data.dtype),
                    "data": data.tolist(),
                    "metadata": metadata
                }
                with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f_out:
                    tmp_path = f_out.name
                    json.dump(export_obj, f_out, indent=2)
            # Get file size
            file_size_mb = Path(tmp_path).stat().st_size / (1024 * 1024)
            return {
                "status": "success",
                "file_path": tmp_path,
                "format": format.upper(),
                "size_mb": round(file_size_mb, 2),
                "shape": list(data.shape),
                "dtype": str(data.dtype),
                "total_elements": int(total_elements)
            }
        finally:
            f.close()
    except Exception as e:
        return {"error": f"Failed to export: {str(e)}", "traceback": traceback.format_exc()}
def create_histogram(file_path: str, dataset_path: str, slice_str: str = "",
                     title_override: str = "", xlabel_override: str = "",
                     nbins: int = 50) -> Tuple[Dict[str, Any], Optional[str]]:
    """
    Create histogram showing distribution of values.
    **USE CASE**: Visualize the distribution of a dataset to understand its characteristics.
    **WHEN TO USE THIS**:
    - Understand data distribution (normal, skewed, bimodal, etc.)
    - Identify outliers or unusual values
    - Quality control - check if data looks reasonable
    - Compare with expected distributions
    **OUTPUT**: Returns both JSON status (with statistics) AND downloadable HTML file.
    The HTML file contains an interactive Plotly histogram viewable in any browser.
    Args:
        file_path: Path to the HDF5/NetCDF file
        dataset_path: Path to dataset/variable
        slice_str: Optional numpy-style slice notation to plot subset
        title_override: Optional custom title (leave empty for automatic)
        xlabel_override: Optional custom x-axis label (leave empty for automatic)
        nbins: Number of bins for histogram (default: 50, range: 5-200)
    Returns:
        Tuple of (status_dict, html_file_path):
        - status_dict: Contains statistics and status info
        - html_file_path: Path to interactive HTML file (downloadable), or None on error
    **Return value structure**:
        (
          {
            "status": "success",
            "html_file": "/tmp/plot123.html",  # <- Download this
            "statistics": {
                "mean": 15.3,
                "median": 14.2,
                "min": -5.2,
                "max": 42.1,
                "std": 8.7,
                "num_bins": 50,
                "num_values": 16200
            }
          },
          "/tmp/plot123.html"  # HTML file path
        )
    **Workflow**:
        result, html_path = create_histogram(file_path, "variable_name")
        # Download the HTML file from html_path to view the interactive plot
        # Statistics are in result["statistics"]
    """
    try:
        if not file_path:
            return {
                "error": "No file path provided",
                "status": "failed"
            }, None
        if not dataset_path:
            return {
                "error": "No dataset path provided",
                "status": "failed"
            }, None
        # UI dropdown entries look like "path (shape info)" - keep only the path part
        dataset_path = dataset_path.split(" (")[0] if " (" in dataset_path else dataset_path
        # Open file and read data directly (bypassing read_dataset's serialization limit)
        f, file_type = open_file_with_fallback(file_path)
        try:
            # Get dataset/variable
            if file_type == "HDF5":
                var = f[dataset_path]
            else:
                var = f.variables[dataset_path]
            # Check memory safety before loading
            safety_check = check_memory_safety(var.shape, var.dtype, slice_str)
            if not safety_check["safe"]:
                # Return error with suggestion
                error_result = {
                    "error": safety_check["error"],
                    "estimated_size_mb": safety_check["estimated_mb"],
                    "status": "failed"
                }
                if "suggested_slice" in safety_check:
                    error_result["suggested_slice"] = safety_check["suggested_slice"]
                    error_result["suggestion"] = f"Try using: slice_str='{safety_check['suggested_slice']}'"
                return error_result, None
            # Read data with optional slicing
            if slice_str and slice_str.strip():
                idx = parse_slice(slice_str)
                data = var[idx]
            else:
                data = var[:]
            # Keep masked array for statistics (masked-array reductions honor the mask)
            data_for_stats = data
            # Convert to a filled array for plotting; masked points become NaN
            if isinstance(data, np.ma.MaskedArray):
                data_for_plot = data.filled(np.nan)
            else:
                data_for_plot = np.array(data)
            data_for_plot = data_for_plot.flatten()
        finally:
            f.close()
        # Calculate statistics using masked array (respects mask)
        # NOTE(review): for plain (non-masked) arrays that contain NaN these return
        # NaN; compute_statistics() is the NaN-aware path - confirm this is intended.
        mean_val = float(np.mean(data_for_stats))
        median_val = float(np.median(data_for_stats))
        min_val = float(np.min(data_for_stats))
        max_val = float(np.max(data_for_stats))
        std_val = float(np.std(data_for_stats))
        # Create figure
        fig = go.Figure()
        # Add histogram
        fig.add_trace(go.Histogram(
            x=data_for_plot,
            nbinsx=nbins,
            name='Distribution',
            marker_color='steelblue',
            opacity=0.7
        ))
        # Add vertical reference lines for mean (dashed red) and median (dotted green)
        fig.add_vline(
            x=mean_val,
            line_dash="dash",
            line_color="red",
            annotation_text=f"Mean: {mean_val:.2f}",
            annotation_position="top"
        )
        fig.add_vline(
            x=median_val,
            line_dash="dot",
            line_color="green",
            annotation_text=f"Median: {median_val:.2f}",
            annotation_position="bottom"
        )
        # Set title and labels
        title = title_override if title_override else f"Distribution of {dataset_path}"
        xlabel = xlabel_override if xlabel_override else "Value"
        fig.update_layout(
            title=title,
            xaxis_title=xlabel,
            yaxis_title="Frequency",
            height=500,
            showlegend=True
        )
        # Save as interactive HTML file. NamedTemporaryFile(delete=False) replaces the
        # deprecated, race-prone tempfile.mktemp: the file is created atomically.
        with tempfile.NamedTemporaryFile(suffix='.html', dir='/tmp', delete=False) as tmp:
            html_path = tmp.name
        fig.write_html(html_path)
        # Return JSON status instead of plot object (avoids MCP serialization issues)
        result = {
            "status": "success",
            "message": f"Histogram created for {dataset_path}",
            "html_file": html_path,
            "statistics": {
                "mean": mean_val,
                "median": median_val,
                "min": min_val,
                "max": max_val,
                "std": std_val,
                "num_bins": nbins,
                "num_values": len(data_for_plot)
            },
            "download_info": "Download the HTML file to view the interactive histogram in your browser"
        }
        return result, html_path
    except Exception as e:
        # traceback is imported at module level
        return {
            "error": f"Failed to create histogram: {str(e)}",
            "traceback": traceback.format_exc(),
            "status": "failed"
        }, None
def create_line_plot(file_path: str, dataset_path: str, slice_str: str = "",
                     title_override: str = "", xlabel_override: str = "", ylabel_override: str = "",
                     x_dataset_path: str = "", moving_avg_window: int = 0,
                     memory_limit_mb: Optional[float] = None) -> Tuple[Dict[str, Any], Optional[str]]:
    """
    Create line plot for 1D data or time series.
    **USE CASE**: Visualize trends over time or along a single dimension.
    **WHEN TO USE THIS**:
    - Time series data (temperature over time, stock prices, etc.)
    - 1D profiles (depth profiles, altitude profiles)
    - Sequential data where order matters
    - When you want to see trends and patterns
    **FEATURES**:
    - Optional moving average smoothing to reduce noise
    - Custom X-axis data or auto-generated indices
    - Interactive Plotly visualization
    **OUTPUT**: Returns both JSON status (with statistics) AND downloadable HTML file.
    The HTML file contains an interactive Plotly line plot viewable in any browser.
    Args:
        file_path: Path to the HDF5/NetCDF file
        dataset_path: Path to Y-axis dataset/variable
        slice_str: Optional numpy-style slice notation (applied to Y, and reused for X)
        title_override: Optional custom title (leave empty for automatic)
        xlabel_override: Optional custom x-axis label (leave empty for automatic)
        ylabel_override: Optional custom y-axis label (leave empty for automatic)
        x_dataset_path: Optional path to X-axis data (if empty, uses indices 0, 1, 2, ...)
        moving_avg_window: Window size for smoothing (0 or 1 = no smoothing, 2-1000 = apply moving average)
        memory_limit_mb: Memory limit in MB (default: tiered 100/500/2000)
    Returns:
        Tuple of (status_dict, html_file_path):
        - status_dict: Contains statistics and status info
        - html_file_path: Path to interactive HTML file (downloadable), or None on error
    **Return value structure**:
        (
          {
            "status": "success",
            "html_file": "/tmp/plot456.html",  # <- Download this
            "num_points": 1750,
            "y_min": -5.2,
            "y_max": 42.1,
            "y_mean": 15.3,
            "y_std": 8.7,
            "smoothing": "Applied moving average with window size 10"  # If smoothing used
          },
          "/tmp/plot456.html"  # HTML file path
        )
    **Workflow - Time series with smoothing**:
        # Plot temperature time series with 30-point moving average
        result, html_path = create_line_plot(
            file_path,
            "temperature",
            slice_str=":, 45, 90",  # All times, specific lat/lon
            moving_avg_window=30
        )
        # Download html_path to see both raw data and smoothed trend
    """
    try:
        if not file_path:
            return {"error": "No file path provided", "status": "failed"}, None
        if not dataset_path:
            return {"error": "No dataset path provided", "status": "failed"}, None
        # UI dropdown entries look like "path (shape info)" - keep only the path part
        dataset_path = dataset_path.split(" (")[0] if " (" in dataset_path else dataset_path
        if x_dataset_path:
            x_dataset_path = x_dataset_path.split(" (")[0] if " (" in x_dataset_path else x_dataset_path
        # Open file and read Y data
        f, file_type = open_file_with_fallback(file_path)
        try:
            # Get Y dataset
            if file_type == "HDF5":
                y_var = f[dataset_path]
            else:
                y_var = f.variables[dataset_path]
            # Check memory safety for Y data
            safety_check = check_memory_safety(y_var.shape, y_var.dtype, slice_str, memory_limit_mb)
            if not safety_check["safe"]:
                error_result = {
                    "error": safety_check["error"],
                    "estimated_size_mb": safety_check["estimated_mb"],
                    "status": "failed"
                }
                if "suggested_slice" in safety_check:
                    error_result["suggested_slice"] = safety_check["suggested_slice"]
                    error_result["suggestion"] = f"Try using: slice_str='{safety_check['suggested_slice']}'"
                return error_result, None
            # Read Y data
            if slice_str and slice_str.strip():
                idx = parse_slice(slice_str)
                y_data = y_var[idx]
            else:
                y_data = y_var[:]
            # Keep masked array for statistics (masked-array reductions honor the mask)
            y_data_for_stats = y_data
            # Convert to a filled array for plotting; masked points become NaN
            if isinstance(y_data, np.ma.MaskedArray):
                y_data_for_plot = y_data.filled(np.nan)
            else:
                y_data_for_plot = np.array(y_data)
            y_data_for_plot = y_data_for_plot.flatten()
            # Handle X data
            if x_dataset_path:
                # Read X dataset
                if file_type == "HDF5":
                    x_var = f[x_dataset_path]
                else:
                    x_var = f.variables[x_dataset_path]
                # Check memory safety for X data
                x_safety_check = check_memory_safety(x_var.shape, x_var.dtype, slice_str, memory_limit_mb)
                if not x_safety_check["safe"]:
                    error_result = {
                        "error": f"X data: {x_safety_check['error']}",
                        "estimated_size_mb": x_safety_check["estimated_mb"],
                        "status": "failed"
                    }
                    if "suggested_slice" in x_safety_check:
                        error_result["suggested_slice"] = x_safety_check["suggested_slice"]
                    return error_result, None
                # Read X data; the Y slice (idx) is reused here, which assumes X has a
                # compatible shape - a mismatch is caught by the length check below
                if slice_str and slice_str.strip():
                    x_data = x_var[idx]
                else:
                    x_data = x_var[:]
                if not isinstance(x_data, np.ndarray):
                    x_data = np.array(x_data)
                x_data = x_data.flatten()
                if len(x_data) != len(y_data_for_plot):
                    return {
                        "error": f"X and Y data length mismatch: {len(x_data)} vs {len(y_data_for_plot)}",
                        "status": "failed"
                    }, None
            else:
                # Use indices as X
                x_data = np.arange(len(y_data_for_plot))
        finally:
            f.close()
        # Create figure
        fig = go.Figure()
        # Add raw data trace
        fig.add_trace(go.Scatter(
            x=x_data,
            y=y_data_for_plot,
            mode='lines+markers',
            name='Raw Data',
            line=dict(color='steelblue', width=1),
            marker=dict(size=3),
            opacity=0.7
        ))
        # Add moving average if requested
        smoothed_info = None
        if moving_avg_window and moving_avg_window > 1:
            # Validate window size
            if moving_avg_window > len(y_data_for_plot):
                smoothed_info = f"Moving average window ({moving_avg_window}) larger than data length ({len(y_data_for_plot)}), skipping smoothing"
            else:
                # Calculate moving average using convolution
                window = np.ones(moving_avg_window) / moving_avg_window
                y_smooth = np.convolve(y_data_for_plot, window, mode='valid')
                # Adjust X data to match smoothed Y length:
                # 'valid' mode reduces length by (window_size - 1), so center the window
                offset = (moving_avg_window - 1) // 2
                x_smooth = x_data[offset:offset + len(y_smooth)]
                # Add smoothed trace
                fig.add_trace(go.Scatter(
                    x=x_smooth,
                    y=y_smooth,
                    mode='lines',
                    name=f'Moving Avg (n={moving_avg_window})',
                    line=dict(color='red', width=2),
                    opacity=0.9
                ))
                smoothed_info = f"Applied moving average with window size {moving_avg_window}"
        # Set title and labels
        title = title_override if title_override else f"Line Plot: {dataset_path}"
        xlabel = xlabel_override if xlabel_override else (x_dataset_path if x_dataset_path else "Index")
        ylabel = ylabel_override if ylabel_override else dataset_path
        fig.update_layout(
            title=title,
            xaxis_title=xlabel,
            yaxis_title=ylabel,
            height=500,
            showlegend=True,
            hovermode='closest'
        )
        # Save as interactive HTML. NamedTemporaryFile(delete=False) replaces the
        # deprecated, race-prone tempfile.mktemp: the file is created atomically.
        with tempfile.NamedTemporaryFile(suffix='.html', dir='/tmp', delete=False) as tmp:
            html_path = tmp.name
        fig.write_html(html_path)
        # Return JSON status
        result = {
            "status": "success",
            "message": f"Line plot created for {dataset_path}",
            "html_file": html_path,
            "statistics": {
                "num_points": len(y_data_for_plot),
                "y_min": float(np.min(y_data_for_stats)),
                "y_max": float(np.max(y_data_for_stats)),
                "y_mean": float(np.mean(y_data_for_stats)),
                "y_std": float(np.std(y_data_for_stats))
            },
            "download_info": "Download the HTML file to view the interactive line plot in your browser"
        }
        if smoothed_info:
            result["smoothing"] = smoothed_info
        return result, html_path
    except Exception as e:
        # traceback is imported at module level
        return {
            "error": f"Failed to create line plot: {str(e)}",
            "traceback": traceback.format_exc(),
            "status": "failed"
        }, None
def create_heatmap(file_path: str, dataset_path: str, slice_str: str = "",
                   title_override: str = "", xlabel_override: str = "", ylabel_override: str = "",
                   colorscale: str = "Viridis", memory_limit_mb: Optional[float] = None) -> Tuple[Dict[str, Any], Optional[str]]:
    """
    Create 2D heatmap visualization.
    **USE CASE**: Visualize spatial patterns and 2D data distributions.
    **WHEN TO USE THIS**:
    - Geographic/spatial data (temperature maps, elevation, satellite imagery)
    - 2D slices from higher-dimensional data
    - Matrices and correlation data
    - Any data where you want to see spatial patterns
    **FEATURES**:
    - Multiple color scales for different data types. However, do not make any claims about what each color represents, as the colormap has default directionalities.
    - Auto-validates data is 2D (will slice higher dimensions if needed)
    - Interactive hover to inspect values
    **OUTPUT**: Returns both JSON status (with statistics) AND downloadable HTML file.
    The HTML file contains an interactive Plotly heatmap viewable in any browser.
    Args:
        file_path: Path to the HDF5/NetCDF file
        dataset_path: Path to 2D dataset/variable
        slice_str: Optional numpy-style slice (must result in 2D data, e.g., "0, :, :" for first time step)
        title_override: Optional custom title (leave empty for automatic)
        xlabel_override: Optional custom x-axis label (leave empty for automatic)
        ylabel_override: Optional custom y-axis label (leave empty for automatic)
        colorscale: Plotly colorscale name - options: Viridis, Plasma, Inferno, Magma, Cividis, Blues, Reds, YlOrRd, RdBu (default: Viridis)
        memory_limit_mb: Memory limit in MB (default: tiered 100/500/2000)
    Returns:
        Tuple of (status_dict, html_file_path):
        - status_dict: Contains statistics (computed over the plotted 2D slice) and status info
        - html_file_path: Path to interactive HTML file (downloadable), or None on error
    **Return value structure**:
        (
          {
            "status": "success",
            "html_file": "/tmp/plot789.html",  # <- Download this
            "statistics": {
                "shape": [90, 180],
                "min": -5.2,
                "max": 42.1,
                "mean": 15.3,
                "std": 8.7,
                "colorscale": "Viridis"
            }
          },
          "/tmp/plot789.html"  # HTML file path
        )
    **Workflow - Spatial data visualization**:
        # Visualize temperature at first time step
        result, html_path = create_heatmap(
            file_path,
            "temperature",
            slice_str="0, :, :",  # Time=0, all lat, all lon
            colorscale="RdBu"  # Red-blue for temperature
        )
        # Download html_path to see the spatial pattern
    **Color scale recommendations**:
    - Temperature data: "RdBu" (red-blue diverging) (IMPORTANT: Blue is positive, red is negative here. This MAY be opposite to what you are thinking, but safest to just NOT make a claim about colors.)
    - Elevation/depth: "Viridis" or "Plasma"
    - Precipitation: "Blues" or "YlOrRd"
    - General scientific: "Viridis" (perceptually uniform)
    """
    try:
        if not file_path:
            return {"error": "No file path provided", "status": "failed"}, None
        if not dataset_path:
            return {"error": "No dataset path provided", "status": "failed"}, None
        # UI dropdown entries look like "path (shape info)" - keep only the path part
        dataset_path = dataset_path.split(" (")[0] if " (" in dataset_path else dataset_path
        # Open file and read data
        f, file_type = open_file_with_fallback(file_path)
        try:
            # Get dataset
            if file_type == "HDF5":
                var = f[dataset_path]
            else:
                var = f.variables[dataset_path]
            # Check memory safety
            safety_check = check_memory_safety(var.shape, var.dtype, slice_str, memory_limit_mb)
            if not safety_check["safe"]:
                error_result = {
                    "error": safety_check["error"],
                    "estimated_size_mb": safety_check["estimated_mb"],
                    "status": "failed"
                }
                if "suggested_slice" in safety_check:
                    error_result["suggested_slice"] = safety_check["suggested_slice"]
                    error_result["suggestion"] = f"Try using: slice_str='{safety_check['suggested_slice']}'"
                return error_result, None
            # Read data
            if slice_str and slice_str.strip():
                idx = parse_slice(slice_str)
                data = var[idx]
            else:
                data = var[:]
            # Keep masked array for statistics (masked-array reductions honor the mask)
            data_for_stats = data
            # Convert to a filled array for plotting; masked points become NaN
            if isinstance(data, np.ma.MaskedArray):
                data_for_plot = data.filled(np.nan)
            else:
                data_for_plot = np.array(data)
            # Ensure 2D
            if data_for_plot.ndim == 1:
                return {
                    "error": f"Data is 1D (shape {data_for_plot.shape}). Heatmap requires 2D data. Try using create_line_plot instead.",
                    "status": "failed"
                }, None
            elif data_for_plot.ndim > 2:
                # Take the first 2D slice. FIX: also reduce the stats array in lockstep
                # so the reported statistics describe the same slice that is plotted
                # (previously stats covered the full N-D array while "shape" did not).
                while data_for_plot.ndim > 2:
                    data_for_plot = data_for_plot[0]
                    data_for_stats = data_for_stats[0]
                suggestion = f"Data has {len(var.shape)}D shape. Showing first 2D slice. Use slice_str to select specific slice."
            else:
                suggestion = None
        finally:
            f.close()
        # Calculate statistics over the plotted slice using the masked array
        min_val = float(np.min(data_for_stats))
        max_val = float(np.max(data_for_stats))
        mean_val = float(np.mean(data_for_stats))
        std_val = float(np.std(data_for_stats))
        # Create figure using filled array
        fig = go.Figure(data=go.Heatmap(
            z=data_for_plot,
            colorscale=colorscale,
            colorbar=dict(title="Value"),
            hovertemplate='X: %{x}<br>Y: %{y}<br>Value: %{z}<extra></extra>'
        ))
        # Set title and labels
        title = title_override if title_override else f"Heatmap: {dataset_path}"
        xlabel = xlabel_override if xlabel_override else "X"
        ylabel = ylabel_override if ylabel_override else "Y"
        fig.update_layout(
            title=title,
            xaxis_title=xlabel,
            yaxis_title=ylabel,
            height=600,
            width=700
        )
        # Save as interactive HTML. NamedTemporaryFile(delete=False) replaces the
        # deprecated, race-prone tempfile.mktemp: the file is created atomically.
        with tempfile.NamedTemporaryFile(suffix='.html', dir='/tmp', delete=False) as tmp:
            html_path = tmp.name
        fig.write_html(html_path)
        # Return JSON status
        result = {
            "status": "success",
            "message": f"Heatmap created for {dataset_path}",
            "html_file": html_path,
            "statistics": {
                "shape": list(data_for_plot.shape),
                "min": min_val,
                "max": max_val,
                "mean": mean_val,
                "std": std_val,
                "colorscale": colorscale
            },
            "download_info": "Download the HTML file to view the interactive heatmap in your browser"
        }
        if suggestion:
            result["note"] = suggestion
        return result, html_path
    except Exception as e:
        # traceback is imported at module level
        return {
            "error": f"Failed to create heatmap: {str(e)}",
            "traceback": traceback.format_exc(),
            "status": "failed"
        }, None
def create_scatter_plot(file_path: str, x_dataset_path: str, y_dataset_path: str,
                        x_slice_str: str = "", y_slice_str: str = "",
                        title_override: str = "", xlabel_override: str = "", ylabel_override: str = "",
                        memory_limit_mb: float = None) -> Tuple[Dict[str, Any], Optional[str]]:
    """
    Create scatter plot comparing two variables.
    **USE CASE**: Explore relationships and correlations between two variables.
    **WHEN TO USE THIS**:
    - Check if two variables are correlated
    - Compare data from different time periods or locations
    - Validate model predictions vs observations
    - Identify linear or non-linear relationships
    **FEATURES**:
    - Automatic correlation calculation (Pearson's r)
    - Linear regression with RΒ² value
    - Regression line overlay
    - Independent slicing for X and Y variables
    **OUTPUT**: Returns both JSON status (with correlation stats) AND downloadable HTML file.
    The HTML file contains an interactive Plotly scatter plot viewable in any browser.
    Args:
        file_path: Path to the HDF5/NetCDF file
        x_dataset_path: Path to X-axis dataset/variable
        y_dataset_path: Path to Y-axis dataset/variable
        x_slice_str: Optional numpy-style slice for X data (can differ from Y slice)
        y_slice_str: Optional numpy-style slice for Y data (can differ from X slice)
        title_override: Optional custom title (leave empty for automatic)
        xlabel_override: Optional custom x-axis label (leave empty for automatic)
        ylabel_override: Optional custom y-axis label (leave empty for automatic)
        memory_limit_mb: Memory limit in MB (default: tiered 100/500/2000)
    Returns:
        Tuple of (status_dict, html_file_path):
        - status_dict: Contains correlation and regression statistics
          (num_points, correlation, r_squared, slope, intercept, p_value)
        - html_file_path: Path to interactive HTML file (downloadable), or None on failure
    **Workflow - Compare time periods**:
        # Compare temperature at same location, different times
        result, html_path = create_scatter_plot(
            file_path,
            x_dataset_path="temperature",
            y_dataset_path="temperature",
            x_slice_str="0, :, :",    # Time index 0
            y_slice_str="100, :, :"   # Time index 100
        )
        # rΒ² in result["statistics"]["r_squared"] shows how similar the patterns are
    """
    # Guard optional plotting/stats imports so a missing dependency yields a
    # structured error dict (consistent with the scipy guard) instead of an
    # unhandled ImportError.
    try:
        import plotly.graph_objects as go
    except ImportError:
        return {
            "error": "plotly library not available. Install with: pip install plotly",
            "status": "failed"
        }, None
    try:
        from scipy import stats
    except ImportError:
        return {
            "error": "scipy library not available. Install with: pip install scipy",
            "status": "failed"
        }, None
    try:
        if not file_path:
            return {"error": "No file path provided", "status": "failed"}, None
        if not x_dataset_path or not y_dataset_path:
            return {"error": "Both X and Y dataset paths required", "status": "failed"}, None
        # Strip any " (shape/dtype)" suffix appended by UI dropdowns
        x_dataset_path = x_dataset_path.split(" (")[0] if " (" in x_dataset_path else x_dataset_path
        y_dataset_path = y_dataset_path.split(" (")[0] if " (" in y_dataset_path else y_dataset_path
        # Open file
        f, file_type = open_file_with_fallback(file_path)
        try:
            # Get X and Y datasets (may be the same variable)
            if file_type == "HDF5":
                x_var = f[x_dataset_path]
                y_var = f[y_dataset_path]
            else:
                x_var = f.variables[x_dataset_path]
                y_var = f.variables[y_dataset_path]
            # Refuse reads that would exceed the memory budget (X first)
            x_safety = check_memory_safety(x_var.shape, x_var.dtype, x_slice_str, memory_limit_mb)
            if not x_safety["safe"]:
                error_result = {
                    "error": f"X data: {x_safety['error']}",
                    "estimated_size_mb": x_safety["estimated_mb"],
                    "status": "failed"
                }
                if "suggested_slice" in x_safety:
                    error_result["suggested_slice_x"] = x_safety["suggested_slice"]
                return error_result, None
            y_safety = check_memory_safety(y_var.shape, y_var.dtype, y_slice_str, memory_limit_mb)
            if not y_safety["safe"]:
                error_result = {
                    "error": f"Y data: {y_safety['error']}",
                    "estimated_size_mb": y_safety["estimated_mb"],
                    "status": "failed"
                }
                if "suggested_slice" in y_safety:
                    error_result["suggested_slice_y"] = y_safety["suggested_slice"]
                return error_result, None
            # Read X data, applying the optional slice
            if x_slice_str and x_slice_str.strip():
                x_data = x_var[parse_slice(x_slice_str)]
            else:
                x_data = x_var[:]
            # Read Y data, applying the optional slice
            if y_slice_str and y_slice_str.strip():
                y_data = y_var[parse_slice(y_slice_str)]
            else:
                y_data = y_var[:]
            # Fill masked values with NaN so plotly skips them and the
            # regression step below can filter them out.
            if isinstance(x_data, np.ma.MaskedArray):
                x_data_for_plot = x_data.filled(np.nan)
            else:
                x_data_for_plot = np.array(x_data)
            if isinstance(y_data, np.ma.MaskedArray):
                y_data_for_plot = y_data.filled(np.nan)
            else:
                y_data_for_plot = np.array(y_data)
            # Scatter plots need 1D sequences
            x_data_for_plot = x_data_for_plot.flatten()
            y_data_for_plot = y_data_for_plot.flatten()
            # The two slices must produce pairable arrays
            if len(x_data_for_plot) != len(y_data_for_plot):
                return {
                    "error": f"X and Y data length mismatch after slicing: {len(x_data_for_plot)} vs {len(y_data_for_plot)}",
                    "x_slice": x_slice_str if x_slice_str else "no slice",
                    "y_slice": y_slice_str if y_slice_str else "no slice",
                    # list() keeps the shapes JSON-serializable (tuples are not
                    # what other error paths emit)
                    "x_shape_after_slice": list(x_data_for_plot.shape),
                    "y_shape_after_slice": list(y_data_for_plot.shape),
                    "suggestion": "Adjust slices to produce equal-length arrays",
                    "status": "failed"
                }, None
        finally:
            f.close()
        # Correlation needs at least two points
        if len(x_data_for_plot) < 2:
            return {
                "error": f"Not enough data points for correlation: {len(x_data_for_plot)} points (need at least 2)",
                "status": "failed"
            }, None
        # Regression must only see pairs where both values are non-NaN
        # (the arrays are already flat — no second flatten needed)
        valid_mask = ~(np.isnan(x_data_for_plot) | np.isnan(y_data_for_plot))
        x_valid = x_data_for_plot[valid_mask]
        y_valid = y_data_for_plot[valid_mask]
        if len(x_valid) < 2:
            return {
                "error": f"Not enough valid (non-NaN) data points: {len(x_valid)} points (need at least 2)",
                "status": "failed"
            }, None
        # Pearson correlation + least-squares fit on valid pairs only
        correlation = float(np.corrcoef(x_valid, y_valid)[0, 1])
        slope, intercept, r_value, p_value, std_err = stats.linregress(x_valid, y_valid)
        # Build the figure: raw points first, then the regression overlay
        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=x_data_for_plot,
            y=y_data_for_plot,
            mode='markers',
            name='Data',
            marker=dict(
                size=6,
                color='steelblue',
                opacity=0.6
            ),
            hovertemplate=f'{x_dataset_path}: %{{x}}<br>{y_dataset_path}: %{{y}}<extra></extra>'
        ))
        # Regression line endpoints (nanmin/nanmax ignore NaN values)
        x_range = np.array([np.nanmin(x_data_for_plot), np.nanmax(x_data_for_plot)])
        y_pred = slope * x_range + intercept
        fig.add_trace(go.Scatter(
            x=x_range,
            y=y_pred,
            mode='lines',
            name=f'Fit (RΒ²={r_value**2:.3f})',
            line=dict(color='red', width=2, dash='dash'),
            showlegend=True
        ))
        # Titles/labels default to the dataset paths
        title = title_override if title_override else f"{y_dataset_path} vs {x_dataset_path}"
        xlabel = xlabel_override if xlabel_override else x_dataset_path
        ylabel = ylabel_override if ylabel_override else y_dataset_path
        fig.update_layout(
            title=title,
            xaxis_title=xlabel,
            yaxis_title=ylabel,
            height=600,
            width=700,
            showlegend=True,
            hovermode='closest'
        )
        # Save as interactive HTML. NamedTemporaryFile replaces the deprecated,
        # race-prone tempfile.mktemp (the file is created atomically here and
        # then overwritten by write_html).
        with tempfile.NamedTemporaryFile(suffix='.html', dir='/tmp', delete=False) as tmp:
            html_path = tmp.name
        fig.write_html(html_path)
        result = {
            "status": "success",
            "message": f"Scatter plot created for {y_dataset_path} vs {x_dataset_path}",
            "html_file": html_path,
            "statistics": {
                "num_points": len(x_data_for_plot),
                "correlation": correlation,
                "r_squared": float(r_value**2),
                "slope": float(slope),
                "intercept": float(intercept),
                "p_value": float(p_value)
            },
            "download_info": "Download the HTML file to view the interactive scatter plot in your browser"
        }
        return result, html_path
    except Exception as e:
        import traceback
        return {
            "error": f"Failed to create scatter plot: {str(e)}",
            "traceback": traceback.format_exc(),
            "status": "failed"
        }, None
def coordinate_to_index(file_path: str, coordinate_var: str, value: float) -> Dict[str, Any]:
    """
    Convert coordinate value to array index.
    **USE CASE**: "What index corresponds to coordinate value X?"
    Example: "What index corresponds to latitude 35.5?" or "What time index is year 2020?"
    **WHEN TO USE THIS**:
    - You have a coordinate value and need to find the nearest index for slicing
    - You're building a slice string and need to know which indices to use
    - You want to extract data at a specific coordinate location
    Args:
        file_path: Path to the HDF5/NetCDF file
        coordinate_var: Name of coordinate variable (e.g., 'lat', 'lon', 'time', 'depth')
        value: Coordinate value to find (will find nearest match)
    Returns:
        Dictionary with keys:
        - status (str): "success" or error info
        - index (int): The nearest index for this coordinate value
        - requested_value (float): The value you searched for
        - actual_value (float): The actual coordinate value at the found index
        - distance (float): How far the actual value is from your requested value
        - units (str): Units of the coordinate
        - closest_match (bool): True if within half a grid spacing (good match)
        - coordinate_variable (str): Name of the coordinate variable
        - array_size (int): Total size of coordinate array
    **Example workflow - Extract data at specific location**:
        # Find index for latitude 40.0
        lat_result = coordinate_to_index(file_path, "lat", 40.0)
        lat_idx = lat_result["index"]
        # Find index for longitude -75.0
        lon_result = coordinate_to_index(file_path, "lon", -75.0)
        lon_idx = lon_result["index"]
        # Now use these indices to slice the data
        slice_str = f":, {lat_idx}, {lon_idx}"  # All times, specific lat/lon
        data = read_dataset(file_path, "temperature", slice_str)
    """
    try:
        if not file_path:
            return {"error": "No file path provided", "status": "failed"}
        if not coordinate_var:
            return {"error": "No coordinate variable specified", "status": "failed"}
        # Open file
        f, file_type = open_file_with_fallback(file_path)
        try:
            # Load the coordinate array and its units attribute
            if file_type == "HDF5":
                if coordinate_var not in f:
                    return {
                        "error": f"Coordinate variable '{coordinate_var}' not found",
                        "available_variables": list(f.keys())[:20],  # Limit to avoid token explosion
                        "status": "failed"
                    }
                coord = f[coordinate_var]
                coord_data = np.array(coord[:])
                # NOTE(review): h5py may return bytes for string attrs; str()
                # below would render b'...' — confirm whether decoding is needed
                units = coord.attrs.get('units', 'unknown') if hasattr(coord, 'attrs') else 'unknown'
            else:  # NetCDF
                if coordinate_var not in f.variables:
                    return {
                        "error": f"Coordinate variable '{coordinate_var}' not found",
                        "available_variables": list(f.variables.keys())[:20],
                        "status": "failed"
                    }
                var = f.variables[coordinate_var]
                coord_data = np.array(var[:])
                units = getattr(var, 'units', 'unknown')
            # Nearest-neighbour lookup
            idx = int(np.argmin(np.abs(coord_data - value)))
            actual_value = float(coord_data[idx])
            distance = abs(actual_value - value)
            # "Good match" means within half the mean grid spacing.
            # bool(...)/float(...) are required: numpy scalar types
            # (numpy.bool_, numpy.float64) are not JSON serializable, and this
            # dict is returned to MCP clients as JSON.
            if len(coord_data) > 1:
                grid_spacing = float(np.mean(np.abs(np.diff(coord_data))))
                closest_match = bool(distance < grid_spacing / 2)
            else:
                closest_match = True
            return {
                "status": "success",
                "index": idx,
                "requested_value": value,
                "actual_value": actual_value,
                "distance": distance,
                "units": str(units),
                "closest_match": closest_match,
                "coordinate_variable": coordinate_var,
                "array_size": len(coord_data)
            }
        finally:
            f.close()
    except Exception as e:
        import traceback
        return {
            "error": f"Failed to convert coordinate to index: {str(e)}",
            "traceback": traceback.format_exc(),
            "status": "failed"
        }
def index_to_coordinate(file_path: str, coordinate_var: str, index: int) -> Dict[str, Any]:
    """
    Look up the coordinate value stored at a given array index.
    **USE CASE**: "What is the actual value at index N?"
    Example: "What date is at time index 852?" or "What latitude is at index 45?"
    **WHEN TO USE THIS**:
    - You have an index number and need the corresponding coordinate value
    - You're interpreting results from other tools that return indices
    - You need to verify what a specific index represents
    **DO NOT use this if**:
    - The coordinate values are already in the file attributes β†’ use get_attributes()
    - You want to read the entire coordinate array β†’ use read_dataset()
    Args:
        file_path: Path to the HDF5/NetCDF file
        coordinate_var: Name of coordinate variable (e.g., 'lat', 'lon', 'time', 'depth')
        index: Array index to look up (0-based indexing)
    Returns:
        Dictionary with keys: status, index, value (the coordinate value at
        that index), units, coordinate_variable, array_size — or an "error"
        entry with status "failed" when the lookup cannot be performed.
    **Example workflow**:
        result = index_to_coordinate(file_path, "time", 100)
        time_value = result["value"]  # The actual time value
        units = result["units"]       # Units needed to interpret it
    """
    try:
        # Validate inputs before touching the filesystem
        if not file_path:
            return {"error": "No file path provided", "status": "failed"}
        if not coordinate_var:
            return {"error": "No coordinate variable specified", "status": "failed"}
        handle, kind = open_file_with_fallback(file_path)
        try:
            # Resolve the coordinate variable and its units, per file type
            if kind == "HDF5":
                if coordinate_var not in handle:
                    return {
                        "error": f"Coordinate variable '{coordinate_var}' not found",
                        "available_variables": list(handle.keys())[:20],
                        "status": "failed"
                    }
                node = handle[coordinate_var]
                values = np.array(node[:])
                units = node.attrs.get('units', 'unknown') if hasattr(node, 'attrs') else 'unknown'
            else:  # NetCDF
                if coordinate_var not in handle.variables:
                    return {
                        "error": f"Coordinate variable '{coordinate_var}' not found",
                        "available_variables": list(handle.variables.keys())[:20],
                        "status": "failed"
                    }
                node = handle.variables[coordinate_var]
                values = np.array(node[:])
                units = getattr(node, 'units', 'unknown')
            size = len(values)
            # Reject out-of-range indices (0-based)
            if not 0 <= index < size:
                return {
                    "error": f"Index {index} out of bounds (array size: {size})",
                    "valid_range": f"0 to {size-1}",
                    "status": "failed"
                }
            return {
                "status": "success",
                "index": index,
                "value": float(values[index]),
                "units": str(units),
                "coordinate_variable": coordinate_var,
                "array_size": size
            }
        finally:
            handle.close()
    except Exception as e:
        import traceback
        return {
            "error": f"Failed to convert index to coordinate: {str(e)}",
            "traceback": traceback.format_exc(),
            "status": "failed"
        }
def query_content(file_path: str, query: str) -> Dict[str, Any]:
    """
    Semantic search through file contents - helps find relevant data.
    **USE CASE**: "Find all datasets related to X" where X is a keyword.
    **WHAT IT SEARCHES**:
    - Dataset/variable names
    - Attribute names and values
    - Dimension names
    - Long names and descriptions in metadata
    **WHEN TO USE THIS**:
    - You don't know the exact dataset name but know what you're looking for
    - You want to find all variables related to a topic (e.g., "temperature", "wind", "pressure")
    - The file has many variables and you need to narrow down
    **SEARCH TIPS**:
    - Use specific keywords (e.g., "temp", "precip", "wind")
    - Search is case-insensitive
    - Partial matches work (searching "temp" finds "temperature", "temp_anomaly", etc.)
    Args:
        file_path: Path to the HDF5/NetCDF file
        query: Search term (case-insensitive, matches substrings)
    Returns:
        Dictionary with keys:
        - datasets (list): Matching datasets with match reason
        - dimensions (list): Matching dimensions
        - num_dataset_matches (int): Count of dataset matches
        - num_dimension_matches (int): Count of dimension matches
        - total_matches (int): Total matches found
        - query (str): The search term used
        - message (str): Summary of results
    **Example workflow - Finding the right variable**:
        # Search for temperature-related variables
        results = query_content(file_path, "temp")
        # Review matches and pick the right one
        dataset_name = results["datasets"][0]["name"]  # e.g., "air_temperature"
        # Now use that dataset name
        stats = compute_statistics(file_path, dataset_name)
    """
    try:
        if not file_path:
            return {"error": "No file path provided", "status": "failed"}
        if not query or not query.strip():
            return {"error": "No query string provided", "status": "failed"}
        query_lower = query.lower().strip()
        # Open file
        f, file_type = open_file_with_fallback(file_path)
        matches = {
            "datasets": [],
            "attributes": [],
            "dimensions": []
        }
        try:
            if file_type == "HDF5":
                # Paths already recorded — a set gives O(1) duplicate checks
                # instead of rebuilding a list of paths for every attribute.
                matched_paths = set()

                def search_visitor(name, obj):
                    full_path = "/" + name
                    if not isinstance(obj, h5py.Dataset):
                        return
                    # Match on the dataset name itself
                    if query_lower in name.lower():
                        matches["datasets"].append({
                            "path": full_path,
                            "shape": list(obj.shape),
                            "dtype": str(obj.dtype),
                            "match_reason": "name"
                        })
                        matched_paths.add(full_path)
                    # Match on attribute names/values; only the first matching
                    # attribute is reported, and never for an already-listed path
                    for attr_name, attr_val in obj.attrs.items():
                        if query_lower in attr_name.lower() or query_lower in str(attr_val).lower():
                            if full_path not in matched_paths:
                                matches["datasets"].append({
                                    "path": full_path,
                                    "shape": list(obj.shape),
                                    "dtype": str(obj.dtype),
                                    "match_reason": f"attribute: {attr_name}"
                                })
                                matched_paths.add(full_path)
                            break
                f.visititems(search_visitor)
            else:  # NetCDF
                # Search variable names and attributes
                for var_name, var in f.variables.items():
                    matched = False
                    match_reason = None
                    # Check variable name first (cheapest test)
                    if query_lower in var_name.lower():
                        matched = True
                        match_reason = "name"
                    # Fall back to attribute names/values
                    if not matched:
                        var_attrs = get_var_attrs_dict(var, file_type)
                        for attr, attr_val in var_attrs.items():
                            if query_lower in attr.lower() or query_lower in str(attr_val).lower():
                                matched = True
                                match_reason = f"attribute: {attr}"
                                break
                    if matched:
                        info = {
                            "name": var_name,
                            "dimensions": list(var.dimensions) if hasattr(var, 'dimensions') else [],
                            "shape": list(var.shape),
                            "dtype": str(var.dtype),
                            "match_reason": match_reason
                        }
                        # Surface the most useful descriptive attributes
                        if hasattr(var, 'long_name'):
                            info["long_name"] = str(var.long_name)
                        if hasattr(var, 'units'):
                            info["units"] = str(var.units)
                        matches["datasets"].append(info)
                # Search dimension names too
                for dim_name in f.dimensions.keys():
                    if query_lower in dim_name.lower():
                        matches["dimensions"].append({
                            "name": dim_name,
                            "size": len(f.dimensions[dim_name])
                        })
        finally:
            f.close()
        # Summarize counts for the caller
        matches["status"] = "success"
        matches["query"] = query
        matches["num_dataset_matches"] = len(matches["datasets"])
        matches["num_dimension_matches"] = len(matches["dimensions"])
        matches["total_matches"] = matches["num_dataset_matches"] + matches["num_dimension_matches"]
        if matches["total_matches"] == 0:
            matches["message"] = f"No matches found for query: '{query}'"
        else:
            matches["message"] = f"Found {matches['total_matches']} matches for query: '{query}'"
        return matches
    except Exception as e:
        import traceback
        return {
            "error": f"Failed to search file contents: {str(e)}",
            "traceback": traceback.format_exc(),
            "status": "failed"
        }
def UI_HELPER(file_obj) -> Dict[str, Any]:
    """
    UI ONLY function - MCP clients ignore this!
    Browser interface helper for local files: validates an uploaded file and
    returns its server-side path for use in the other tools/tabs.
    Args:
        file_obj: File object from browser interface. Gradio file objects
            expose the temp path via their ``.name`` attribute; anything
            else is coerced with ``str()``.
    Returns:
        Dictionary with ``file_path`` (and instructions) on success, or an
        ``error`` entry with ``status: "failed"`` on any problem.
    """
    # (removed an unused `import shutil` that was never referenced)
    try:
        if file_obj is None:
            return {
                "error": "No file provided",
                "status": "failed"
            }
        # Gradio file objects carry the temp path in .name
        if hasattr(file_obj, 'name'):
            file_path = file_obj.name
        else:
            file_path = str(file_obj)
        # Only HDF5/NetCDF extensions are supported (endswith accepts a tuple)
        if not file_path.endswith(('.h5', '.hdf5', '.nc', '.nc4')):
            return {
                "error": "Unsupported file type. Must be .h5, .hdf5, .nc, or .nc4",
                "file_path": file_path,
                "status": "failed"
            }
        # Open once to verify the file is actually readable
        try:
            f, file_type = open_file_with_fallback(file_path)
            f.close()
        except Exception as e:
            return {
                "error": f"Failed to open file: {str(e)}",
                "file_path": file_path,
                "status": "failed"
            }
        return {
            "status": "success",
            "file_path": file_path,
            "message": "File ready! Copy the file_path above to use in other tools.",
            "instruction": "Use this file_path in the other tabs (Data Analysis, Visualizations)"
        }
    except Exception as e:
        import traceback
        return {
            "error": f"Failed to process file: {str(e)}",
            "traceback": traceback.format_exc(),
            "status": "failed"
        }
def download_example(example_name: str) -> Dict[str, Any]:
    """
    Download a pre-configured example dataset by name.
    Provides quick access to curated test datasets (from the xarray-data
    repository) for exploration.
    Args:
        example_name: Name of example dataset to download
    Returns:
        Dictionary containing file_path and file structure (same as
        download_file), annotated with the example's name and description
        on success; an error dictionary listing the available examples when
        the name is missing or unknown.
    """
    # Curated example datasets from the xarray-data repository
    catalog = {
        "small_netcdf": {
            "url": "https://github.com/pydata/xarray-data/raw/master/tiny.nc",
            "description": "π›°οΈ Tiny NetCDF - Small test file for quick validation"
        },
        "ocean_basin": {
            "url": "https://github.com/pydata/xarray-data/raw/master/basin_mask.nc",
            "description": "🌊 Ocean Basin Mask - Global ocean basin classification"
        },
        "air_temperature": {
            "url": "https://github.com/pydata/xarray-data/raw/master/air_temperature.nc",
            "description": "π¬ Air Temperature - NCEP/NCAR reanalysis data"
        },
        "ocean_model": {
            "url": "https://github.com/pydata/xarray-data/raw/master/ROMS_example.nc",
            "description": "🌊 Ocean Model - ROMS (Regional Ocean Modeling System) output"
        },
        "era_interim": {
            "url": "https://github.com/pydata/xarray-data/raw/master/eraint_uvz.nc",
            "description": "β˜οΈ ERA-Interim - Wind and geopotential height data"
        }
    }
    # Guard: a name must be supplied
    if not example_name:
        return {
            "error": "No example name provided",
            "available_examples": list(catalog.keys()),
            "status": "failed"
        }
    # Guard: the name must be one of the curated entries
    entry = catalog.get(example_name)
    if entry is None:
        return {
            "error": f"Unknown example: '{example_name}'",
            "available_examples": list(catalog.keys()),
            "descriptions": {name: info["description"] for name, info in catalog.items()},
            "status": "failed"
        }
    # Delegate the actual transfer to the shared download helper
    result = download_file(entry["url"])
    # Annotate successful downloads with the example metadata
    if result.get("status") == "success":
        result["example_name"] = example_name
        result["example_description"] = entry["description"]
    return result
| def create_contour_plot(file_path: str, dataset_path: str, slice_str: str = "", | |
| title_override: str = "", xlabel_override: str = "", ylabel_override: str = "", | |
| num_contours: int = 20, colorscale: str = "Viridis", | |
| memory_limit_mb: float = None) -> Tuple[Dict[str, Any], Optional[str]]: | |
| """ | |
| Create contour plot for 2D data. | |
| **USE CASE**: Visualize elevation/intensity maps with labeled contour lines. | |
| **WHEN TO USE THIS**: | |
| - Topographic/bathymetric data (elevation, ocean depth) | |
| - Pressure/temperature fields with distinct levels | |
| - When you need to see specific value boundaries | |
| - Geographic data where contours show meaningful thresholds | |
| **DIFFERENCE FROM HEATMAP**: | |
| - Heatmap: Continuous color gradient, best for overall patterns | |
| - Contour: Discrete lines at specific values, best for identifying levels | |
| **FEATURES**: | |
| - Labeled contour lines showing exact values | |
| - Adjustable number of contour levels | |
| - Multiple color scales | |
| - Interactive hover to see values | |
| **OUTPUT**: Returns both JSON status (with statistics) AND downloadable HTML file. | |
| The HTML file contains an interactive Plotly contour plot viewable in any browser. | |
| Args: | |
| file_path: Path to the HDF5/NetCDF file | |
| dataset_path: Path to 2D dataset/variable | |
| slice_str: Optional numpy-style slice (must result in 2D data, e.g., "0, :, :" for first time step) | |
| title_override: Optional custom title (leave empty for automatic) | |
| xlabel_override: Optional custom x-axis label (leave empty for automatic) | |
| ylabel_override: Optional custom y-axis label (leave empty for automatic) | |
| num_contours: Number of contour levels (default: 20, range: 5-50) | |
| colorscale: Plotly colorscale name - same options as heatmap (default: Viridis) | |
| memory_limit_mb: Memory limit in MB (default: tiered 100/500/2000) | |
| Returns: | |
| Tuple of (status_dict, html_file_path): | |
| - status_dict: Contains statistics and contour info | |
| - html_file_path: Path to interactive HTML file (downloadable) | |
| **Return value structure**: | |
| ( | |
| { | |
| "status": "success", | |
| "html_file": "/tmp/plot321.html", # β Download this | |
| "statistics": { | |
| "shape": [90, 180], | |
| "min": -5000.0, | |
| "max": 8848.0, | |
| "mean": 237.5, | |
| "std": 1250.3, | |
| "num_contours": 20, | |
| "colorscale": "Viridis" | |
| } | |
| }, | |
| "/tmp/plot321.html" # HTML file path | |
| ) | |
| **Workflow - Elevation mapping**: | |
| # Create topographic map with 30 elevation contours | |
| result, html_path = create_contour_plot( | |
| file_path, | |
| "elevation", | |
| num_contours=30, | |
| colorscale="YlOrRd" | |
| ) | |
| # Download html_path to see elevation levels with labeled contours | |
| """ | |
| import plotly.graph_objects as go | |
| try: | |
| if not file_path: | |
| return {"error": "No file path provided", "status": "failed"}, None | |
| if not dataset_path: | |
| return {"error": "No dataset path provided", "status": "failed"}, None | |
| # Extract clean path | |
| dataset_path = dataset_path.split(" (")[0] if " (" in dataset_path else dataset_path | |
| # Open file and read data | |
| f, file_type = open_file_with_fallback(file_path) | |
| try: | |
| # Get dataset | |
| if file_type == "HDF5": | |
| var = f[dataset_path] | |
| else: | |
| var = f.variables[dataset_path] | |
| # Check memory safety | |
| safety_check = check_memory_safety(var.shape, var.dtype, slice_str, memory_limit_mb) | |
| if not safety_check["safe"]: | |
| error_result = { | |
| "error": safety_check["error"], | |
| "estimated_size_mb": safety_check["estimated_mb"], | |
| "status": "failed" | |
| } | |
| if "suggested_slice" in safety_check: | |
| error_result["suggested_slice"] = safety_check["suggested_slice"] | |
| error_result["suggestion"] = f"Try using: slice_str='{safety_check['suggested_slice']}'" | |
| return error_result, None | |
| # Read data | |
| if slice_str and slice_str.strip(): | |
| idx = parse_slice(slice_str) | |
| data = var[idx] | |
| else: | |
| data = var[:] | |
| # Keep masked array for statistics | |
| data_for_stats = data | |
| # Convert to filled array for plotting | |
| if isinstance(data, np.ma.MaskedArray): | |
| data_for_plot = data.filled(np.nan) | |
| else: | |
| data_for_plot = np.array(data) | |
| # Ensure 2D | |
| if data_for_plot.ndim == 1: | |
| return { | |
| "error": f"Data is 1D (shape {data_for_plot.shape}). Contour plot requires 2D data.", | |
| "status": "failed" | |
| }, None | |
| elif data_for_plot.ndim > 2: | |
| # Take first 2D slice | |
| while data_for_plot.ndim > 2: | |
| data_for_plot = data_for_plot[0] | |
| suggestion = f"Data has {len(var.shape)}D shape. Showing first 2D slice. Use slice_str to select specific slice." | |
| else: | |
| suggestion = None | |
| finally: | |
| f.close() | |
| # Calculate statistics using masked array | |
| min_val = float(np.min(data_for_stats)) | |
| max_val = float(np.max(data_for_stats)) | |
| mean_val = float(np.mean(data_for_stats)) | |
| std_val = float(np.std(data_for_stats)) | |
| # Create figure using filled array | |
| fig = go.Figure(data=go.Contour( | |
| z=data_for_plot, | |
| colorscale=colorscale, | |
| ncontours=num_contours, | |
| colorbar=dict(title="Value"), | |
| hovertemplate='X: %{x}<br>Y: %{y}<br>Value: %{z}<extra></extra>', | |
| contours=dict( | |
| showlabels=True, | |
| labelfont=dict(size=10, color='white') | |
| ) | |
| )) | |
| # Set title and labels | |
| title = title_override if title_override else f"Contour Plot: {dataset_path}" | |
| xlabel = xlabel_override if xlabel_override else "X" | |
| ylabel = ylabel_override if ylabel_override else "Y" | |
| fig.update_layout( | |
| title=title, | |
| xaxis_title=xlabel, | |
| yaxis_title=ylabel, | |
| height=600, | |
| width=700 | |
| ) | |
| # Save as interactive HTML | |
| html_path = tempfile.mktemp(suffix='.html', dir='/tmp') | |
| fig.write_html(html_path) | |
| # Return JSON status | |
| result = { | |
| "status": "success", | |
| "message": f"Contour plot created for {dataset_path}", | |
| "html_file": html_path, | |
| "statistics": { | |
| "shape": list(data_for_plot.shape), | |
| "min": min_val, | |
| "max": max_val, | |
| "mean": mean_val, | |
| "std": std_val, | |
| "num_contours": num_contours, | |
| "colorscale": colorscale | |
| }, | |
| "download_info": "Download the HTML file to view the interactive contour plot in your browser" | |
| } | |
| if suggestion: | |
| result["note"] = suggestion | |
| return result, html_path | |
| except Exception as e: | |
| import traceback | |
| return { | |
| "error": f"Failed to create contour plot: {str(e)}", | |
| "traceback": traceback.format_exc(), | |
| "status": "failed" | |
| }, None | |
| # ============================================ | |
| # GRADIO MCP TOOLS (STATELESS) | |
| # These are the ONLY functions exposed as MCP tools | |
| # ============================================ | |
def build_mcp_tools():
    """
    Build explicit MCP tool interfaces.

    Returns:
        list[gr.Interface]: one Interface per MCP tool, in a FIXED order.

    NOTE: the launch code in ``__main__`` renders these by positional index
    (``mcp_tools[0]`` .. ``mcp_tools[17]``), so the append order below is part
    of the contract -- do not reorder or insert tools without updating the
    tab layout. Each tool is exposed to MCP clients via its ``api_name``.

    CRITICAL: Use only Textbox/Number/Radio components - NO Dropdowns with state!
    """
    tools = []
    # Tool 1: Download File  -> tools[0]
    tools.append(gr.Interface(
        fn=download_file,
        inputs=[
            gr.Textbox(
                label="URL",
                placeholder="https://github.com/pydata/xarray-data/raw/master/ROMS_example.nc",
                info="Direct URL to HDF5 (.h5, .hdf5) or NetCDF (.nc, .nc4) file"
            )
        ],
        outputs=gr.JSON(label="Download Result - Copy the 'file_path' value to use in other tools"),
        api_name="download_file",
        title="Download File",
        description="Download and parse HDF5/NetCDF file from URL. Returns file_path to use in other tools. IMPORTANT: Copy the 'file_path' from the result to use in other tool tabs."
    ))
    # Tool 1b: UI Helper (UI ONLY)  -> tools[1]
    # Uses gr.File (browser upload widget); only useful in the web UI,
    # MCP clients are told to ignore it via the description.
    tools.append(gr.Interface(
        fn=UI_HELPER,
        inputs=[
            gr.File(
                label="Select Local File",
                file_types=[".h5", ".hdf5", ".nc", ".nc4"],
                type="filepath"
            )
        ],
        outputs=gr.JSON(label="File Path Result - Copy the 'file_path' value to use in other tools"),
        api_name="UI_HELPER",
        title="Local File Upload",
        description="UI ONLY function - MCP clients ignore this! Browser interface helper. Returns file_path for use in other tools."
    ))
    # Tool 1c: Download Example Dataset  -> tools[2]
    tools.append(gr.Interface(
        fn=download_example,
        inputs=[
            gr.Radio(
                label="Select Example Dataset",
                choices=[
                    "small_netcdf",
                    "ocean_basin",
                    "air_temperature",
                    "ocean_model",
                    "era_interim"
                ],
                value="small_netcdf",
                info="Pre-configured example datasets for quick exploration"
            )
        ],
        outputs=gr.JSON(label="Download Result - Copy the 'file_path' value to use in other tools"),
        api_name="download_example",
        title="Download Example Dataset",
        description="Download pre-configured example datasets. Returns file_path and structure. Available: small_netcdf, ocean_basin, air_temperature, ocean_model, era_interim"
    ))
    # Tool 2: List Structure  -> tools[3]
    tools.append(gr.Interface(
        fn=list_structure,
        inputs=[
            gr.Textbox(
                label="File Path",
                placeholder="/tmp/file.nc",
                info="Path to the HDF5/NetCDF file"
            )
        ],
        outputs=gr.JSON(label="File Structure"),
        api_name="list_structure",
        title="List Structure",
        description="List complete hierarchical structure of the file"
    ))
    # Tool 3: Read Dataset  -> tools[4]
    tools.append(gr.Interface(
        fn=read_dataset,
        inputs=[
            gr.Textbox(
                label="File Path",
                placeholder="/tmp/file.nc",
                info="Path to the HDF5/NetCDF file"
            ),
            gr.Textbox(
                label="Dataset Path",
                placeholder="temperature",
                info="Path to dataset/variable (e.g., 'temperature', '/group/data')"
            ),
            gr.Textbox(
                label="Slice (optional)",
                placeholder="0:10, :, 5",
                value="",
                info="NumPy-style slice notation"
            )
        ],
        outputs=gr.JSON(label="Dataset Contents"),
        api_name="read_dataset",
        title="Read Dataset",
        description="Read data from a specific dataset/variable. Default limits: 100 MB (safe), 500 MB (warning), 2000 MB (max). Override with memory_limit_mb if needed."
    ))
    # Tool 4: Get Dataset Preview  -> tools[5]
    tools.append(gr.Interface(
        fn=get_dataset_preview,
        inputs=[
            gr.Textbox(
                label="File Path",
                placeholder="/tmp/file.nc",
                info="Path to the HDF5/NetCDF file"
            ),
            gr.Textbox(
                label="Dataset Path",
                placeholder="temperature",
                info="Path to dataset/variable"
            ),
            gr.Number(
                label="Preview Size",
                value=10,
                minimum=5,
                maximum=100,
                info="Number of elements to preview"
            )
        ],
        outputs=gr.JSON(label="Dataset Preview"),
        api_name="get_dataset_preview",
        title="Preview Dataset",
        description="Safely preview dataset without loading full array"
    ))
    # Tool 5: Get Attributes  -> tools[6]
    tools.append(gr.Interface(
        fn=get_attributes,
        inputs=[
            gr.Textbox(
                label="File Path",
                placeholder="/tmp/file.nc",
                info="Path to the HDF5/NetCDF file"
            ),
            gr.Textbox(
                label="Object Path (optional)",
                placeholder="",
                value="",
                info="Path to dataset/group (empty for file-level attributes)"
            )
        ],
        outputs=gr.JSON(label="Attributes"),
        api_name="get_attributes",
        title="Get Attributes",
        description="Get metadata attributes for file, group, or dataset"
    ))
    # Tool 6: Get Dimensions  -> tools[7]
    tools.append(gr.Interface(
        fn=get_dimensions,
        inputs=[
            gr.Textbox(
                label="File Path",
                placeholder="/tmp/file.nc",
                info="Path to the HDF5/NetCDF file"
            ),
            gr.Textbox(
                label="Dataset Path (optional)",
                placeholder="",
                value="",
                info="Path to dataset (empty for NetCDF to see all dimensions)"
            )
        ],
        outputs=gr.JSON(label="Dimension Information"),
        api_name="get_dimensions",
        title="Get Dimensions",
        description="Get dimension and shape information"
    ))
    # Tool 7: Compute Statistics  -> tools[8]
    tools.append(gr.Interface(
        fn=compute_statistics,
        inputs=[
            gr.Textbox(
                label="File Path",
                placeholder="/tmp/file.nc",
                info="Path to the HDF5/NetCDF file"
            ),
            gr.Textbox(
                label="Dataset Path",
                placeholder="temperature",
                info="Path to dataset/variable"
            ),
            gr.Textbox(
                label="Slice (optional)",
                placeholder="0:10, :, :",
                value="",
                info="Compute statistics over a subset"
            )
        ],
        outputs=gr.JSON(label="Statistical Results"),
        api_name="compute_statistics",
        title="Compute Statistics",
        description="Calculate statistical measures (min, max, mean, std, median). Default limits: 100 MB (safe), 500 MB (warning), 2000 MB (max)."
    ))
    # Tool 8: Export Data  -> tools[9]
    tools.append(gr.Interface(
        fn=export_data,
        inputs=[
            gr.Textbox(
                label="File Path",
                placeholder="/tmp/file.nc",
                info="Path to the HDF5/NetCDF file"
            ),
            gr.Textbox(
                label="Dataset Path",
                placeholder="temperature",
                info="Path to dataset/variable"
            ),
            gr.Radio(
                choices=["csv", "json"],
                value="csv",
                label="Format",
                info="Export format"
            ),
            gr.Textbox(
                label="Slice (optional)",
                placeholder="0:100, :, :",
                value="",
                info="Export a subset of data"
            ),
            gr.Number(
                label="Max Rows (CSV only)",
                value=100000,
                minimum=1000,
                maximum=1000000,
                info="Safety limit for CSV files"
            )
        ],
        outputs=[gr.JSON(label="Export Status"), gr.File(label="Download File")],
        api_name="export_data",
        title="Export Data",
        description="Export dataset to CSV or JSON format. Default limits: 100 MB (safe), 500 MB (warning), 2000 MB (max)."
    ))
    # Tool 9: Create Histogram  -> tools[10]
    tools.append(gr.Interface(
        fn=create_histogram,
        inputs=[
            gr.Textbox(
                label="File Path",
                placeholder="/tmp/file.nc",
                info="Path to the HDF5/NetCDF file"
            ),
            gr.Textbox(
                label="Dataset Path",
                placeholder="temperature",
                info="Path to dataset/variable"
            ),
            gr.Textbox(
                label="Slice (optional)",
                placeholder="0, :, :",
                value="",
                info="Optional subsetting before histogram"
            ),
            gr.Textbox(
                label="Title (optional)",
                placeholder="Leave blank for automatic",
                value="",
                info="Override automatic title"
            ),
            gr.Textbox(
                label="X-axis Label (optional)",
                placeholder="Leave blank for automatic",
                value="",
                info="Override automatic x-axis label"
            ),
            gr.Number(
                label="Number of Bins",
                value=50,
                minimum=5,
                maximum=200,
                info="Number of bins for histogram (default: 50)"
            )
        ],
        outputs=[
            gr.JSON(label="Histogram Status & Statistics"),
            gr.File(label="Download Interactive HTML")
        ],
        api_name="create_histogram",
        title="Create Histogram",
        description="Generate histogram showing distribution of values. Returns interactive plot and downloadable HTML file."
    ))
    # Tool 10: Line Plot  -> tools[11]
    tools.append(gr.Interface(
        fn=create_line_plot,
        inputs=[
            gr.Textbox(
                label="File Path",
                placeholder="/path/to/file.nc",
                info="Path to HDF5/NetCDF file"
            ),
            gr.Textbox(
                label="Y Dataset Path",
                placeholder="temperature",
                info="Path to Y-axis dataset/variable"
            ),
            gr.Textbox(
                label="Slice (optional)",
                placeholder="0:100, :, 5",
                value="",
                info="Numpy-style slice notation"
            ),
            gr.Textbox(
                label="Title (optional)",
                placeholder="Leave blank for automatic",
                value="",
                info="Override automatic title"
            ),
            gr.Textbox(
                label="X-axis Label (optional)",
                placeholder="Leave blank for automatic",
                value="",
                info="Override automatic x-axis label"
            ),
            gr.Textbox(
                label="Y-axis Label (optional)",
                placeholder="Leave blank for automatic",
                value="",
                info="Override automatic y-axis label"
            ),
            gr.Textbox(
                label="X Dataset Path (optional)",
                placeholder="time",
                value="",
                info="Path to X-axis data (if empty, uses indices)"
            ),
            gr.Number(
                label="Moving Average Window (optional)",
                value=0,
                minimum=0,
                maximum=1000,
                info="Window size for smoothing (0 or 1 = no smoothing, 2-1000 = apply moving average)"
            )
        ],
        outputs=[
            gr.JSON(label="Line Plot Status & Statistics"),
            gr.File(label="Download Interactive HTML")
        ],
        api_name="create_line_plot",
        title="Create Line Plot",
        description="Generate line plot for 1D data or time series. Returns statistics and downloadable HTML file."
    ))
    # Tool 11: Heatmap  -> tools[12]
    tools.append(gr.Interface(
        fn=create_heatmap,
        inputs=[
            gr.Textbox(
                label="File Path",
                placeholder="/path/to/file.nc",
                info="Path to HDF5/NetCDF file"
            ),
            gr.Textbox(
                label="Dataset Path",
                placeholder="temperature",
                info="Path to 2D dataset/variable"
            ),
            gr.Textbox(
                label="Slice (optional)",
                placeholder="0, :, :",
                value="",
                info="Numpy-style slice notation (must result in 2D)"
            ),
            gr.Textbox(
                label="Title (optional)",
                placeholder="Leave blank for automatic",
                value="",
                info="Override automatic title"
            ),
            gr.Textbox(
                label="X-axis Label (optional)",
                placeholder="Leave blank for automatic",
                value="",
                info="Override automatic x-axis label"
            ),
            gr.Textbox(
                label="Y-axis Label (optional)",
                placeholder="Leave blank for automatic",
                value="",
                info="Override automatic y-axis label"
            ),
            gr.Radio(
                label="Colorscale",
                choices=["Viridis", "Plasma", "Inferno", "Magma", "Cividis", "Blues", "Reds", "YlOrRd", "RdBu"],
                value="Viridis",
                info="Plotly colorscale for heatmap"
            )
        ],
        outputs=[
            gr.JSON(label="Heatmap Status & Statistics"),
            gr.File(label="Download Interactive HTML")
        ],
        api_name="create_heatmap",
        title="Create Heatmap",
        description="Generate 2D heatmap visualization. Returns statistics and downloadable HTML file."
    ))
    # Tool 12: Scatter Plot  -> tools[13]
    tools.append(gr.Interface(
        fn=create_scatter_plot,
        inputs=[
            gr.Textbox(
                label="File Path",
                placeholder="/path/to/file.nc",
                info="Path to HDF5/NetCDF file"
            ),
            gr.Textbox(
                label="X Dataset Path",
                placeholder="temperature",
                info="Path to X-axis dataset/variable"
            ),
            gr.Textbox(
                label="Y Dataset Path",
                placeholder="salinity",
                info="Path to Y-axis dataset/variable"
            ),
            gr.Textbox(
                label="X Slice (optional)",
                placeholder="0, :, :",
                value="",
                info="Numpy-style slice for X data (can differ from Y slice)"
            ),
            gr.Textbox(
                label="Y Slice (optional)",
                placeholder="1, :, :",
                value="",
                info="Numpy-style slice for Y data (can differ from X slice)"
            ),
            gr.Textbox(
                label="Title (optional)",
                placeholder="Leave blank for automatic",
                value="",
                info="Override automatic title"
            ),
            gr.Textbox(
                label="X-axis Label (optional)",
                placeholder="Leave blank for automatic",
                value="",
                info="Override automatic x-axis label"
            ),
            gr.Textbox(
                label="Y-axis Label (optional)",
                placeholder="Leave blank for automatic",
                value="",
                info="Override automatic y-axis label"
            )
        ],
        outputs=[
            gr.JSON(label="Scatter Plot Status & Statistics"),
            gr.File(label="Download Interactive HTML")
        ],
        api_name="create_scatter_plot",
        title="Create Scatter Plot",
        description="Generate scatter plot comparing two variables. Includes correlation and regression. Returns statistics and downloadable HTML file."
    ))
    # Tool 13: Contour Plot  -> tools[14]
    tools.append(gr.Interface(
        fn=create_contour_plot,
        inputs=[
            gr.Textbox(
                label="File Path",
                placeholder="/path/to/file.nc",
                info="Path to HDF5/NetCDF file"
            ),
            gr.Textbox(
                label="Dataset Path",
                placeholder="elevation",
                info="Path to 2D dataset/variable"
            ),
            gr.Textbox(
                label="Slice (optional)",
                placeholder="0, :, :",
                value="",
                info="Numpy-style slice notation (must result in 2D)"
            ),
            gr.Textbox(
                label="Title (optional)",
                placeholder="Leave blank for automatic",
                value="",
                info="Override automatic title"
            ),
            gr.Textbox(
                label="X-axis Label (optional)",
                placeholder="Leave blank for automatic",
                value="",
                info="Override automatic x-axis label"
            ),
            gr.Textbox(
                label="Y-axis Label (optional)",
                placeholder="Leave blank for automatic",
                value="",
                info="Override automatic y-axis label"
            ),
            gr.Number(
                label="Number of Contours",
                value=20,
                minimum=5,
                maximum=50,
                info="Number of contour levels"
            ),
            gr.Radio(
                label="Colorscale",
                choices=["Viridis", "Plasma", "Inferno", "Magma", "Cividis", "Blues", "Reds", "YlOrRd", "RdBu"],
                value="Viridis",
                info="Plotly colorscale for contours"
            )
        ],
        outputs=[
            gr.JSON(label="Contour Plot Status & Statistics"),
            gr.File(label="Download Interactive HTML")
        ],
        api_name="create_contour_plot",
        title="Create Contour Plot",
        description="Generate contour plot for 2D data. Returns statistics and downloadable HTML file."
    ))
    # Tool 14: Coordinate to Index  -> tools[15]
    tools.append(gr.Interface(
        fn=coordinate_to_index,
        inputs=[
            gr.Textbox(
                label="File Path",
                placeholder="/path/to/file.nc",
                info="Path to HDF5/NetCDF file"
            ),
            gr.Textbox(
                label="Coordinate Variable",
                placeholder="lat",
                info="Name of coordinate variable (e.g., 'lat', 'lon', 'time')"
            ),
            gr.Number(
                label="Coordinate Value",
                value=0.0,
                info="Value to find in coordinate array"
            )
        ],
        outputs=gr.JSON(label="Index Lookup Result"),
        api_name="coordinate_to_index",
        title="Coordinate to Index",
        description="Convert coordinate value to array index. E.g., 'what index corresponds to latitude 35.5?'"
    ))
    # Tool 15: Index to Coordinate  -> tools[16]
    tools.append(gr.Interface(
        fn=index_to_coordinate,
        inputs=[
            gr.Textbox(
                label="File Path",
                placeholder="/path/to/file.nc",
                info="Path to HDF5/NetCDF file"
            ),
            gr.Textbox(
                label="Coordinate Variable",
                placeholder="lat",
                info="Name of coordinate variable (e.g., 'lat', 'lon', 'time')"
            ),
            gr.Number(
                label="Array Index",
                value=0,
                minimum=0,
                info="Index to look up in coordinate array"
            )
        ],
        outputs=gr.JSON(label="Coordinate Lookup Result"),
        api_name="index_to_coordinate",
        title="Index to Coordinate",
        description="Convert array index to coordinate value. E.g., 'what latitude is at index 142?'"
    ))
    # Tool 16: Query Content (Semantic Search)  -> tools[17]
    tools.append(gr.Interface(
        fn=query_content,
        inputs=[
            gr.Textbox(
                label="File Path",
                placeholder="/path/to/file.nc",
                info="Path to HDF5/NetCDF file"
            ),
            gr.Textbox(
                label="Search Query",
                placeholder="temperature",
                info="Search term to find in dataset names, attributes, and metadata"
            )
        ],
        outputs=gr.JSON(label="Search Results"),
        api_name="query_content",
        title="Query Content (Semantic Search)",
        description="Search through file contents to find relevant datasets. Searches names, attributes, dimensions, and metadata."
    ))
    return tools
| # ============================================ | |
| # MAIN LAUNCH | |
| # ============================================ | |
if __name__ == "__main__":
    # Build the stateless tool interfaces; every Interface is exposed to MCP
    # clients through its api_name regardless of how it is laid out below.
    mcp_tools = build_mcp_tools()

    # Unpack the positional tool list into named variables once, so the tab
    # layout below reads by name instead of by bare list index. The order here
    # mirrors the append order inside build_mcp_tools().
    (tool_download, tool_upload, tool_example,
     tool_structure, tool_read, tool_preview,
     tool_attrs, tool_dims, tool_stats, tool_export,
     tool_histogram, tool_line, tool_heatmap, tool_scatter, tool_contour,
     tool_coord_to_idx, tool_idx_to_coord, tool_query) = mcp_tools

    # Organized browser UI: three top-level tabs, each with nested sub-tabs.
    with gr.Blocks(title="HDF5/NetCDF MCP Server") as demo:
        gr.Markdown("# HDF5/NetCDF MCP Server")
        gr.Markdown("Tools for analyzing HDF5 and NetCDF scientific data files")

        with gr.Tab("π₯ File Loading"):
            with gr.Tabs():
                with gr.Tab("Download from URL"):
                    tool_download.render()
                with gr.Tab("Local File"):
                    tool_upload.render()
                with gr.Tab("Example Datasets"):
                    gr.Markdown("### π Quick Start with Example Datasets")
                    gr.Markdown("Pre-configured datasets from the xarray-data repository for testing and exploration.")
                    tool_example.render()

        with gr.Tab("π Data Analysis & Utilities"):
            with gr.Tabs():
                with gr.Tab("Structure & Info"):
                    tool_structure.render()
                with gr.Tab("Read Dataset"):
                    tool_read.render()
                with gr.Tab("Preview Dataset"):
                    tool_preview.render()
                with gr.Tab("Get Attributes"):
                    tool_attrs.render()
                with gr.Tab("Get Dimensions"):
                    tool_dims.render()
                with gr.Tab("Compute Statistics"):
                    tool_stats.render()
                with gr.Tab("Export Data"):
                    tool_export.render()
                with gr.Tab("Coordinate β Index"):
                    tool_coord_to_idx.render()
                with gr.Tab("Index β Coordinate"):
                    tool_idx_to_coord.render()
                with gr.Tab("Search Content"):
                    tool_query.render()

        with gr.Tab("π Visualizations"):
            with gr.Tabs():
                with gr.Tab("Histogram"):
                    tool_histogram.render()
                with gr.Tab("Line Plot"):
                    tool_line.render()
                with gr.Tab("Heatmap"):
                    tool_heatmap.render()
                with gr.Tab("Scatter Plot"):
                    tool_scatter.render()
                with gr.Tab("Contour Plot"):
                    tool_contour.render()

    # Launch with MCP server enabled so the api_name endpoints become MCP tools.
    demo.launch(mcp_server=True)