import streamlit as st
import xarray as xr
import pandas as pd
import numpy as np
import io
import tempfile
import os
import plotly.express as px
import shutil
import gc
import psutil
import dask
from dask.diagnostics import ProgressBar

# Use the thread-based scheduler so dask-backed xarray operations stay in-process.
dask.config.set(scheduler='threads')

# Enable garbage collection (helps release large datasets between reruns).
gc.enable()

# Page configuration
st.set_page_config(
    page_title="NetCDF File Manager",
    page_icon="📊",
    layout="wide"
)

# Initialize session state once; 'page' doubles as the first-run sentinel.
if 'page' not in st.session_state:
    st.session_state['page'] = 'home'
    st.session_state['datasets'] = []
    st.session_state['merged_ds'] = None
    st.session_state['time_var'] = None
    st.session_state['single_ds'] = None


def optimize_dataset(ds):
    """Optimize dataset using chunking (single chunk along time)."""
    return ds.chunk({'time': -1})


def process_uploaded_file(uploaded_file):
    """Write an uploaded .nc file to disk and open it as an xarray Dataset.

    The upload is spooled to a named temporary file in 2 MB chunks (proxy
    friendly), opened lazily with dask chunking, then the temp file is
    removed.  CF time decoding is applied afterwards so the initial open
    is cheap.

    Parameters:
        uploaded_file: Streamlit UploadedFile containing NetCDF bytes.

    Returns:
        xr.Dataset with CF conventions decoded.
    """
    with tempfile.NamedTemporaryFile(suffix='.nc', delete=False) as tmp_file:
        # Process in smaller chunks for proxy stability
        buffer_size = 2 * 1024 * 1024  # 2MB chunks
        data = uploaded_file.getvalue()
        for i in range(0, len(data), buffer_size):
            tmp_file.write(data[i:i + buffer_size])
        tmp_file.flush()
    try:
        # Optimized dataset loading: lazy chunks, no cache, defer time decode.
        ds = xr.open_dataset(
            tmp_file.name,
            engine='netcdf4',
            chunks={'time': 'auto'},
            cache=False,
            decode_times=False
        )
    finally:
        # Always remove the temp file, even if open_dataset fails
        # (the original code leaked the file on error).
        os.unlink(tmp_file.name)
    ds = xr.decode_cf(ds)
    return ds


def find_time_variable(ds):
    """Return the first recognized time-coordinate name in *ds*, else None."""
    time_vars = ['valid_time', 'time', 'TIME', 'datetime', 'date', 'Time']
    return next((var for var in time_vars if var in ds.variables), None)


def get_variable_units(ds, var_name):
    """Return the 'units' attribute of a variable, or a placeholder string."""
    return ds[var_name].attrs.get('units', 'No unit specified')


def convert_point_nc_to_excel(ds, selected_vars, lat, lon, time_var):
    """Extract a time series at the grid point nearest (lat, lon).

    Parameters:
        ds: source xr.Dataset with 'latitude'/'longitude' coordinates.
        selected_vars: variable names to extract.
        lat, lon: target point in decimal degrees.
        time_var: name of the time coordinate (see find_time_variable).

    Returns:
        pd.DataFrame with time, coordinates, and one column per variable
        (labelled "name (unit)").
    """
    ds_point = ds.sel(latitude=lat, longitude=lon, method='nearest')
    time_values = pd.to_datetime(ds_point[time_var].values)
    data = {
        'observation_time (UTC)': time_values.strftime('%Y-%m-%d %H:%M'),
        'longitude (DD)': [lon] * len(time_values),
        'latitude (DD)': [lat] * len(time_values)
    }
    for var_name in selected_vars:
        unit = get_variable_units(ds, var_name)
        column_name = f"{var_name} ({unit})"
        var_data = ds_point[var_name].values
        # Scalar results are broadcast to the full time axis.
        data[column_name] = var_data.flatten() if var_data.size > 1 else [var_data.item()] * len(time_values)
    return pd.DataFrame(data)


def merge_datasets(datasets):
    """Merge a list of xr.Datasets into one; None for empty input.

    Uses compat/attrs "override" so conflicting attributes do not abort
    the merge; runs under the threaded dask scheduler.
    """
    if not datasets:
        return None
    if len(datasets) == 1:
        return datasets[0]
    with dask.config.set(scheduler='threads'):
        merged_ds = xr.merge(
            datasets,
            combine_attrs="override",
            compat="override"
        )
    return merged_ds


def visualize_data(df, selected_vars):
    """Render an interactive Plotly time-series chart plus an HTML download.

    Parameters:
        df: DataFrame from convert_point_nc_to_excel.
        selected_vars: column names to plot (one line each).
    """
    # Categorical x-axis of preformatted timestamps keeps tick labels compact.
    df['formatted_time'] = pd.to_datetime(df['observation_time (UTC)']).dt.strftime('%d-%m-%y %H:%M')
    fig = px.line(df, x='formatted_time', y=selected_vars,
                  title='Variables Over Time',
                  color_discrete_sequence=px.colors.qualitative.Set1)
    fig.update_layout(
        showlegend=True,
        legend=dict(
            orientation="v",
            yanchor="top",
            y=1,
            xanchor="right",
            x=1.1,
            bgcolor="white",
            bordercolor="Black",
            borderwidth=1
        ),
        hovermode='x unified',
        plot_bgcolor='white',
        paper_bgcolor='white',
        xaxis=dict(
            showgrid=True,
            gridwidth=1,
            gridcolor='LightGray',
            type='category',
            showline=True,
            linewidth=1,
            linecolor='black',
            mirror=True,
            tickangle=-90
        ),
        yaxis=dict(
            showgrid=True,
            gridwidth=1,
            gridcolor='LightGray',
            showline=True,
            linewidth=1,
            linecolor='black',
            mirror=True
        ),
        margin=dict(l=80, r=150, t=100, b=100),
        width=900,
        height=600
    )
    st.plotly_chart(fig, use_container_width=True)
    # Offer the same figure as a standalone HTML file.
    buffer = io.StringIO()
    fig.write_html(buffer)
    st.download_button(
        label="📊 Download Plot",
        data=buffer.getvalue(),
        file_name="plot.html",
        mime="text/html"
    )


def process_dataset(ds):
    """Show dataset info and drive the extract/visualize/export workflow.

    Renders variable metadata, point-selection inputs bounded by the
    dataset's coordinate range, and either plots or exports the extracted
    time series to Excel.
    """
    time_var = find_time_variable(ds)
    if not time_var:
        st.error("No time variable found in the dataset.")
        return
    st.write("### Dataset Information:")
    st.write(f"Dimensions: {dict(ds.dims)}")
    var_info = [{
        'Variable': var,
        'Unit': get_variable_units(ds, var),
        'Dimensions': ', '.join(ds[var].dims),
        'Description': ds[var].attrs.get('long_name', 'No description available')
    } for var in ds.variables]
    st.table(pd.DataFrame(var_info))
    # Coordinates and the time axis are not offered for extraction.
    available_vars = [var for var in ds.variables
                      if var not in ['latitude', 'longitude', 'lat', 'lon', time_var]]
    selected_vars = st.multiselect("Select variables to extract:", available_vars)
    col1, col2 = st.columns(2)
    with col1:
        latitude = st.number_input("Latitude (DD)",
                                   value=float(ds.latitude.mean()),
                                   min_value=float(ds.latitude.min()),
                                   max_value=float(ds.latitude.max()))
    with col2:
        longitude = st.number_input("Longitude (DD)",
                                    value=float(ds.longitude.mean()),
                                    min_value=float(ds.longitude.min()),
                                    max_value=float(ds.longitude.max()))
    action = st.radio("Choose action:", ["Visualize", "Excel"])
    if selected_vars and st.button("Generate"):
        with st.spinner('Processing...'):
            df = convert_point_nc_to_excel(ds, selected_vars, latitude, longitude, time_var)
            if action == "Visualize":
                # Skip the three leading time/coordinate columns.
                visualize_data(df, df.columns[3:])
            else:
                st.write("### Data Preview:")
                st.dataframe(df.head())
                output = io.BytesIO()
                with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
                    df.to_excel(writer, index=False)
                st.download_button(
                    label="📥 Download Excel",
                    data=output.getvalue(),
                    file_name=f"data_{latitude}_{longitude}.xlsx",
                    # Correct MIME for .xlsx (was the legacy .xls type).
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                )
                st.success("✅ File ready for download!")


# Navigation
st.title('🌟 NetCDF File Manager')
cols = st.columns([1, 1, 1, 1])
with cols[0]:
    if st.button('🏠 Home'):
        st.session_state['page'] = 'home'
        st.rerun()
with cols[1]:
    if st.button('📥 Convert NC to Excel'):
        st.session_state['page'] = 'convert'
        st.rerun()
with cols[2]:
    if st.button('🔄 Merge NC Files'):
        st.session_state['page'] = 'merge'
        st.rerun()
with cols[3]:
    if st.button('🔄 Reset'):
        # Snapshot the keys first: deleting while iterating the live
        # .keys() view is a mutation-during-iteration bug.
        for key in list(st.session_state.keys()):
            del st.session_state[key]
        st.session_state['page'] = 'home'
        st.rerun()

st.markdown("---")

# Page Content
if st.session_state['page'] == 'home':
    st.header("Welcome to NetCDF File Manager!")
    st.write("""
    ### Choose an operation from the top navigation:
    - *Convert NC to Excel*: Convert single NC file to Excel format
    - *Merge NC Files*: Merge multiple NC files and export/visualize
    - *Reset*: Clear all data and start fresh

    ### Features Available:
    - Single file conversion
    - Multiple file merging
    - Data visualization
    - Excel export
    - Original units preservation
    - Time series analysis
    """)

elif st.session_state['page'] == 'convert':
    st.header("Convert NC File to Excel")
    uploaded_file = st.file_uploader("Upload your .nc file", type='nc')
    if uploaded_file:
        with st.spinner('Processing NC file...'):
            st.session_state['single_ds'] = process_uploaded_file(uploaded_file)
        process_dataset(st.session_state['single_ds'])

elif st.session_state['page'] == 'merge':
    st.header("Merge Multiple NC Files")
    # Initialize analysis state if not exists
    if 'analysis_mode' not in st.session_state:
        st.session_state['analysis_mode'] = False
    uploaded_files = st.file_uploader("Upload multiple .nc files", type='nc',
                                      accept_multiple_files=True)
    if uploaded_files:
        if len(uploaded_files) < 2:
            st.warning("Please upload at least 2 files to merge")
        else:
            st.write(f"Number of files to merge: {len(uploaded_files)}")
            if not st.session_state['analysis_mode']:
                if st.button("🔄 Process and Merge Files"):
                    datasets = []
                    progress_text = st.empty()
                    for i, file in enumerate(uploaded_files, 1):
                        progress_text.text(f"Processing file {i}/{len(uploaded_files)}")
                        try:
                            ds = process_uploaded_file(file)
                            datasets.append(ds)
                        except Exception as e:
                            st.error(f"Error processing file {i}: {str(e)}")
                            break
                    # Only merge if every file was processed successfully.
                    if len(datasets) == len(uploaded_files):
                        st.session_state['merged_ds'] = merge_datasets(datasets)
                        if st.session_state['merged_ds'] is not None:
                            st.success("✅ Files merged successfully!")
                            st.session_state['analysis_mode'] = True
                            st.rerun()

    if st.session_state['analysis_mode'] and st.session_state['merged_ds'] is not None:
        # Download option for merged NC file
        nc_data = st.session_state['merged_ds'].to_netcdf()
        st.download_button(
            label="💾 Download Merged NC File",
            data=nc_data,
            file_name="merged_data.nc",
            mime="application/x-netcdf"
        )
        time_var = find_time_variable(st.session_state['merged_ds'])
        if time_var:
            st.write("### Dataset Information:")
            st.write(f"Dimensions: {dict(st.session_state['merged_ds'].dims)}")
            var_info = [{
                'Variable': var,
                'Unit': get_variable_units(st.session_state['merged_ds'], var),
                'Dimensions': ', '.join(st.session_state['merged_ds'][var].dims),
                'Description': st.session_state['merged_ds'][var].attrs.get('long_name', 'No description available')
            } for var in st.session_state['merged_ds'].variables]
            st.table(pd.DataFrame(var_info))
            available_vars = [var for var in st.session_state['merged_ds'].variables
                              if var not in ['latitude', 'longitude', 'lat', 'lon', time_var]]
            selected_vars = st.multiselect("Select variables:", available_vars)
            col1, col2 = st.columns(2)
            with col1:
                latitude = st.number_input("Latitude (DD)",
                                           value=float(st.session_state['merged_ds'].latitude.mean()),
                                           min_value=float(st.session_state['merged_ds'].latitude.min()),
                                           max_value=float(st.session_state['merged_ds'].latitude.max()))
            with col2:
                longitude = st.number_input("Longitude (DD)",
                                            value=float(st.session_state['merged_ds'].longitude.mean()),
                                            min_value=float(st.session_state['merged_ds'].longitude.min()),
                                            max_value=float(st.session_state['merged_ds'].longitude.max()))
            action = st.radio("Choose action:", ["Visualize", "Excel"])
            if selected_vars and st.button("Generate Results"):
                with st.spinner('Processing...'):
                    df = convert_point_nc_to_excel(st.session_state['merged_ds'],
                                                   selected_vars, latitude, longitude, time_var)
                    if action == "Visualize":
                        visualize_data(df, df.columns[3:])
                    else:
                        st.write("### Data Preview:")
                        st.dataframe(df.head())
                        output = io.BytesIO()
                        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
                            df.to_excel(writer, index=False)
                        st.download_button(
                            label="📥 Download Excel",
                            data=output.getvalue(),
                            file_name=f"merged_data_{latitude}_{longitude}.xlsx",
                            # Correct MIME for .xlsx (was the legacy .xls type).
                            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                        )
            if st.button("🔄 Start New Merge"):
                st.session_state['analysis_mode'] = False
                st.rerun()

# Footer
st.markdown("---")
st.markdown("""
### 📝 For Support And Assistance:
Contact:
- Harshitha Gunnam gunnamharshitha2@gmail.com
- Varun Ravichander varunravichander2007@gmail.com
""")