# NOTE: The three lines below are build-log residue from the hosting
# platform ("Spaces:" / "Build error"), not part of the program.
# Spaces:
# Build error
# Build error
| import streamlit as st | |
| import xarray as xr | |
| import pandas as pd | |
| import numpy as np | |
| import io | |
| import tempfile | |
| import os | |
| import plotly.express as px | |
| import shutil | |
| import gc | |
| import psutil | |
| import dask | |
| from dask.diagnostics import ProgressBar | |
# --- Runtime configuration -------------------------------------------------
# Threaded dask scheduler: shared memory, well suited to xarray lazy I/O.
dask.config.set(scheduler='threads')

# Make sure automatic garbage collection is active.
gc.enable()

# Streamlit page setup.
st.set_page_config(
    page_title="NetCDF File Manager",
    page_icon="π",
    layout="wide"
)

# One-time session-state seeding: a missing 'page' key means this is a
# fresh session, so initialise every key the app relies on at once.
if 'page' not in st.session_state:
    st.session_state.update({
        'page': 'home',
        'datasets': [],
        'merged_ds': None,
        'time_var': None,
        'single_ds': None,
    })
def optimize_dataset(ds):
    """Rechunk *ds* so the whole time axis lives in one chunk.

    A single chunk along 'time' makes point-wise time-series extraction
    (this app's main access pattern) a single read.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset to optimize.

    Returns
    -------
    xarray.Dataset
        Rechunked dataset, or *ds* unchanged when it has no 'time'
        dimension (the unguarded chunk call raised ValueError before).
    """
    # Guard: chunking on a dimension the dataset lacks raises in xarray.
    if 'time' not in ds.dims:
        return ds
    return ds.chunk({'time': -1})
def process_uploaded_file(uploaded_file):
    """Persist an uploaded NetCDF file to disk and open it as a dataset.

    The upload is written to a named temporary file in 2 MB slices
    (large single writes can stall behind some reverse proxies), opened
    lazily with dask chunking, then the temp file is unlinked.  On POSIX
    the open handle keeps the deleted file readable for lazy loads.

    Parameters
    ----------
    uploaded_file : streamlit UploadedFile
        Object returned by ``st.file_uploader``.

    Returns
    -------
    xarray.Dataset
        The CF-decoded dataset.
    """
    with tempfile.NamedTemporaryFile(suffix='.nc', delete=False) as tmp_file:
        # Process in smaller chunks for proxy stability.
        buffer_size = 2 * 1024 * 1024  # 2MB chunks
        data = uploaded_file.getvalue()
        for offset in range(0, len(data), buffer_size):
            tmp_file.write(data[offset:offset + buffer_size])
        tmp_file.flush()
    try:
        ds = xr.open_dataset(
            tmp_file.name,
            engine='netcdf4',
            chunks={'time': 'auto'},
            cache=False,
            decode_times=False  # decoded explicitly via xr.decode_cf below
        )
    finally:
        # Always remove the temp file -- previously a failed open leaked it.
        os.unlink(tmp_file.name)
    ds = xr.decode_cf(ds)
    return ds
def find_time_variable(ds):
    """Return the first recognised time-coordinate name present in *ds*.

    Checks a fixed list of common time-variable names in priority order
    against ``ds.variables``; returns None when none of them match.
    """
    candidates = ('valid_time', 'time', 'TIME', 'datetime', 'date', 'Time')
    for name in candidates:
        if name in ds.variables:
            return name
    return None
def get_variable_units(ds, var_name):
    """Return the 'units' attribute of *var_name* in *ds*.

    Falls back to the literal string 'No unit specified' when the
    variable carries no units metadata.
    """
    attrs = ds[var_name].attrs
    return attrs.get('units', 'No unit specified')
def convert_point_nc_to_excel(ds, selected_vars, lat, lon, time_var):
    """Extract one grid point from *ds* as a tidy DataFrame.

    Selects the grid cell nearest to (lat, lon), then builds one column
    per selected variable, labelled with its units.  Size-1 (scalar)
    variables are broadcast across every timestamp.

    Parameters
    ----------
    ds : xarray.Dataset
        Source dataset with 'latitude'/'longitude' coordinates.
    selected_vars : list[str]
        Variable names to extract.
    lat, lon : float
        Target point in decimal degrees.
    time_var : str
        Name of the time coordinate.

    Returns
    -------
    pandas.DataFrame
    """
    point = ds.sel(latitude=lat, longitude=lon, method='nearest')
    timestamps = pd.to_datetime(point[time_var].values)
    n_rows = len(timestamps)

    columns = {
        'observation_time (UTC)': timestamps.strftime('%Y-%m-%d %H:%M'),
        'longitude (DD)': [lon] * n_rows,
        'latitude (DD)': [lat] * n_rows,
    }
    for name in selected_vars:
        label = f"{name} ({get_variable_units(ds, name)})"
        values = point[name].values
        if values.size > 1:
            columns[label] = values.flatten()
        else:
            # Scalar variable: repeat its single value for every timestamp.
            columns[label] = [values.item()] * n_rows
    return pd.DataFrame(columns)
def merge_datasets(datasets):
    """Merge a list of xarray datasets into a single dataset.

    Returns None for an empty/None input and the lone dataset unchanged
    for a single-element list; otherwise merges with conflicts resolved
    in favour of the first dataset ('override' semantics).
    """
    if not datasets:
        return None
    if len(datasets) == 1:
        return datasets[0]
    # Threaded scheduler keeps the merge in shared memory.
    with dask.config.set(scheduler='threads'):
        return xr.merge(datasets, combine_attrs="override", compat="override")
def visualize_data(df, selected_vars):
    """Render an interactive time-series line plot plus a download button.

    Adds a 'formatted_time' column to *df* (NOTE: mutates the caller's
    frame), draws one line per entry in *selected_vars*, and offers the
    figure as a standalone HTML file.
    """
    times = pd.to_datetime(df['observation_time (UTC)'])
    df['formatted_time'] = times.dt.strftime('%d-%m-%y %H:%M')

    fig = px.line(
        df,
        x='formatted_time',
        y=selected_vars,
        title='Variables Over Time',
        color_discrete_sequence=px.colors.qualitative.Set1,
    )

    # Axis styling shared by both axes.
    axis_common = dict(showgrid=True, gridwidth=1, gridcolor='LightGray',
                       showline=True, linewidth=1, linecolor='black',
                       mirror=True)
    fig.update_layout(
        showlegend=True,
        legend=dict(orientation="v", yanchor="top", y=1,
                    xanchor="right", x=1.1, bgcolor="white",
                    bordercolor="Black", borderwidth=1),
        hovermode='x unified',
        plot_bgcolor='white',
        paper_bgcolor='white',
        # Categorical x-axis keeps the pre-formatted labels verbatim.
        xaxis=dict(type='category', tickangle=-90, **axis_common),
        yaxis=dict(**axis_common),
        margin=dict(l=80, r=150, t=100, b=100),
        width=900,
        height=600,
    )

    st.plotly_chart(fig, use_container_width=True)

    html_buffer = io.StringIO()
    fig.write_html(html_buffer)
    st.download_button(
        label="π Download Plot",
        data=html_buffer.getvalue(),
        file_name="plot.html",
        mime="text/html",
    )
def process_dataset(ds):
    """Render the single-file workflow: info table, point pick, output.

    Shows the dataset's dimensions and per-variable metadata, lets the
    user choose variables and a (lat, lon) point, then either plots the
    extracted time series or offers it as an Excel download.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset with 'latitude'/'longitude' coordinates and a
        recognisable time variable.
    """
    time_var = find_time_variable(ds)
    if not time_var:
        st.error("No time variable found in the dataset.")
        return

    st.write("### Dataset Information:")
    st.write(f"Dimensions: {dict(ds.dims)}")

    var_info = [{
        'Variable': var,
        'Unit': get_variable_units(ds, var),
        'Dimensions': ', '.join(ds[var].dims),
        'Description': ds[var].attrs.get('long_name', 'No description available')
    } for var in ds.variables]
    st.table(pd.DataFrame(var_info))

    # Coordinates and the time axis are not offered for extraction.
    available_vars = [var for var in ds.variables
                      if var not in ['latitude', 'longitude', 'lat', 'lon', time_var]]
    selected_vars = st.multiselect("Select variables to extract:", available_vars)

    col1, col2 = st.columns(2)
    with col1:
        latitude = st.number_input("Latitude (DD)",
                                   value=float(ds.latitude.mean()),
                                   min_value=float(ds.latitude.min()),
                                   max_value=float(ds.latitude.max()))
    with col2:
        longitude = st.number_input("Longitude (DD)",
                                    value=float(ds.longitude.mean()),
                                    min_value=float(ds.longitude.min()),
                                    max_value=float(ds.longitude.max()))

    action = st.radio("Choose action:", ["Visualize", "Excel"])
    if selected_vars and st.button("Generate"):
        with st.spinner('Processing...'):
            df = convert_point_nc_to_excel(ds, selected_vars, latitude, longitude, time_var)
            if action == "Visualize":
                # Skip the three fixed columns (time, lon, lat).
                visualize_data(df, df.columns[3:])
            else:
                st.write("### Data Preview:")
                st.dataframe(df.head())
                output = io.BytesIO()
                with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
                    df.to_excel(writer, index=False)
                st.download_button(
                    label="π₯ Download Excel",
                    data=output.getvalue(),
                    file_name=f"data_{latitude}_{longitude}.xlsx",
                    # Fixed: correct MIME type for .xlsx --
                    # 'application/vnd.ms-excel' is the legacy .xls type.
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                )
                st.success("β File ready for download!")
# --- Top navigation bar ----------------------------------------------------
st.title('π NetCDF File Manager')
cols = st.columns([1, 1, 1, 1])
with cols[0]:
    if st.button('π Home'):
        st.session_state['page'] = 'home'
        st.rerun()
with cols[1]:
    if st.button('π₯ Convert NC to Excel'):
        st.session_state['page'] = 'convert'
        st.rerun()
with cols[2]:
    if st.button('π Merge NC Files'):
        st.session_state['page'] = 'merge'
        st.rerun()
with cols[3]:
    if st.button('π Reset'):
        # Fixed: snapshot the keys first -- deleting entries while
        # iterating the live keys view raises RuntimeError.
        for key in list(st.session_state.keys()):
            del st.session_state[key]
        st.session_state['page'] = 'home'
        st.rerun()
st.markdown("---")
# --- Page routing -----------------------------------------------------------
if st.session_state['page'] == 'home':
    st.header("Welcome to NetCDF File Manager!")
    home_text = """
    ### Choose an operation from the top navigation:
    - *Convert NC to Excel*: Convert single NC file to Excel format
    - *Merge NC Files*: Merge multiple NC files and export/visualize
    - *Reset*: Clear all data and start fresh
    ### Features Available:
    - Single file conversion
    - Multiple file merging
    - Data visualization
    - Excel export
    - Original units preservation
    - Time series analysis
    """
    st.write(home_text)
| elif st.session_state['page'] == 'convert': | |
| st.header("Convert NC File to Excel") | |
| uploaded_file = st.file_uploader("Upload your .nc file", type='nc') | |
| if uploaded_file: | |
| with st.spinner('Processing NC file...'): | |
| st.session_state['single_ds'] = process_uploaded_file(uploaded_file) | |
| process_dataset(st.session_state['single_ds']) | |
| elif st.session_state['page'] == 'merge': | |
| st.header("Merge Multiple NC Files") | |
| # Initialize analysis state if not exists | |
| if 'analysis_mode' not in st.session_state: | |
| st.session_state['analysis_mode'] = False | |
| uploaded_files = st.file_uploader("Upload multiple .nc files", type='nc', accept_multiple_files=True) | |
| if uploaded_files: | |
| if len(uploaded_files) < 2: | |
| st.warning("Please upload at least 2 files to merge") | |
| else: | |
| st.write(f"Number of files to merge: {len(uploaded_files)}") | |
| if not st.session_state['analysis_mode']: | |
| if st.button("π Process and Merge Files"): | |
| datasets = [] | |
| progress_text = st.empty() | |
| for i, file in enumerate(uploaded_files, 1): | |
| progress_text.text(f"Processing file {i}/{len(uploaded_files)}") | |
| try: | |
| ds = process_uploaded_file(file) | |
| datasets.append(ds) | |
| except Exception as e: | |
| st.error(f"Error processing file {i}: {str(e)}") | |
| break | |
| if len(datasets) == len(uploaded_files): | |
| st.session_state['merged_ds'] = merge_datasets(datasets) | |
| if st.session_state['merged_ds'] is not None: | |
| st.success("β Files merged successfully!") | |
| st.session_state['analysis_mode'] = True | |
| st.rerun() | |
| if st.session_state['analysis_mode'] and st.session_state['merged_ds'] is not None: | |
| # Download option for merged NC file | |
| nc_data = st.session_state['merged_ds'].to_netcdf() | |
| st.download_button( | |
| label="πΎ Download Merged NC File", | |
| data=nc_data, | |
| file_name="merged_data.nc", | |
| mime="application/x-netcdf" | |
| ) | |
| time_var = find_time_variable(st.session_state['merged_ds']) | |
| if time_var: | |
| st.write("### Dataset Information:") | |
| st.write(f"Dimensions: {dict(st.session_state['merged_ds'].dims)}") | |
| var_info = [{ | |
| 'Variable': var, | |
| 'Unit': get_variable_units(st.session_state['merged_ds'], var), | |
| 'Dimensions': ', '.join(st.session_state['merged_ds'][var].dims), | |
| 'Description': st.session_state['merged_ds'][var].attrs.get('long_name', 'No description available') | |
| } for var in st.session_state['merged_ds'].variables] | |
| st.table(pd.DataFrame(var_info)) | |
| available_vars = [var for var in st.session_state['merged_ds'].variables | |
| if var not in ['latitude', 'longitude', 'lat', 'lon', time_var]] | |
| selected_vars = st.multiselect("Select variables:", available_vars) | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| latitude = st.number_input("Latitude (DD)", | |
| value=float(st.session_state['merged_ds'].latitude.mean()), | |
| min_value=float(st.session_state['merged_ds'].latitude.min()), | |
| max_value=float(st.session_state['merged_ds'].latitude.max())) | |
| with col2: | |
| longitude = st.number_input("Longitude (DD)", | |
| value=float(st.session_state['merged_ds'].longitude.mean()), | |
| min_value=float(st.session_state['merged_ds'].longitude.min()), | |
| max_value=float(st.session_state['merged_ds'].longitude.max())) | |
| action = st.radio("Choose action:", ["Visualize", "Excel"]) | |
| if selected_vars and st.button("Generate Results"): | |
| with st.spinner('Processing...'): | |
| df = convert_point_nc_to_excel(st.session_state['merged_ds'], | |
| selected_vars, | |
| latitude, | |
| longitude, | |
| time_var) | |
| if action == "Visualize": | |
| visualize_data(df, df.columns[3:]) | |
| else: | |
| st.write("### Data Preview:") | |
| st.dataframe(df.head()) | |
| output = io.BytesIO() | |
| with pd.ExcelWriter(output, engine='xlsxwriter') as writer: | |
| df.to_excel(writer, index=False) | |
| st.download_button( | |
| label="π₯ Download Excel", | |
| data=output.getvalue(), | |
| file_name=f"merged_data_{latitude}_{longitude}.xlsx", | |
| mime="application/vnd.ms-excel" | |
| ) | |
| if st.button("π Start New Merge"): | |
| st.session_state['analysis_mode'] = False | |
| st.rerun() | |
# --- Footer -----------------------------------------------------------------
st.markdown("---")
_FOOTER = """
### π For Support And Assistance:
Contact:
- Harshitha Gunnam
gunnamharshitha2@gmail.com
- Varun Ravichander
varunravichander2007@gmail.com
"""
st.markdown(_FOOTER)