# nc-excel / app.py
# VarunRavichander's picture
# Update app.py
# 7a13f2d verified
import streamlit as st
import xarray as xr
import pandas as pd
import numpy as np
import io
import tempfile
import os
import plotly.express as px
import shutil
import gc
import psutil
import dask
from dask.diagnostics import ProgressBar
# Set dask scheduler
# Threaded scheduler keeps all dask work inside this Streamlit process
# (shared memory, no worker pickling overhead).
dask.config.set(scheduler='threads')
# Enable garbage collection
# GC is enabled by default in CPython; this is a defensive re-enable
# for long-lived sessions handling large datasets.
gc.enable()
# Page configuration
st.set_page_config(
    page_title="NetCDF File Manager",
    page_icon="πŸ“Š",
    layout="wide"
)

# Initialize session state on the very first run of a session.
# The presence of 'page' doubles as the "already initialized" flag.
if 'page' not in st.session_state:
    _defaults = (
        ('page', 'home'),
        ('datasets', []),
        ('merged_ds', None),
        ('time_var', None),
        ('single_ds', None),
    )
    for _key, _value in _defaults:
        st.session_state[_key] = _value
def optimize_dataset(ds):
    """Re-chunk *ds* so the entire time dimension sits in a single chunk."""
    chunk_spec = {'time': -1}
    return ds.chunk(chunk_spec)
def process_uploaded_file(uploaded_file):
    """Persist an uploaded .nc file to a temp file and open it with xarray.

    The upload is written to disk in 2MB chunks, opened lazily (dask
    chunks along time), then CF-decoded.

    Fixes over the previous version:
    - the dataset is opened only AFTER the temp file is closed, so the
      netCDF library never reads a file that is still open for writing
      (required on Windows, harmless on POSIX);
    - the unlink is guarded: on Windows the open dataset holds a lock on
      the file and unlinking raises; we leave the temp file behind rather
      than crash. On POSIX the unlink succeeds and the inode stays alive
      while the dataset handle is open.
    """
    with tempfile.NamedTemporaryFile(suffix='.nc', delete=False) as tmp_file:
        # Process in smaller chunks for proxy stability
        buffer_size = 2 * 1024 * 1024  # 2MB chunks
        data = uploaded_file.getvalue()
        for i in range(0, len(data), buffer_size):
            tmp_file.write(data[i:i + buffer_size])
        tmp_file.flush()
    # Optimized (lazy) dataset loading; decode_times deferred to decode_cf.
    ds = xr.open_dataset(
        tmp_file.name,
        engine='netcdf4',
        chunks={'time': 'auto'},
        cache=False,
        decode_times=False
    )
    try:
        os.unlink(tmp_file.name)
    except OSError:
        # Windows: file is locked by the open dataset; skip deletion.
        pass
    ds = xr.decode_cf(ds)
    return ds
def find_time_variable(ds):
    """Return the first recognised time-variable name in *ds*, or None."""
    candidates = ('valid_time', 'time', 'TIME', 'datetime', 'date', 'Time')
    for name in candidates:
        if name in ds.variables:
            return name
    return None
def get_variable_units(ds, var_name):
    """Return the 'units' attribute of *var_name*, or a placeholder string."""
    attrs = ds[var_name].attrs
    return attrs.get('units', 'No unit specified')
def convert_point_nc_to_excel(ds, selected_vars, lat, lon, time_var):
    """Extract a point time series from *ds* into a pandas DataFrame.

    Selects the grid cell nearest to (lat, lon) and builds one column per
    selected variable, labelled with its units. Scalar variables are
    broadcast across all timestamps.
    """
    ds_point = ds.sel(latitude=lat, longitude=lon, method='nearest')
    times = pd.to_datetime(ds_point[time_var].values)
    n_rows = len(times)
    columns = {
        'observation_time (UTC)': times.strftime('%Y-%m-%d %H:%M'),
        'longitude (DD)': [lon] * n_rows,
        'latitude (DD)': [lat] * n_rows
    }
    for name in selected_vars:
        label = f"{name} ({get_variable_units(ds, name)})"
        values = ds_point[name].values
        if values.size > 1:
            columns[label] = values.flatten()
        else:
            columns[label] = [values.item()] * n_rows
    return pd.DataFrame(columns)
def merge_datasets(datasets):
    """Merge a list of xarray Datasets into one.

    Returns None for an empty list, the dataset itself for a single-item
    list, and an xr.merge result (conflicts resolved by "override")
    otherwise, computed under the threaded dask scheduler.
    """
    if not datasets:
        return None
    if len(datasets) == 1:
        return datasets[0]
    with dask.config.set(scheduler='threads'):
        return xr.merge(
            datasets,
            combine_attrs="override",
            compat="override"
        )
def visualize_data(df, selected_vars):
    """Render an interactive line plot of *selected_vars* over time.

    Adds a 'formatted_time' column to *df* (mutates the caller's frame),
    plots the series with plotly, and offers the figure as an HTML
    download.
    """
    df['formatted_time'] = pd.to_datetime(df['observation_time (UTC)']).dt.strftime('%d-%m-%y %H:%M')
    fig = px.line(
        df,
        x='formatted_time',
        y=selected_vars,
        title='Variables Over Time',
        color_discrete_sequence=px.colors.qualitative.Set1
    )
    # Shared axis styling; the x-axis additionally plots timestamps as
    # categories with vertical labels.
    axis_common = dict(
        showgrid=True,
        gridwidth=1,
        gridcolor='LightGray',
        showline=True,
        linewidth=1,
        linecolor='black',
        mirror=True
    )
    legend_style = dict(
        orientation="v",
        yanchor="top",
        y=1,
        xanchor="right",
        x=1.1,
        bgcolor="white",
        bordercolor="Black",
        borderwidth=1
    )
    fig.update_layout(
        showlegend=True,
        legend=legend_style,
        hovermode='x unified',
        plot_bgcolor='white',
        paper_bgcolor='white',
        xaxis=dict(axis_common, type='category', tickangle=-90),
        yaxis=dict(axis_common),
        margin=dict(l=80, r=150, t=100, b=100),
        width=900,
        height=600
    )
    st.plotly_chart(fig, use_container_width=True)
    html_buffer = io.StringIO()
    fig.write_html(html_buffer)
    st.download_button(
        label="πŸ“Š Download Plot",
        data=html_buffer.getvalue(),
        file_name="plot.html",
        mime="text/html"
    )
def process_dataset(ds):
    """Show dataset metadata and let the user extract a point time series.

    Renders a variable summary table, lat/lon pickers bounded by the
    dataset's coordinate range, and either an interactive plot or an
    Excel download of the selected variables. Requires a recognised time
    variable (see find_time_variable).

    Fix: the Excel download now uses the correct MIME type for .xlsx
    files (the old "application/vnd.ms-excel" is the legacy .xls type).
    """
    time_var = find_time_variable(ds)
    if not time_var:
        st.error("No time variable found in the dataset.")
        return

    st.write("### Dataset Information:")
    st.write(f"Dimensions: {dict(ds.dims)}")

    # One summary row per variable: unit, dims and long_name description.
    var_info = [{
        'Variable': var,
        'Unit': get_variable_units(ds, var),
        'Dimensions': ', '.join(ds[var].dims),
        'Description': ds[var].attrs.get('long_name', 'No description available')
    } for var in ds.variables]
    st.table(pd.DataFrame(var_info))

    # Coordinate variables are not offered for extraction.
    available_vars = [var for var in ds.variables
                      if var not in ['latitude', 'longitude', 'lat', 'lon', time_var]]
    selected_vars = st.multiselect("Select variables to extract:", available_vars)

    col1, col2 = st.columns(2)
    with col1:
        latitude = st.number_input("Latitude (DD)",
                                   value=float(ds.latitude.mean()),
                                   min_value=float(ds.latitude.min()),
                                   max_value=float(ds.latitude.max()))
    with col2:
        longitude = st.number_input("Longitude (DD)",
                                    value=float(ds.longitude.mean()),
                                    min_value=float(ds.longitude.min()),
                                    max_value=float(ds.longitude.max()))

    action = st.radio("Choose action:", ["Visualize", "Excel"])
    if selected_vars and st.button("Generate"):
        with st.spinner('Processing...'):
            df = convert_point_nc_to_excel(ds, selected_vars, latitude, longitude, time_var)
            if action == "Visualize":
                # Columns 0-2 are time/lon/lat; plot only the data columns.
                visualize_data(df, df.columns[3:])
            else:
                st.write("### Data Preview:")
                st.dataframe(df.head())
                output = io.BytesIO()
                with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
                    df.to_excel(writer, index=False)
                st.download_button(
                    label="πŸ“₯ Download Excel",
                    data=output.getvalue(),
                    file_name=f"data_{latitude}_{longitude}.xlsx",
                    # Correct MIME type for .xlsx downloads.
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                )
                st.success("βœ… File ready for download!")
# Navigation: four buttons that switch pages via session state, plus a
# Reset button that wipes the whole session.
st.title('🌟 NetCDF File Manager')
cols = st.columns([1, 1, 1, 1])
with cols[0]:
    if st.button('🏠 Home'):
        st.session_state['page'] = 'home'
        st.rerun()
with cols[1]:
    if st.button('πŸ“₯ Convert NC to Excel'):
        st.session_state['page'] = 'convert'
        st.rerun()
with cols[2]:
    if st.button('πŸ”„ Merge NC Files'):
        st.session_state['page'] = 'merge'
        st.rerun()
with cols[3]:
    if st.button('πŸ”„ Reset'):
        # Snapshot the keys first: deleting entries while iterating the
        # live .keys() view raises "dictionary changed size during
        # iteration".
        for key in list(st.session_state.keys()):
            del st.session_state[key]
        st.session_state['page'] = 'home'
        st.rerun()
st.markdown("---")
# Page Content
# Landing page: static overview of the app's operations and features.
if st.session_state['page'] == 'home':
    st.header("Welcome to NetCDF File Manager!")
    st.write("""
### Choose an operation from the top navigation:
- *Convert NC to Excel*: Convert single NC file to Excel format
- *Merge NC Files*: Merge multiple NC files and export/visualize
- *Reset*: Clear all data and start fresh
### Features Available:
- Single file conversion
- Multiple file merging
- Data visualization
- Excel export
- Original units preservation
- Time series analysis
""")
# Single-file page: upload one .nc file, keep the opened dataset in
# session state, then hand it to process_dataset for extraction/export.
elif st.session_state['page'] == 'convert':
    st.header("Convert NC File to Excel")
    uploaded_file = st.file_uploader("Upload your .nc file", type='nc')
    if uploaded_file:
        with st.spinner('Processing NC file...'):
            st.session_state['single_ds'] = process_uploaded_file(uploaded_file)
        process_dataset(st.session_state['single_ds'])
elif st.session_state['page'] == 'merge':
st.header("Merge Multiple NC Files")
# Initialize analysis state if not exists
if 'analysis_mode' not in st.session_state:
st.session_state['analysis_mode'] = False
uploaded_files = st.file_uploader("Upload multiple .nc files", type='nc', accept_multiple_files=True)
if uploaded_files:
if len(uploaded_files) < 2:
st.warning("Please upload at least 2 files to merge")
else:
st.write(f"Number of files to merge: {len(uploaded_files)}")
if not st.session_state['analysis_mode']:
if st.button("πŸ”„ Process and Merge Files"):
datasets = []
progress_text = st.empty()
for i, file in enumerate(uploaded_files, 1):
progress_text.text(f"Processing file {i}/{len(uploaded_files)}")
try:
ds = process_uploaded_file(file)
datasets.append(ds)
except Exception as e:
st.error(f"Error processing file {i}: {str(e)}")
break
if len(datasets) == len(uploaded_files):
st.session_state['merged_ds'] = merge_datasets(datasets)
if st.session_state['merged_ds'] is not None:
st.success("βœ… Files merged successfully!")
st.session_state['analysis_mode'] = True
st.rerun()
if st.session_state['analysis_mode'] and st.session_state['merged_ds'] is not None:
# Download option for merged NC file
nc_data = st.session_state['merged_ds'].to_netcdf()
st.download_button(
label="πŸ’Ύ Download Merged NC File",
data=nc_data,
file_name="merged_data.nc",
mime="application/x-netcdf"
)
time_var = find_time_variable(st.session_state['merged_ds'])
if time_var:
st.write("### Dataset Information:")
st.write(f"Dimensions: {dict(st.session_state['merged_ds'].dims)}")
var_info = [{
'Variable': var,
'Unit': get_variable_units(st.session_state['merged_ds'], var),
'Dimensions': ', '.join(st.session_state['merged_ds'][var].dims),
'Description': st.session_state['merged_ds'][var].attrs.get('long_name', 'No description available')
} for var in st.session_state['merged_ds'].variables]
st.table(pd.DataFrame(var_info))
available_vars = [var for var in st.session_state['merged_ds'].variables
if var not in ['latitude', 'longitude', 'lat', 'lon', time_var]]
selected_vars = st.multiselect("Select variables:", available_vars)
col1, col2 = st.columns(2)
with col1:
latitude = st.number_input("Latitude (DD)",
value=float(st.session_state['merged_ds'].latitude.mean()),
min_value=float(st.session_state['merged_ds'].latitude.min()),
max_value=float(st.session_state['merged_ds'].latitude.max()))
with col2:
longitude = st.number_input("Longitude (DD)",
value=float(st.session_state['merged_ds'].longitude.mean()),
min_value=float(st.session_state['merged_ds'].longitude.min()),
max_value=float(st.session_state['merged_ds'].longitude.max()))
action = st.radio("Choose action:", ["Visualize", "Excel"])
if selected_vars and st.button("Generate Results"):
with st.spinner('Processing...'):
df = convert_point_nc_to_excel(st.session_state['merged_ds'],
selected_vars,
latitude,
longitude,
time_var)
if action == "Visualize":
visualize_data(df, df.columns[3:])
else:
st.write("### Data Preview:")
st.dataframe(df.head())
output = io.BytesIO()
with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
df.to_excel(writer, index=False)
st.download_button(
label="πŸ“₯ Download Excel",
data=output.getvalue(),
file_name=f"merged_data_{latitude}_{longitude}.xlsx",
mime="application/vnd.ms-excel"
)
if st.button("πŸ”„ Start New Merge"):
st.session_state['analysis_mode'] = False
st.rerun()
# Footer
# Static contact block rendered on every page, below the active page.
st.markdown("---")
st.markdown("""
### πŸ“ For Support And Assistance:
Contact:
- Harshitha Gunnam
gunnamharshitha2@gmail.com
- Varun Ravichander
varunravichander2007@gmail.com
""")