Upload correlation_explorer_app.py
Browse files- correlation_explorer_app.py +222 -0
correlation_explorer_app.py
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import plotly.express as px
|
| 3 |
+
import plotly.graph_objects as go
|
| 4 |
+
from dash import Dash, dcc, html, Input, Output, State, ctx
|
| 5 |
+
import dash_daq as daq
|
| 6 |
+
import numpy as np
|
| 7 |
+
|
| 8 |
+
# 1. Data Loading & Preprocessing
# Both CSV files are opened by bare file name, so they are resolved against
# the current working directory of the process.
df_global = pd.read_csv("merged_global.csv")
df_hemi = pd.read_csv("hemispheric_merged.csv")

# Join the two tables on (year, month). Columns present in both keep their
# name for the global table and get a "_hemi" suffix for the hemispheric one.
df = df_global.merge(df_hemi, on=["year", "month"], suffixes=("", "_hemi"))
|
| 14 |
+
# Add Season column
|
| 15 |
+
def get_season(month):
    """Return the three-letter season code for a month number.

    Args:
        month: Month number, 1 (January) through 12 (December).

    Returns:
        "DJF", "MAM", "JJA" or "SON".

    Raises:
        KeyError: If ``month`` is not one of 1-12.
    """
    months_by_season = (
        ("DJF", (12, 1, 2)),
        ("MAM", (3, 4, 5)),
        ("JJA", (6, 7, 8)),
        ("SON", (9, 10, 11)),
    )
    # Invert the grouping into a month -> season lookup table.
    season_of = {
        month_number: code
        for code, month_numbers in months_by_season
        for month_number in month_numbers
    }
    return season_of[month]
|
| 22 |
+
# Tag every row with the season code derived from its month number.
df["Season"] = df["month"].apply(get_season)

# 2. App Initialization
# The browser-tab title is handed to the constructor instead of being
# assigned on the instance afterwards.
app = Dash(__name__, title="Correlation & Insight Explorer")
|
| 27 |
+
|
| 28 |
+
# Define the variables for the dropdowns
|
| 29 |
+
def get_variables(scope):
    """Return the selectable variables for a geographic scope.

    Args:
        scope: "global", "nh" (Northern Hemisphere) or "sh" (Southern
            Hemisphere).

    Returns:
        A new dict mapping column name -> human-readable label. Insertion
        order is the order in which the variables are presented.

    Raises:
        ValueError: If ``scope`` is not one of the supported values. (The
            function previously fell through and returned ``None``, which
            only failed later, inside the callbacks, with an unrelated
            error.)
    """
    if scope == "global":
        return {
            "co2_anomaly": "CO₂ Anomaly",
            "land_ocean_anomaly": "Global Land+Ocean Temp Anomaly",
            "land_anomaly": "Global Land Temp Anomaly",
            "msl_mm": "Sea Level Change",
        }
    if scope == "nh":
        return {
            "north_co2_anomaly": "NH CO₂ Anomaly",
            "north_land_ocean_anomaly": "NH Land+Ocean Temp Anomaly",
            "north_land_anomaly": "NH Land Temp Anomaly",
            "msl_mm_north": "NH Sea Level Change",
        }
    if scope == "sh":
        return {
            "south_co2_anomaly": "SH CO₂ Anomaly",
            "south_land_ocean_anomaly": "SH Land+Ocean Temp Anomaly",
            "south_land_anomaly": "SH Land Temp Anomaly",
            "msl_mm_south": "SH Sea Level Change",
        }
    raise ValueError(
        f"Unknown scope {scope!r}; expected 'global', 'nh' or 'sh'"
    )
|
| 52 |
+
|
| 53 |
+
# 3. App Layout
|
| 54 |
+
# Style shared by the two full-width, centred control rows.
_CONTROL_ROW_STYLE = {
    'width': '100%',
    'maxWidth': '1200px',
    'margin': '0 auto',
    'fontSize': '16px',
}

# Style of each half-width column inside the second control row.
_HALF_COLUMN_STYLE = {'width': '48%', 'display': 'inline-block', 'padding': '10px'}

# Bounds of the year slider, taken from the merged data set.
_first_year = df['year'].min()
_last_year = df['year'].max()

# Row 1: scope selector and light/dark theme toggle.
_scope_and_theme_row = html.Div(
    [
        html.Label("Scope"),
        dcc.RadioItems(
            id="scope-selector",
            options=[
                {"label": "🌍 Global", "value": "global"},
                {"label": "🌎 Northern Hemisphere", "value": "nh"},
                {"label": "🌏 Southern Hemisphere", "value": "sh"},
            ],
            value="global",
            labelStyle={"display": "inline-block", "margin-right": "15px"},
        ),
        html.Label("Theme", style={'marginTop': '10px'}),
        daq.ToggleSwitch(
            id='theme-toggle',
            label=['Light', 'Dark'],
            value=False,
            style={'marginTop': '5px'},
        ),
    ],
    style=dict(_CONTROL_ROW_STYLE),
)

# Row 2, left column: the two axis dropdowns. Their options and values are
# filled in by a callback, so they start out empty here.
_axis_column = html.Div(
    [
        html.Label("X-axis Variable"),
        dcc.Dropdown(id='x-axis-dropdown'),
        html.Label("Y-axis Variable", style={'marginTop': '10px'}),
        dcc.Dropdown(id='y-axis-dropdown'),
    ],
    style=dict(_HALF_COLUMN_STYLE),
)

# Row 2, right column: year range slider (one mark every 5 years) and the
# monthly/seasonal view switch.
_time_column = html.Div(
    [
        html.Label("Year Range"),
        dcc.RangeSlider(
            id='year-slider',
            min=_first_year,
            max=_last_year,
            value=[_first_year, _last_year],
            marks={
                str(year): str(year)
                for year in range(_first_year, _last_year + 1, 5)
            },
            step=1,
        ),
        html.Label("View Mode", style={'marginTop': '10px'}),
        dcc.RadioItems(
            id='view-mode',
            options=[
                {"label": "Monthly", "value": "Monthly"},
                {"label": "Seasonal", "value": "Seasonal"},
            ],
            value="Monthly",
            labelStyle={"display": "inline-block", "margin-right": "10px"},
        ),
    ],
    style=dict(_HALF_COLUMN_STYLE),
)

_variable_and_time_row = html.Div(
    [_axis_column, _time_column],
    style=dict(_CONTROL_ROW_STYLE),
)

# Static explanatory panel; one paragraph per entry.
_EXPLANATION_PARAGRAPHS = (
    "This tool allows you to explore the statistical relationships between key climate indicators: CO₂ emissions, sea level rise, and temperature anomalies.",
    "You can switch between Global, Northern, or Southern Hemisphere views and use the dropdowns and sliders to select a timeframe and two indicators to compare.",
    "The scatter plot shows how the selected variables move together over time. A regression line is added to show the trend, and the R² value indicates how well one variable explains the other.",
    "Pearson's r (shown in the heatmap and above the scatter plot) ranges from -1 to +1. A value close to +1 means a strong positive correlation; close to -1 indicates a strong negative one; and close to 0 implies little to no correlation.",
    "The heatmap provides an overview of how all selected variables relate to one another within the chosen scope and time range. It uses Pearson’s correlation coefficients (r) to reveal linear relationships.",
)

_explanation_panel = html.Div(
    [html.H4("What You're Seeing", style={"marginTop": "20px"})]
    + [html.P(paragraph) for paragraph in _EXPLANATION_PARAGRAPHS],
    style={
        "backgroundColor": "#f5f5f5",
        "padding": "15px",
        "border": "1px solid #ccc",
        "borderRadius": "6px",
        "marginTop": "20px",
        "maxWidth": "900px",
        "fontSize": "16px",
    },
)

# Page, top to bottom: title, controls, explanation, correlation sentence,
# scatter plot, heatmap.
app.layout = html.Div([
    html.H2("Correlation & Insight Explorer", style={'textAlign': 'center'}),
    _scope_and_theme_row,
    _variable_and_time_row,
    _explanation_panel,
    html.Div(id='correlation-note', style={'padding': '10px', 'fontSize': '16px'}),
    dcc.Graph(id='scatter-plot'),
    html.H4("Correlation Matrix (Pearson)", style={'textAlign': 'center', 'marginTop': '30px'}),
    dcc.Graph(id='correlation-heatmap'),
])
|
| 137 |
+
# 4. Callbacks
|
| 138 |
+
@app.callback(
    Output('x-axis-dropdown', 'options'),
    Output('y-axis-dropdown', 'options'),
    Output('x-axis-dropdown', 'value'),
    Output('y-axis-dropdown', 'value'),
    Input('scope-selector', 'value'),
)
def update_variable_options(scope):
    """Repopulate both axis dropdowns when the scope changes.

    Args:
        scope: Current value of the scope selector.

    Returns:
        A 4-tuple ``(x_options, y_options, x_value, y_value)``. Both dropdowns
        get the same option list; X defaults to the first variable of the
        scope and Y to the second.
    """
    labels_by_column = get_variables(scope)
    columns = list(labels_by_column)
    dropdown_options = [
        {'label': label, 'value': column}
        for column, label in labels_by_column.items()
    ]
    default_x = columns[0]
    default_y = columns[1]
    return dropdown_options, dropdown_options, default_x, default_y
|
| 149 |
+
def _describe_correlation(r):
    """Return the sentence shown above the scatter plot for Pearson's r."""
    if pd.isna(r):
        # r is NaN when too few paired observations remain or when one of the
        # variables is constant; "No correlation" would be misleading here.
        return "Correlation could not be computed for this selection (r = nan)"

    # (exclusive lower bound on |r|, label), checked from strongest to weakest.
    bands = (
        (0.8, "🔍 Very strong correlation"),
        (0.6, "🔍 Strong correlation"),
        (0.4, "🔍 Moderate correlation"),
        (0.2, "🔍 Weak correlation"),
        (0, "🔍 Very weak correlation"),
    )
    magnitude = abs(r)
    strength = next(
        (label for floor, label in bands if magnitude > floor),
        "No correlation",
    )
    return f"{strength} detected (r = {r:.2f})"


@app.callback(
    Output('scatter-plot', 'figure'),
    Output('correlation-heatmap', 'figure'),
    Output('correlation-note', 'children'),
    Input('x-axis-dropdown', 'value'),
    Input('y-axis-dropdown', 'value'),
    Input('year-slider', 'value'),
    Input('view-mode', 'value'),
    Input('scope-selector', 'value'),
    Input('theme-toggle', 'value'),
)
def update_visuals(x_var, y_var, year_range, mode, scope, dark_mode):
    """Rebuild the scatter plot, the correlation heatmap and the summary note.

    Args:
        x_var: Column selected for the X axis.
        y_var: Column selected for the Y axis.
        year_range: ``[first_year, last_year]`` from the slider (inclusive).
        mode: "Seasonal" averages rows per (year, season); any other value
            keeps the rows as they are.
        scope: Scope selector value, passed to ``get_variables``.
        dark_mode: Truthy selects the "plotly_dark" template, otherwise
            "plotly_white".

    Returns:
        ``(scatter_figure, heatmap_figure, correlation_sentence)``.

    Raises:
        PreventUpdate: If either axis variable is unset (dropdown cleared) or
            does not belong to the current scope, or if no year range is
            available. The previous figures are left in place.
    """
    # Imported here so the block is self-contained; ``dash`` is already a
    # dependency of this file.
    from dash.exceptions import PreventUpdate

    vars_dict = get_variables(scope)
    if x_var not in vars_dict or y_var not in vars_dict or not year_range:
        raise PreventUpdate

    template = "plotly_dark" if dark_mode else "plotly_white"
    first_year, last_year = year_range[0], year_range[1]
    dff = df[(df["year"] >= first_year) & (df["year"] <= last_year)]

    if mode == "Seasonal":
        # NOTE: grouping is by calendar year, so a "DJF" row averages
        # January, February and December of the *same* year.
        dff = dff.groupby(['year', 'Season']).mean(numeric_only=True).reset_index()
        period_column = "Season"
    else:
        period_column = "month"

    r = dff[[x_var, y_var]].corr().iloc[0, 1]
    corr_sentence = _describe_correlation(r)

    x_label = vars_dict[x_var]
    y_label = vars_dict[y_var]

    # Scatter Plot with Regression
    fig = px.scatter(
        dff, x=x_var, y=y_var, trendline="ols",
        custom_data=["year", period_column],
        title=f"{x_label} vs {y_label}",
        labels={x_var: x_label, y_var: y_label},
        template=template,
    )
    # Only the marker trace gets the custom hover text. The trendline trace
    # keeps the hover generated by plotly express; its points are not in the
    # same order as the rows of ``dff``.
    fig.update_traces(
        hovertemplate=f"<b>Year</b>: %{{customdata[0]}}<br><b>Month/Season</b>: %{{customdata[1]}}<br><b>{x_label}</b>: %{{x:.3f}}<br><b>{y_label}</b>: %{{y:.3f}}",
        selector=dict(mode="markers"),
    )

    # Add R² when an OLS fit is available. The result table is empty, or has
    # no "px_fit_results" entry, when there were too few points to fit.
    try:
        fit_results = px.get_trendline_results(fig).iloc[0]["px_fit_results"]
    except (IndexError, KeyError):
        fit_results = None
    if fit_results is not None:
        r_squared = fit_results.rsquared
        if not pd.isna(r_squared):
            fig.add_annotation(
                xref="paper", yref="paper",
                x=0.95, y=0.05,
                text=f"R² = {r_squared:.2f}",
                showarrow=False,
                font=dict(size=14, color="white" if dark_mode else "black"),
            )

    # Pearson correlation matrix over every variable of the current scope.
    axis_labels = list(vars_dict.values())
    corr = dff[list(vars_dict.keys())].corr().round(2)
    heatmap = go.Figure(data=go.Heatmap(
        z=corr.values,
        x=axis_labels,
        y=axis_labels,
        colorscale='Cividis',
        zmin=-1, zmax=1,
        colorbar=dict(title="Pearson r"),
    ))
    heatmap.update_layout(template=template)

    return fig, heatmap, corr_sentence
|
| 219 |
+
|
| 220 |
+
# 5. Run app
|
| 221 |
+
if __name__ == "__main__":
    # Only start the server when this file is executed as a script, not when
    # it is imported. Debug mode is switched on.
    app.run(debug=True)
|