import pandas as pd
import os
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import numpy as np
import io
import base64
import webbrowser
# Define paths
# The script may be started either from the project root or from a direct
# sub-directory of it, so look for the data folder in both places.
current_dir = Path.cwd()
if (current_dir / "data" / "cop_modelling").exists():
    data_path = current_dir / "data" / "cop_modelling"
elif (current_dir.parent / "data" / "cop_modelling").exists():
    data_path = current_dir.parent / "data" / "cop_modelling"
else:
    # Last-resort relative path; kept so `data_path` is always defined.
    data_path = Path("..") / "data" / "cop_modelling"

# Despite its name, `output_file` is the parquet file this script reads.
output_file = data_path / "joined_results.parquet"
if not output_file.exists():
    # Fail early with the locations that were tried instead of surfacing a
    # lower-level error from the parquet reader.
    raise FileNotFoundError(
        f"Could not find {output_file.name} under 'data/cop_modelling' "
        f"relative to {current_dir} or {current_dir.parent}"
    )

print(f"Loading data from {output_file}...")
joined_df = pd.read_parquet(output_file)
# Accumulates the HTML fragments of the report in document order; the
# fragments are joined with newlines when the report file is written.
# NOTE(review): the tags below were reconstructed around the surviving text
# ("COP Analysis Report", "COP Analysis Comprehensive Report") - confirm the
# intended markup.
html_parts = [
    "<!DOCTYPE html>\n<html>",
    # The report is written as UTF-8, so declare the charset explicitly.
    "<head><meta charset=\"utf-8\"><title>COP Analysis Report</title></head>",
    "<body>",
    "<h1>COP Analysis Comprehensive Report</h1>",
]
def add_matplotlib_fig(title):
    """Embed the current matplotlib figure in the report as an inline PNG.

    Saves the active pyplot figure to an in-memory PNG, closes the figure,
    base64-encodes the image and appends a heading plus an ``<img>`` tag
    carrying a ``data:`` URI to the module-level ``html_parts`` list.

    Args:
        title: Heading text shown above the image. It is HTML-escaped, so
            column names containing ``<``, ``>``, ``&`` or quotes are safe.
    """
    from html import escape  # function-scope import keeps the block self-contained

    buf = io.BytesIO()
    plt.savefig(buf, format='png', bbox_inches='tight', dpi=120)
    plt.close()
    b64 = base64.b64encode(buf.getvalue()).decode('ascii')
    safe_title = escape(str(title))
    # NOTE(review): wrapper/heading tags reconstructed - confirm intended markup.
    html_parts.append(
        f'<div><h2>{safe_title}</h2>\n'
        f'<img src="data:image/png;base64,{b64}" alt="{safe_title}"></div>\n'
    )
def add_plotly_fig(fig, title):
    """Append an interactive Plotly figure to the report.

    Renders ``fig`` as an HTML fragment (not a full document) and appends it,
    preceded by a heading, to the module-level ``html_parts`` list.

    Args:
        fig: Object exposing ``to_html(full_html=..., include_plotlyjs=...)``,
            e.g. a ``plotly.graph_objects.Figure``.
        title: Heading text shown above the figure; HTML-escaped before use.
    """
    from html import escape  # function-scope import keeps the block self-contained

    # include_plotlyjs='cdn' ensures the HTML doesn't bundle the 3MB plotly.js library
    html_div = fig.to_html(full_html=False, include_plotlyjs='cdn')
    # NOTE(review): heading tag reconstructed - confirm intended markup.
    html_parts.append(f"<h2>{escape(str(title))}</h2>\n{html_div}")
print("Generating Correlation Matrix...")
# Open the container for the static (matplotlib) plots; it is closed after
# the distribution plots at the end of this section.
# NOTE(review): container tags reconstructed - confirm intended markup.
html_parts.append("<div>")

# 1. Correlation matrix
sns.set_theme(style="whitegrid")
numerical_cols = joined_df.select_dtypes(include=['number']).columns
# A pandas Index has no unambiguous truth value, hence the explicit length test.
if len(numerical_cols) > 0:
    plt.figure(figsize=(10, 8))
    corr_matrix = joined_df[numerical_cols].corr()
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f", vmin=-1, vmax=1)
    plt.title('Correlation Matrix of Numerical Variables')
    plt.tight_layout()
    add_matplotlib_fig('Correlation Matrix')

print("Generating Distribution Plots...")
# 2. Distributions
# Numeric columns: histogram with a KDE overlay, missing values dropped.
for col in numerical_cols:
    plt.figure(figsize=(8, 4))
    sns.histplot(joined_df[col].dropna(), kde=True, bins=30)
    plt.title(f'Distribution of {col}')
    plt.tight_layout()
    add_matplotlib_fig(f'Distribution of {col}')

# Every non-numeric column is treated as categorical: count plot of the
# 20 most frequent values, ordered by frequency.
categorical_cols = joined_df.select_dtypes(exclude=['number']).columns
for col in categorical_cols:
    plt.figure(figsize=(10, 5))
    top_categories = joined_df[col].value_counts().nlargest(20).index
    sns.countplot(data=joined_df[joined_df[col].isin(top_categories)], x=col, order=top_categories)
    plt.title(f'Distribution of {col} (Top 20 categories)')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    add_matplotlib_fig(f'Distribution of {col}')

html_parts.append("</div>")
print("Generating Plotly 3D Visualizations...")
# Plotly
# Column labels of the joined frame, plus lowercase copies kept in the same
# order so that an index into one list is valid for the other.
cols = joined_df.columns.to_list()
cols_lower = [label.lower() for label in cols]
def find_col(possible_names, columns=None):
    """Return the first column whose label contains one of ``possible_names``.

    Matching is a case-insensitive substring test. Name fragments are tried
    in the order given, so earlier fragments take priority over later ones;
    for each fragment the columns are scanned in their original order.

    Args:
        possible_names: Iterable of name fragments to look for.
        columns: Optional sequence of column labels to search. When omitted,
            the module-level ``cols`` list is used.

    Returns:
        The matching label exactly as it appears in ``columns``, or ``None``
        when no label contains any of the fragments.
    """
    if columns is None:
        columns = cols
    # Lowercase once up front; str() tolerates non-string labels.
    lowered = [(str(label).lower(), label) for label in columns]
    for name in possible_names:
        needle = name.lower()
        for lowered_label, label in lowered:
            if needle in lowered_label:
                return label
    return None
# Resolve each plotting role to an actual column of the joined frame by
# searching the column names for the listed fragments.
col_quelle, col_senke, col_cop, col_komp, col_kalt = (
    find_col(fragments)
    for fragments in (
        ['t_vorlauf_quelle', 'quelle'],
        ['t_vorlauf_senke', 'senke'],
        ['cop'],
        ['kompressor', 'stufe'],
        ['kältemittel', 'kaltemittel', 'kaeltemittel', 'refrigerant'],
    )
)
# Display label -> resolved column (None when nothing matched).
required_cols = {
    'Quelle (X)': col_quelle,
    'Senke (Y)': col_senke,
    'COP (Z)': col_cop,
    'Kompressor': col_komp,
    'Kältemittel': col_kalt,
}
# Roles for which no column could be found.
missing = {role: column for role, column in required_cols.items() if column is None}
def _cop_surface(subset, **surface_kwargs):
    """Build a ``go.Surface`` of mean COP values from one slice of the data.

    Pivots ``subset`` so that rows are the distinct ``col_senke`` values,
    columns are the distinct ``col_quelle`` values and cells hold the mean of
    ``col_cop``. Extra keyword arguments are forwarded to ``go.Surface``.
    """
    pivot = subset.pivot_table(values=col_cop, index=col_senke, columns=col_quelle, aggfunc='mean')
    return go.Surface(
        x=pivot.columns.values, y=pivot.index.values, z=pivot.values,
        **surface_kwargs,
    )


if not missing:
    # Only rows with a value in every required column can be plotted.
    plot_df = joined_df.dropna(subset=list(required_cols.values())).copy()

    # Fig 1: one surface per (Kompressor, Kältemittel) pair, switched via a
    # dropdown; only the first surface is visible initially.
    fig = go.Figure()
    traces = []
    for (komp_val, kalt_val), subset in plot_df.groupby([col_komp, col_kalt], observed=True):
        if len(subset) < 3:
            continue
        trace_name = f"{komp_val} | {kalt_val}"
        # NOTE(review): <br>/<extra> tags reconstructed - confirm intended hover layout.
        trace = _cop_surface(
            subset,
            name=trace_name, visible=(len(traces) == 0),
            hovertemplate=(
                "Quelle (X): %{x}<br>Senke (Y): %{y}<br>COP (Z): %{z}"
                f"<extra>{trace_name}</extra>"
            ),
        )
        traces.append(trace)
        fig.add_trace(trace)

    # One dropdown entry per surface: show exactly that surface and retitle.
    buttons = [
        dict(
            label=trace.name,
            method="update",
            args=[
                {"visible": [j == i for j in range(len(traces))]},
                {"title": f"COP Surface - {trace.name}"},
            ],
        )
        for i, trace in enumerate(traces)
    ]

    if traces:
        fig.update_layout(
            updatemenus=[dict(active=0, buttons=buttons, direction="down", pad={"r": 10, "t": 10}, showactive=True, x=0.1, xanchor="left", y=1.15, yanchor="top")],
            title=f"COP Surface - {traces[0].name}", scene=dict(xaxis_title=col_quelle, yaxis_title=col_senke, zaxis_title=col_cop),
            autosize=True, height=700, margin=dict(l=65, r=50, b=65, t=90)
        )
        add_plotly_fig(fig, 'Interactive COP Surfaces by Kompressor & Kältemittel')

    # Fig 2: all Kältemittel surfaces stacked in one scene, each with its own
    # colorscale and toggleable through the legend.
    fig2 = go.Figure()
    colorscales = ['Viridis', 'Plasma', 'Inferno', 'Magma', 'Cividis', 'Blues', 'Greens', 'Reds']
    unique_kalt = plot_df[col_kalt].dropna().unique()
    for idx, kalt_raw in enumerate(unique_kalt):
        # Filter with the raw value; converting to str first would match
        # nothing when the column does not hold strings.
        subset = plot_df[plot_df[col_kalt] == kalt_raw]
        if len(subset) < 3:
            continue
        kalt_label = str(kalt_raw)
        cscale = colorscales[idx % len(colorscales)]
        # NOTE(review): <br>/<extra> tags reconstructed - confirm intended hover layout.
        fig2.add_trace(_cop_surface(
            subset,
            name=kalt_label, showscale=False, colorscale=cscale, showlegend=True,
            hovertemplate=(
                f"Kältemittel: {kalt_label}<br>"
                "Quelle (X): %{x}<br>Senke (Y): %{y}<br>COP (Z): %{z}<extra></extra>"
            ),
        ))

    # As with Fig 1, do not emit an empty figure when every group was too small.
    if fig2.data:
        fig2.update_layout(
            title="Stacked COP Surfaces by Kältemittel", scene=dict(xaxis_title=col_quelle, yaxis_title=col_senke, zaxis_title=col_cop),
            legend=dict(title="Kältemittel<br>(Click to toggle)", x=1.05, y=0.9),
            autosize=True, height=800, margin=dict(l=65, r=50, b=65, t=90)
        )
        add_plotly_fig(fig2, 'Stacked COP Surfaces Overview')
else:
    # Make the omission visible instead of silently dropping the 3D section.
    print(f"Skipping Plotly 3D visualizations; no column found for: {', '.join(missing)}")
# Close the document, write the report next to the current working
# directory and open it in the default browser.
# NOTE(review): closing tags reconstructed - confirm intended markup.
html_parts.append("</body></html>")
report_path = Path("cop_analysis_report.html").resolve()
report_path.write_text("\n".join(html_parts), encoding="utf-8")
print(f"Report generated and saved to {report_path}")
# as_uri() yields a well-formed, percent-encoded file:// URI on every
# platform; plain string concatenation breaks on Windows paths and on paths
# containing spaces or other reserved characters.
webbrowser.open(report_path.as_uri())