"""Build a single-file HTML report for the joined COP modelling results.

The script loads ``joined_results.parquet``, renders static seaborn plots
(correlation matrix and per-column distributions) as embedded PNG images,
adds interactive Plotly COP surfaces when the required columns can be
located, writes ``cop_analysis_report.html`` into the current working
directory and opens it in the default browser.
"""
import base64
import html
import io
import os
import webbrowser
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import seaborn as sns

# A surface is only drawn for a group with at least this many rows.
MIN_SURFACE_ROWS = 3

# Define paths: look for data/cop_modelling next to the working directory
# first, then one level up; fall back to the relative "../data" location.
current_dir = Path.cwd()
_data_subdir = Path("data") / "cop_modelling"
data_path = next(
    (
        base / _data_subdir
        for base in (current_dir, current_dir.parent)
        if (base / _data_subdir).exists()
    ),
    Path("..") / _data_subdir,
)

output_file = data_path / "joined_results.parquet"
print(f"Loading data from {output_file}...")
joined_df = pd.read_parquet(output_file)

# NOTE(review): the HTML markup in this file was reconstructed; confirm the
# tags/styling against the intended report template.
# The charset declaration matters: the report is written as UTF-8 and
# contains non-ASCII text such as "Kältemittel".
html_parts = [
    '<!DOCTYPE html>\n<html lang="en">\n<head>\n<meta charset="utf-8">',
    "<title>COP Analysis Report</title>",
    "</head>",
    "<body>\n<h1>COP Analysis Comprehensive Report</h1>\n",
]


def add_matplotlib_fig(title):
    """Append the current matplotlib figure to the report as an inline PNG.

    The active figure is rendered to an in-memory PNG, closed, and embedded
    as a base64 ``data:`` URI so the report needs no side-car image files.

    Args:
        title: Heading shown above the image; HTML-escaped before insertion.
    """
    buf = io.BytesIO()
    plt.savefig(buf, format="png", bbox_inches="tight", dpi=120)
    plt.close()
    b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
    safe_title = html.escape(str(title))
    html_parts.append(
        f'<div class="figure">\n'
        f"<h3>{safe_title}</h3>\n"
        f'<img src="data:image/png;base64,{b64}" alt="{safe_title}">\n'
        f"</div>\n"
    )


def add_plotly_fig(fig, title):
    """Append an interactive Plotly figure to the report.

    Args:
        fig: Plotly figure to embed.
        title: Heading shown above the figure; HTML-escaped before insertion.
    """
    # include_plotlyjs='cdn' ensures the HTML doesn't bundle the 3MB plotly.js library
    html_div = fig.to_html(full_html=False, include_plotlyjs="cdn")
    html_parts.append(
        f'<div class="figure">\n'
        f"<h3>{html.escape(str(title))}</h3>\n"
        f"{html_div}\n"
        f"</div>\n"
    )


print("Generating Correlation Matrix...")
html_parts.append("\n<section>\n")

# 1. Correlation matrix
sns.set_theme(style="whitegrid")
numerical_cols = joined_df.select_dtypes(include=["number"]).columns
if len(numerical_cols) > 0:
    plt.figure(figsize=(10, 8))
    corr_matrix = joined_df[numerical_cols].corr()
    sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", fmt=".2f", vmin=-1, vmax=1)
    plt.title("Correlation Matrix of Numerical Variables")
    plt.tight_layout()
    add_matplotlib_fig("Correlation Matrix")

print("Generating Distribution Plots...")

# 2. Distributions
for col in numerical_cols:
    plt.figure(figsize=(8, 4))
    sns.histplot(joined_df[col].dropna(), kde=True, bins=30)
    plt.title(f"Distribution of {col}")
    plt.tight_layout()
    add_matplotlib_fig(f"Distribution of {col}")

categorical_cols = joined_df.select_dtypes(exclude=["number"]).columns
for col in categorical_cols:
    plt.figure(figsize=(10, 5))
    # Only the 20 most frequent values are drawn to keep the axis readable.
    top_categories = joined_df[col].value_counts().nlargest(20).index
    sns.countplot(
        data=joined_df[joined_df[col].isin(top_categories)],
        x=col,
        order=top_categories,
    )
    plt.title(f"Distribution of {col} (Top 20 categories)")
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    add_matplotlib_fig(f"Distribution of {col}")

html_parts.append("\n</section>\n")

print("Generating Plotly 3D Visualizations...")

# Plotly
cols = joined_df.columns.tolist()
# str() guards against non-string column labels (e.g. integers).
cols_lower = [str(c).lower() for c in cols]


def find_col(possible_names):
    """Return the first column whose lower-cased name contains a candidate.

    Candidates are tried in the order given, so earlier (more specific)
    names take priority over later, looser ones.

    Args:
        possible_names: Substrings to search for, most specific first.

    Returns:
        The original column label of the first match, or ``None``.
    """
    for name in possible_names:
        needle = name.lower()
        for original, lowered in zip(cols, cols_lower):
            if needle in lowered:
                return original
    return None


col_quelle = find_col(["t_vorlauf_quelle", "quelle"])
col_senke = find_col(["t_vorlauf_senke", "senke"])
col_cop = find_col(["cop"])
col_komp = find_col(["kompressor", "stufe"])
col_kalt = find_col(["kältemittel", "kaltemittel", "kaeltemittel", "refrigerant"])

required_cols = {
    "Quelle (X)": col_quelle,
    "Senke (Y)": col_senke,
    "COP (Z)": col_cop,
    "Kompressor": col_komp,
    "Kältemittel": col_kalt,
}
missing = {k: v for k, v in required_cols.items() if v is None}

if missing:
    # Say why the interactive section is absent instead of skipping silently.
    print(
        "Skipping Plotly 3D visualizations; no matching column found for: "
        + ", ".join(missing)
    )
else:
    plot_df = joined_df.dropna(subset=list(required_cols.values())).copy()

    # Fig 1: one surface per (Kompressor, Kältemittel) pair, switched via a
    # dropdown so that only a single surface is visible at a time.
    fig = go.Figure()
    traces = []
    for (komp_val, kalt_val), subset in plot_df.groupby(
        [col_komp, col_kalt], observed=True
    ):
        if len(subset) < MIN_SURFACE_ROWS:
            continue
        # Rows = sink temperature, columns = source temperature, cell = mean COP.
        pivot = subset.pivot_table(
            values=col_cop, index=col_senke, columns=col_quelle, aggfunc="mean"
        )
        trace_name = f"{komp_val} | {kalt_val}"
        trace = go.Surface(
            x=pivot.columns.values,
            y=pivot.index.values,
            z=pivot.values,
            name=trace_name,
            visible=not traces,  # only the first surface starts visible
            hovertemplate=(
                f"Quelle (X): %{{x}}<br>"
                f"Senke (Y): %{{y}}<br>"
                f"COP (Z): %{{z}}<extra>{trace_name}</extra>"
            ),
        )
        traces.append(trace)
        fig.add_trace(trace)

    buttons = [
        dict(
            label=trace.name,
            method="update",
            args=[
                {"visible": [j == i for j in range(len(traces))]},
                {"title": f"COP Surface - {trace.name}"},
            ],
        )
        for i, trace in enumerate(traces)
    ]

    if traces:
        fig.update_layout(
            updatemenus=[
                dict(
                    active=0,
                    buttons=buttons,
                    direction="down",
                    pad={"r": 10, "t": 10},
                    showactive=True,
                    x=0.1,
                    xanchor="left",
                    y=1.15,
                    yanchor="top",
                )
            ],
            title=f"COP Surface - {traces[0].name}",
            scene=dict(
                xaxis_title=col_quelle, yaxis_title=col_senke, zaxis_title=col_cop
            ),
            autosize=True,
            height=700,
            margin=dict(l=65, r=50, b=65, t=90),
        )
        add_plotly_fig(fig, "Interactive COP Surfaces by Kompressor & Kältemittel")

    # Fig 2: all refrigerants stacked in one scene, one colorscale each.
    fig2 = go.Figure()
    colorscales = [
        "Viridis", "Plasma", "Inferno", "Magma",
        "Cividis", "Blues", "Greens", "Reds",
    ]
    # Group on the raw column values (in order of first appearance) so that
    # non-string refrigerant values still select their rows; str() is used
    # for display only.
    for idx, (kalt_val, subset) in enumerate(
        plot_df.groupby(col_kalt, observed=True, sort=False)
    ):
        if len(subset) < MIN_SURFACE_ROWS:
            continue
        kalt_label = str(kalt_val)
        pivot = subset.pivot_table(
            values=col_cop, index=col_senke, columns=col_quelle, aggfunc="mean"
        )
        fig2.add_trace(
            go.Surface(
                x=pivot.columns.values,
                y=pivot.index.values,
                z=pivot.values,
                name=kalt_label,
                showscale=False,
                colorscale=colorscales[idx % len(colorscales)],
                showlegend=True,
                hovertemplate=(
                    f"Kältemittel: {kalt_label}<br>"
                    f"Quelle (X): %{{x}}<br>"
                    f"Senke (Y): %{{y}}<br>"
                    f"COP (Z): %{{z}}<extra></extra>"
                ),
            )
        )

    # An empty 3D scene carries no information, so only add a populated figure.
    if fig2.data:
        fig2.update_layout(
            title="Stacked COP Surfaces by Kältemittel",
            scene=dict(
                xaxis_title=col_quelle, yaxis_title=col_senke, zaxis_title=col_cop
            ),
            legend=dict(title="Kältemittel<br>(Click to toggle)", x=1.05, y=0.9),
            autosize=True,
            height=800,
            margin=dict(l=65, r=50, b=65, t=90),
        )
        add_plotly_fig(fig2, "Stacked COP Surfaces Overview")

html_parts.append("</body>\n</html>")

report_path = Path("cop_analysis_report.html").resolve()
report_path.write_text("\n".join(html_parts), encoding="utf-8")

print(f"Report generated and saved to {report_path}")
# as_uri() yields a well-formed file:// URL on every platform (drive letters,
# spaces and non-ASCII characters are encoded correctly).
webbrowser.open(report_path.as_uri())