# %% [markdown] # # COP Results Joiner # This notebook joins all Excel files from `data/cop_modelling` into a single Parquet file. # %% import pandas as pd import os from pathlib import Path # %% # Define paths # Try to resolve data path dynamically based on current working directory current_dir = Path.cwd() if (current_dir / "data" / "cop_modelling").exists(): data_path = current_dir / "data" / "cop_modelling" elif (current_dir.parent / "data" / "cop_modelling").exists(): data_path = current_dir.parent / "data" / "cop_modelling" else: # Fallback data_path = Path("..") / "data" / "cop_modelling" output_file = data_path / "joined_results.parquet" # Configuration LOAD_FROM_PARQUET = True # Set to False to rebuild from Excel files # %% if LOAD_FROM_PARQUET and output_file.exists(): print(f"Loading data directly from {output_file.name}...") joined_df = pd.read_parquet(output_file) print(f"Loaded shape: {joined_df.shape}") else: # Get all Excel files excel_files = list(data_path.glob("*.xlsx")) print(f"Found {len(excel_files)} files in {data_path.resolve()}: {[f.name for f in excel_files]}") # Load and join dfs = [] for f in excel_files: try: # Results are in 'Results' sheet df = pd.read_excel(f, sheet_name='Results') # Drop the first row (which usually contains units) df = df.iloc[1:].reset_index(drop=True) # Add a column to identify the source df['source_file'] = f.name # Convert columns to numerical if possible, else convert to strings for col in df.columns: try: df[col] = pd.to_numeric(df[col], errors='raise') except (ValueError, TypeError): df[col] = df[col].astype(str) dfs.append(df) except Exception as e: print(f"Error reading {f}: {e}") if not dfs: raise ValueError(f"No objects to concatenate. Could not find or read any valid Excel files in {data_path.resolve()}.") joined_df = pd.concat(dfs, ignore_index=True) print(f"Joined shape: {joined_df.shape}") # Save to parquet joined_df.to_parquet(output_file) print(f"Saved to {output_file}") # %% # Quick preview joined_df print(joined_df.columns) # %% df = joined_df.copy() # Mayor df['t_diff_senke'] = df['T_Vorlauf_Senke'] - df['T_Rücklauf_Senke'] # Menor df['t_diff_quelle'] = df['T_Vorlauf_Quelle'] - df['T_Rücklauf_Quelle'] df['temp_hub'] = df['T_Vorlauf_Senke'] - df['T_Rücklauf_Quelle'] print(df['t_diff_quelle'].value_counts()) print(df['t_diff_senke'].value_counts()) # %% # Descriptive Statistics import plotly.graph_objects as go from IPython.display import display print("=== Missing (NaN) Values ===") missing_values = df.isna().sum() print(missing_values[missing_values > 0]) print("\n=== Descriptive Statistics ===") stats_cols = ['T_Vorlauf_Quelle', 'T_Rücklauf_Quelle', 'T_Rücklauf_Senke', 'T_Vorlauf_Senke', 'COP', 'temp_hub'] desc_stats = df[stats_cols].describe() display(desc_stats) # Visualize distributions with Box plots fig_box = go.Figure() for col in ['T_Vorlauf_Quelle', 'T_Rücklauf_Quelle', 'T_Rücklauf_Senke', 'T_Vorlauf_Senke', 'temp_hub']: fig_box.add_trace(go.Box(y=df[col].dropna(), name=col)) fig_box.update_layout( title='Distribution of Temperature Variables', yaxis_title='Temperature', showlegend=False ) fig_box.show(renderer='notebook') fig_cop = go.Figure() fig_cop.add_trace(go.Box(y=df['COP'].dropna(), name='COP', marker_color='orange')) fig_cop.update_layout(title='Distribution of COP', yaxis_title='COP', showlegend=False) fig_cop.show(renderer='notebook') # Visualize categorical variables with Bar charts categorical_cols = ['Kältemittel', 'Kompressor_Nr_Stufe1', 'Medium_Senke'] for col in categorical_cols: if col in df.columns: counts = df[col].value_counts().reset_index() counts.columns = [col, 'count'] fig_bar = go.Figure() fig_bar.add_trace(go.Bar( x=counts[col].astype(str), y=counts['count'], name=col, marker_color='royalblue' )) fig_bar.update_layout( title=f'Distribution of {col}', xaxis_title=col, yaxis_title='Count', showlegend=False ) fig_bar.show(renderer='notebook') # Visualize correlation matrix numeric_cols = ['T_Vorlauf_Quelle', 'T_Rücklauf_Quelle', 'T_Rücklauf_Senke', 'T_Vorlauf_Senke', 'COP', 'COP_Lorenz', 'temp_hub', 't_diff_senke', 't_diff_quelle'] # Only include columns that actually exist in the dataframe available_numeric_cols = [col for col in numeric_cols if col in df.columns] if available_numeric_cols: corr_matrix = df[available_numeric_cols].corr() fig_corr = go.Figure(data=go.Heatmap( z=corr_matrix.values, x=corr_matrix.columns, y=corr_matrix.columns, colorscale='RdBu', zmin=-1, zmax=1, text=corr_matrix.round(2).values, texttemplate="%{text}", hoverinfo="text" )) fig_corr.update_layout( title='Correlation Matrix of Numeric Variables', width=800, height=800 ) fig_corr.show(renderer='notebook') #%% import pandas as pd import plotly.graph_objects as go import ipywidgets as widgets from IPython.display import display, clear_output print(df.columns) # ============================================================ # Prepare dataframe # ============================================================ # Create new column df['temp_hub'] = df['T_Vorlauf_Senke'] - df['T_Rücklauf_Quelle'] # Keep required columns df = df[[ 'Medium_Senke', 'Kältemittel', 'T_Vorlauf_Quelle', 'T_Rücklauf_Quelle', 'T_Rücklauf_Senke', 'T_Vorlauf_Senke', 'Kompressor_Nr_Stufe1', 'COP', 'COP_Lorenz', 'source_file', 't_diff_senke', 't_diff_quelle', 'temp_hub' ]].copy() # Drop missing values df = df.dropna() # Convert columns for filtering df['Kältemittel_filter'] = df['Kältemittel'].astype(str) df['Kompressor_filter'] = df['Kompressor_Nr_Stufe1'].astype(float).astype(int).astype(str) # Combine Kältemittel and compressor stage correctly df['Kältemittel_stufen'] = ( df['Kältemittel_filter'] + '_' + df['Kompressor_filter'] ) # Sort dataframe by default temperature columns df = df.sort_values( by=['T_Rücklauf_Quelle', 'T_Vorlauf_Senke'], ascending=[True, True] ) # ============================================================ # Define default column names # ============================================================ default_x = 'T_Rücklauf_Quelle' default_y = 'T_Vorlauf_Senke' default_z = 'COP' col_kalt = 'Kältemittel_stufen' plot_df = df.copy() # ============================================================ # Find selectable numeric axis columns # ============================================================ axis_options = [] for col in plot_df.columns: numeric_version = pd.to_numeric(plot_df[col], errors='coerce') if numeric_version.notna().sum() > 0 and numeric_version.nunique() >= 2: axis_options.append(col) # Make sure defaults exist if default_x not in axis_options: axis_options.append(default_x) if default_y not in axis_options: axis_options.append(default_y) if default_z not in axis_options: axis_options.append(default_z) # Optional: nicer ordering preferred_order = [ 'T_Vorlauf_Quelle', 'T_Rücklauf_Quelle', 'T_Rücklauf_Senke', 'T_Vorlauf_Senke', 't_diff_senke', 't_diff_quelle', 'temp_hub', 'COP', 'COP_Lorenz', 'Kompressor_Nr_Stufe1' ] axis_options = [c for c in preferred_order if c in axis_options] + [ c for c in axis_options if c not in preferred_order ] # ============================================================ # Check missing required columns # ============================================================ required_cols = [ default_x, default_y, default_z, 'COP_Lorenz', col_kalt, 'Kältemittel_filter', 'Kompressor_filter' ] missing = [col for col in required_cols if col not in plot_df.columns] if missing: print("Missing columns:", missing) else: print("Preparing stacked 3D surface plot with filters and selectable axes...") # ============================================================ # Options for filters # ============================================================ kalt_options = sorted(plot_df['Kältemittel_filter'].dropna().unique()) komp_options = sorted( plot_df['Kompressor_filter'].dropna().unique(), key=lambda x: int(x) ) # Same Plotly colorscales as your original code colorscales = [ 'Viridis', 'Plasma', 'Inferno', 'Magma', 'Cividis', 'Blues', 'Greens', 'Reds' ] # Stable global order, so colors stay consistent all_groups = sorted(plot_df[col_kalt].dropna().unique()) group_color_map = { group: colorscales[idx % len(colorscales)] for idx, group in enumerate(all_groups) } # ============================================================ # Widgets # ============================================================ select_kalt = widgets.SelectMultiple( options=kalt_options, value=tuple(kalt_options), description='Kältemittel:', rows=min(8, len(kalt_options)), style={'description_width': 'initial'}, layout=widgets.Layout(width='300px') ) select_komp = widgets.SelectMultiple( options=komp_options, value=tuple(komp_options), description='Kompressor:', rows=min(8, len(komp_options)), style={'description_width': 'initial'}, layout=widgets.Layout(width='300px') ) select_x = widgets.Dropdown( options=axis_options, value=default_x, description='X-Axis:', style={'description_width': 'initial'}, layout=widgets.Layout(width='280px') ) select_y = widgets.Dropdown( options=axis_options, value=default_y, description='Y-Axis:', style={'description_width': 'initial'}, layout=widgets.Layout(width='280px') ) select_z = widgets.Dropdown( options=axis_options, value=default_z, description='Z-Axis:', style={'description_width': 'initial'}, layout=widgets.Layout(width='280px') ) button_update = widgets.Button( description='Update Plot', button_style='primary', icon='refresh' ) button_all = widgets.Button( description='Select All', button_style='success' ) button_clear = widgets.Button( description='Clear All', button_style='warning' ) output = widgets.Output() # ============================================================ # Plot function # ============================================================ def create_stacked_surface_plot(_=None): with output: clear_output(wait=True) selected_kalt = list(select_kalt.value) selected_komp = list(select_komp.value) selected_x = select_x.value selected_y = select_y.value selected_z = select_z.value if len(selected_kalt) == 0: print("Please select at least one Kältemittel.") return if len(selected_komp) == 0: print("Please select at least one Kompressor_Nr_Stufe1.") return if selected_x == selected_y: print("X-axis and Y-axis cannot be the same for a surface plot.") return filtered_df = plot_df[ (plot_df['Kältemittel_filter'].isin(selected_kalt)) & (plot_df['Kompressor_filter'].isin(selected_komp)) ].copy() # Convert selected axis columns to numeric filtered_df[selected_x] = pd.to_numeric(filtered_df[selected_x], errors='coerce') filtered_df[selected_y] = pd.to_numeric(filtered_df[selected_y], errors='coerce') filtered_df[selected_z] = pd.to_numeric(filtered_df[selected_z], errors='coerce') # Drop rows where selected axis values are missing filtered_df = filtered_df.dropna(subset=[selected_x, selected_y, selected_z]) print("Selected Kältemittel:", selected_kalt) print("Selected Kompressor:", selected_komp) print("Selected X-axis:", selected_x) print("Selected Y-axis:", selected_y) print("Selected Z-axis:", selected_z) print("Rows after filtering:", len(filtered_df)) if filtered_df.empty: print("No data for selected filters and selected axes.") return fig2 = go.Figure() unique_kalt = sorted(filtered_df[col_kalt].dropna().unique()) added_traces = 0 for kalt_val in unique_kalt: subset = filtered_df[filtered_df[col_kalt] == kalt_val].copy() if len(subset) < 3: continue pivot = subset.pivot_table( values=selected_z, index=selected_y, columns=selected_x, aggfunc='mean' ) # Sort both axes increasing pivot = pivot.sort_index(axis=0, ascending=True) pivot = pivot.sort_index(axis=1, ascending=True) # Skip empty or too-small pivots if pivot.empty: continue if pivot.shape[0] < 2 or pivot.shape[1] < 2: continue cscale = group_color_map.get(kalt_val, 'Viridis') trace = go.Surface( x=pivot.columns.values, y=pivot.index.values, z=pivot.values, name=str(kalt_val), showscale=False, colorscale=cscale, showlegend=True, hovertemplate=( f"Kältemittel/Stufe: {kalt_val}
" f"{selected_x}: %{{x}}
" f"{selected_y}: %{{y}}
" f"{selected_z}: %{{z}}" "" ) ) fig2.add_trace(trace) added_traces += 1 if added_traces == 0: print("Not enough data points to create surfaces for this selection.") return fig2.update_layout( title=f"Stacked {selected_z} Surfaces by Kältemittel / Kompressor", scene=dict( xaxis=dict( title=selected_x, autorange='reversed' if selected_x == default_x else True ), yaxis=dict( title=selected_y, autorange=True ), zaxis=dict( title=selected_z, autorange=True ), camera=dict( eye=dict(x=1.7, y=-1.7, z=1.2) ) ), legend=dict( title="Kältemittel / Stufe
Click to toggle", x=1.05, y=0.9 ), autosize=False, width=950, height=820, margin=dict(l=65, r=50, b=65, t=90) ) fig2.show( renderer="notebook", config={ "scrollZoom": True, "displayModeBar": True, "responsive": True } ) # ============================================================ # Button functions # ============================================================ def select_all_filters(_=None): select_kalt.value = tuple(kalt_options) select_komp.value = tuple(komp_options) def clear_all_filters(_=None): select_kalt.value = tuple() select_komp.value = tuple() button_update.on_click(create_stacked_surface_plot) button_all.on_click(select_all_filters) button_clear.on_click(clear_all_filters) # ============================================================ # Display UI on top # ============================================================ controls = widgets.VBox([ widgets.HTML( "Select/deselect filters and choose axes:
" "Use Ctrl+Click or Cmd+Click to select multiple Kältemittel/Kompressors." ), widgets.HBox([ select_kalt, select_komp ]), widgets.HBox([ select_x, select_y, select_z ]), widgets.HBox([ button_update, button_all, button_clear ]) ]) display(controls, output) print("Select filters/axes above and click 'Update Plot'.") #%% import subprocess import sys def export_to_html(script_name="cop_visualization.py"): """ Exports the current script/notebook to an HTML file. Note: If you are using an Interactive Window in VS Code, you can also just click the 'Export' button in the toolbar at the top of the window! """ print(f"Exporting {script_name} to HTML...") try: # First, we need to convert the .py script to a .ipynb notebook using jupytext print("1. Converting .py to .ipynb format...") subprocess.run([sys.executable, "-m", "jupytext", "--to", "notebook", script_name], check=True) notebook_name = script_name.replace(".py", ".ipynb") print("2. Executing notebook and generating HTML...") result = subprocess.run( [sys.executable, "-m", "jupyter", "nbconvert", "--to", "html", "--execute", notebook_name], capture_output=True, text=True ) if result.returncode == 0: print(f"Successfully exported to HTML! Look for {notebook_name.replace('.ipynb', '.html')} in your directory.") # Optional: Clean up the intermediate .ipynb file import os if os.path.exists(notebook_name): os.remove(notebook_name) else: print("Failed to export. Error:") print(result.stderr) except Exception as e: print(f"Error during export: {e}") print("Make sure you have jupyter, nbconvert, and jupytext installed.") # Uncomment the line below to automatically export to HTML when you "Run All" export_to_html("cop_visualization.py") # %%