Spaces:
Running
Running
| # %% [markdown] | |
| # # COP Results Joiner | |
| # This notebook joins all Excel files from `data/cop_modelling` into a single Parquet file. | |
| # %% | |
| import pandas as pd | |
| import os | |
| from pathlib import Path | |
| # %% | |
| # Define paths | |
| # Try to resolve data path dynamically based on current working directory | |
| current_dir = Path.cwd() | |
| if (current_dir / "data" / "cop_modelling").exists(): | |
| data_path = current_dir / "data" / "cop_modelling" | |
| elif (current_dir.parent / "data" / "cop_modelling").exists(): | |
| data_path = current_dir.parent / "data" / "cop_modelling" | |
| else: | |
| # Fallback | |
| data_path = Path("..") / "data" / "cop_modelling" | |
| output_file = data_path / "joined_results.parquet" | |
| # Configuration | |
| LOAD_FROM_PARQUET = True # Set to False to rebuild from Excel files | |
| # %% | |
| if LOAD_FROM_PARQUET and output_file.exists(): | |
| print(f"Loading data directly from {output_file.name}...") | |
| joined_df = pd.read_parquet(output_file) | |
| print(f"Loaded shape: {joined_df.shape}") | |
| else: | |
| # Get all Excel files | |
| excel_files = list(data_path.glob("*.xlsx")) | |
| print(f"Found {len(excel_files)} files in {data_path.resolve()}: {[f.name for f in excel_files]}") | |
| # Load and join | |
| dfs = [] | |
| for f in excel_files: | |
| try: | |
| # Results are in 'Results' sheet | |
| df = pd.read_excel(f, sheet_name='Results') | |
| # Drop the first row (which usually contains units) | |
| df = df.iloc[1:].reset_index(drop=True) | |
| # Add a column to identify the source | |
| df['source_file'] = f.name | |
| # Convert columns to numerical if possible, else convert to strings | |
| for col in df.columns: | |
| try: | |
| df[col] = pd.to_numeric(df[col], errors='raise') | |
| except (ValueError, TypeError): | |
| df[col] = df[col].astype(str) | |
| dfs.append(df) | |
| except Exception as e: | |
| print(f"Error reading {f}: {e}") | |
| if not dfs: | |
| raise ValueError(f"No objects to concatenate. Could not find or read any valid Excel files in {data_path.resolve()}.") | |
| joined_df = pd.concat(dfs, ignore_index=True) | |
| print(f"Joined shape: {joined_df.shape}") | |
| # Save to parquet | |
| joined_df.to_parquet(output_file) | |
| print(f"Saved to {output_file}") | |
| # %% | |
| # Quick preview | |
| joined_df | |
| print(joined_df.columns) | |
| # %% | |
| df = joined_df.copy() | |
| # Mayor | |
| df['t_diff_senke'] = df['T_Vorlauf_Senke'] - df['T_Rücklauf_Senke'] | |
| # Menor | |
| df['t_diff_quelle'] = df['T_Vorlauf_Quelle'] - df['T_Rücklauf_Quelle'] | |
| df['temp_hub'] = df['T_Vorlauf_Senke'] - df['T_Rücklauf_Quelle'] | |
| print(df['t_diff_quelle'].value_counts()) | |
| print(df['t_diff_senke'].value_counts()) | |
| # %% | |
| # Descriptive Statistics | |
| import plotly.graph_objects as go | |
| from IPython.display import display | |
| print("=== Missing (NaN) Values ===") | |
| missing_values = df.isna().sum() | |
| print(missing_values[missing_values > 0]) | |
| print("\n=== Descriptive Statistics ===") | |
| stats_cols = ['T_Vorlauf_Quelle', 'T_Rücklauf_Quelle', 'T_Rücklauf_Senke', 'T_Vorlauf_Senke', 'COP', 'temp_hub'] | |
| desc_stats = df[stats_cols].describe() | |
| display(desc_stats) | |
| # Visualize distributions with Box plots | |
| fig_box = go.Figure() | |
| for col in ['T_Vorlauf_Quelle', 'T_Rücklauf_Quelle', 'T_Rücklauf_Senke', 'T_Vorlauf_Senke', 'temp_hub']: | |
| fig_box.add_trace(go.Box(y=df[col].dropna(), name=col)) | |
| fig_box.update_layout( | |
| title='Distribution of Temperature Variables', | |
| yaxis_title='Temperature', | |
| showlegend=False | |
| ) | |
| fig_box.show(renderer='notebook') | |
| fig_cop = go.Figure() | |
| fig_cop.add_trace(go.Box(y=df['COP'].dropna(), name='COP', marker_color='orange')) | |
| fig_cop.update_layout(title='Distribution of COP', yaxis_title='COP', showlegend=False) | |
| fig_cop.show(renderer='notebook') | |
| # Visualize categorical variables with Bar charts | |
| categorical_cols = ['Kältemittel', 'Kompressor_Nr_Stufe1', 'Medium_Senke'] | |
| for col in categorical_cols: | |
| if col in df.columns: | |
| counts = df[col].value_counts().reset_index() | |
| counts.columns = [col, 'count'] | |
| fig_bar = go.Figure() | |
| fig_bar.add_trace(go.Bar( | |
| x=counts[col].astype(str), | |
| y=counts['count'], | |
| name=col, | |
| marker_color='royalblue' | |
| )) | |
| fig_bar.update_layout( | |
| title=f'Distribution of {col}', | |
| xaxis_title=col, | |
| yaxis_title='Count', | |
| showlegend=False | |
| ) | |
| fig_bar.show(renderer='notebook') | |
| # Visualize correlation matrix | |
| numeric_cols = ['T_Vorlauf_Quelle', 'T_Rücklauf_Quelle', 'T_Rücklauf_Senke', 'T_Vorlauf_Senke', 'COP', 'COP_Lorenz', 'temp_hub', 't_diff_senke', 't_diff_quelle'] | |
| # Only include columns that actually exist in the dataframe | |
| available_numeric_cols = [col for col in numeric_cols if col in df.columns] | |
| if available_numeric_cols: | |
| corr_matrix = df[available_numeric_cols].corr() | |
| fig_corr = go.Figure(data=go.Heatmap( | |
| z=corr_matrix.values, | |
| x=corr_matrix.columns, | |
| y=corr_matrix.columns, | |
| colorscale='RdBu', | |
| zmin=-1, zmax=1, | |
| text=corr_matrix.round(2).values, | |
| texttemplate="%{text}", | |
| hoverinfo="text" | |
| )) | |
| fig_corr.update_layout( | |
| title='Correlation Matrix of Numeric Variables', | |
| width=800, | |
| height=800 | |
| ) | |
| fig_corr.show(renderer='notebook') | |
| #%% | |
| import pandas as pd | |
| import plotly.graph_objects as go | |
| import ipywidgets as widgets | |
| from IPython.display import display, clear_output | |
| print(df.columns) | |
| # ============================================================ | |
| # Prepare dataframe | |
| # ============================================================ | |
| # Create new column | |
| df['temp_hub'] = df['T_Vorlauf_Senke'] - df['T_Rücklauf_Quelle'] | |
| # Keep required columns | |
| df = df[[ | |
| 'Medium_Senke', | |
| 'Kältemittel', | |
| 'T_Vorlauf_Quelle', | |
| 'T_Rücklauf_Quelle', | |
| 'T_Rücklauf_Senke', | |
| 'T_Vorlauf_Senke', | |
| 'Kompressor_Nr_Stufe1', | |
| 'COP', | |
| 'COP_Lorenz', | |
| 'source_file', | |
| 't_diff_senke', | |
| 't_diff_quelle', | |
| 'temp_hub' | |
| ]].copy() | |
| # Drop missing values | |
| df = df.dropna() | |
| # Convert columns for filtering | |
| df['Kältemittel_filter'] = df['Kältemittel'].astype(str) | |
| df['Kompressor_filter'] = df['Kompressor_Nr_Stufe1'].astype(float).astype(int).astype(str) | |
| # Combine Kältemittel and compressor stage correctly | |
| df['Kältemittel_stufen'] = ( | |
| df['Kältemittel_filter'] + '_' + df['Kompressor_filter'] | |
| ) | |
| # Sort dataframe by default temperature columns | |
| df = df.sort_values( | |
| by=['T_Rücklauf_Quelle', 'T_Vorlauf_Senke'], | |
| ascending=[True, True] | |
| ) | |
| # ============================================================ | |
| # Define default column names | |
| # ============================================================ | |
| default_x = 'T_Rücklauf_Quelle' | |
| default_y = 'T_Vorlauf_Senke' | |
| default_z = 'COP' | |
| col_kalt = 'Kältemittel_stufen' | |
| plot_df = df.copy() | |
| # ============================================================ | |
| # Find selectable numeric axis columns | |
| # ============================================================ | |
| axis_options = [] | |
| for col in plot_df.columns: | |
| numeric_version = pd.to_numeric(plot_df[col], errors='coerce') | |
| if numeric_version.notna().sum() > 0 and numeric_version.nunique() >= 2: | |
| axis_options.append(col) | |
| # Make sure defaults exist | |
| if default_x not in axis_options: | |
| axis_options.append(default_x) | |
| if default_y not in axis_options: | |
| axis_options.append(default_y) | |
| if default_z not in axis_options: | |
| axis_options.append(default_z) | |
| # Optional: nicer ordering | |
| preferred_order = [ | |
| 'T_Vorlauf_Quelle', | |
| 'T_Rücklauf_Quelle', | |
| 'T_Rücklauf_Senke', | |
| 'T_Vorlauf_Senke', | |
| 't_diff_senke', | |
| 't_diff_quelle', | |
| 'temp_hub', | |
| 'COP', | |
| 'COP_Lorenz', | |
| 'Kompressor_Nr_Stufe1' | |
| ] | |
| axis_options = [c for c in preferred_order if c in axis_options] + [ | |
| c for c in axis_options if c not in preferred_order | |
| ] | |
| # ============================================================ | |
| # Check missing required columns | |
| # ============================================================ | |
| required_cols = [ | |
| default_x, | |
| default_y, | |
| default_z, | |
| 'COP_Lorenz', | |
| col_kalt, | |
| 'Kältemittel_filter', | |
| 'Kompressor_filter' | |
| ] | |
| missing = [col for col in required_cols if col not in plot_df.columns] | |
| if missing: | |
| print("Missing columns:", missing) | |
| else: | |
| print("Preparing stacked 3D surface plot with filters and selectable axes...") | |
| # ============================================================ | |
| # Options for filters | |
| # ============================================================ | |
| kalt_options = sorted(plot_df['Kältemittel_filter'].dropna().unique()) | |
| komp_options = sorted( | |
| plot_df['Kompressor_filter'].dropna().unique(), | |
| key=lambda x: int(x) | |
| ) | |
| # Same Plotly colorscales as your original code | |
| colorscales = [ | |
| 'Viridis', 'Plasma', 'Inferno', 'Magma', | |
| 'Cividis', 'Blues', 'Greens', 'Reds' | |
| ] | |
| # Stable global order, so colors stay consistent | |
| all_groups = sorted(plot_df[col_kalt].dropna().unique()) | |
| group_color_map = { | |
| group: colorscales[idx % len(colorscales)] | |
| for idx, group in enumerate(all_groups) | |
| } | |
| # ============================================================ | |
| # Widgets | |
| # ============================================================ | |
| select_kalt = widgets.SelectMultiple( | |
| options=kalt_options, | |
| value=tuple(kalt_options), | |
| description='Kältemittel:', | |
| rows=min(8, len(kalt_options)), | |
| style={'description_width': 'initial'}, | |
| layout=widgets.Layout(width='300px') | |
| ) | |
| select_komp = widgets.SelectMultiple( | |
| options=komp_options, | |
| value=tuple(komp_options), | |
| description='Kompressor:', | |
| rows=min(8, len(komp_options)), | |
| style={'description_width': 'initial'}, | |
| layout=widgets.Layout(width='300px') | |
| ) | |
| select_x = widgets.Dropdown( | |
| options=axis_options, | |
| value=default_x, | |
| description='X-Axis:', | |
| style={'description_width': 'initial'}, | |
| layout=widgets.Layout(width='280px') | |
| ) | |
| select_y = widgets.Dropdown( | |
| options=axis_options, | |
| value=default_y, | |
| description='Y-Axis:', | |
| style={'description_width': 'initial'}, | |
| layout=widgets.Layout(width='280px') | |
| ) | |
| select_z = widgets.Dropdown( | |
| options=axis_options, | |
| value=default_z, | |
| description='Z-Axis:', | |
| style={'description_width': 'initial'}, | |
| layout=widgets.Layout(width='280px') | |
| ) | |
| button_update = widgets.Button( | |
| description='Update Plot', | |
| button_style='primary', | |
| icon='refresh' | |
| ) | |
| button_all = widgets.Button( | |
| description='Select All', | |
| button_style='success' | |
| ) | |
| button_clear = widgets.Button( | |
| description='Clear All', | |
| button_style='warning' | |
| ) | |
| output = widgets.Output() | |
| # ============================================================ | |
| # Plot function | |
| # ============================================================ | |
| def create_stacked_surface_plot(_=None): | |
| with output: | |
| clear_output(wait=True) | |
| selected_kalt = list(select_kalt.value) | |
| selected_komp = list(select_komp.value) | |
| selected_x = select_x.value | |
| selected_y = select_y.value | |
| selected_z = select_z.value | |
| if len(selected_kalt) == 0: | |
| print("Please select at least one Kältemittel.") | |
| return | |
| if len(selected_komp) == 0: | |
| print("Please select at least one Kompressor_Nr_Stufe1.") | |
| return | |
| if selected_x == selected_y: | |
| print("X-axis and Y-axis cannot be the same for a surface plot.") | |
| return | |
| filtered_df = plot_df[ | |
| (plot_df['Kältemittel_filter'].isin(selected_kalt)) & | |
| (plot_df['Kompressor_filter'].isin(selected_komp)) | |
| ].copy() | |
| # Convert selected axis columns to numeric | |
| filtered_df[selected_x] = pd.to_numeric(filtered_df[selected_x], errors='coerce') | |
| filtered_df[selected_y] = pd.to_numeric(filtered_df[selected_y], errors='coerce') | |
| filtered_df[selected_z] = pd.to_numeric(filtered_df[selected_z], errors='coerce') | |
| # Drop rows where selected axis values are missing | |
| filtered_df = filtered_df.dropna(subset=[selected_x, selected_y, selected_z]) | |
| print("Selected Kältemittel:", selected_kalt) | |
| print("Selected Kompressor:", selected_komp) | |
| print("Selected X-axis:", selected_x) | |
| print("Selected Y-axis:", selected_y) | |
| print("Selected Z-axis:", selected_z) | |
| print("Rows after filtering:", len(filtered_df)) | |
| if filtered_df.empty: | |
| print("No data for selected filters and selected axes.") | |
| return | |
| fig2 = go.Figure() | |
| unique_kalt = sorted(filtered_df[col_kalt].dropna().unique()) | |
| added_traces = 0 | |
| for kalt_val in unique_kalt: | |
| subset = filtered_df[filtered_df[col_kalt] == kalt_val].copy() | |
| if len(subset) < 3: | |
| continue | |
| pivot = subset.pivot_table( | |
| values=selected_z, | |
| index=selected_y, | |
| columns=selected_x, | |
| aggfunc='mean' | |
| ) | |
| # Sort both axes increasing | |
| pivot = pivot.sort_index(axis=0, ascending=True) | |
| pivot = pivot.sort_index(axis=1, ascending=True) | |
| # Skip empty or too-small pivots | |
| if pivot.empty: | |
| continue | |
| if pivot.shape[0] < 2 or pivot.shape[1] < 2: | |
| continue | |
| cscale = group_color_map.get(kalt_val, 'Viridis') | |
| trace = go.Surface( | |
| x=pivot.columns.values, | |
| y=pivot.index.values, | |
| z=pivot.values, | |
| name=str(kalt_val), | |
| showscale=False, | |
| colorscale=cscale, | |
| showlegend=True, | |
| hovertemplate=( | |
| f"Kältemittel/Stufe: {kalt_val}<br>" | |
| f"{selected_x}: %{{x}}<br>" | |
| f"{selected_y}: %{{y}}<br>" | |
| f"{selected_z}: %{{z}}" | |
| "<extra></extra>" | |
| ) | |
| ) | |
| fig2.add_trace(trace) | |
| added_traces += 1 | |
| if added_traces == 0: | |
| print("Not enough data points to create surfaces for this selection.") | |
| return | |
| fig2.update_layout( | |
| title=f"Stacked {selected_z} Surfaces by Kältemittel / Kompressor", | |
| scene=dict( | |
| xaxis=dict( | |
| title=selected_x, | |
| autorange='reversed' if selected_x == default_x else True | |
| ), | |
| yaxis=dict( | |
| title=selected_y, | |
| autorange=True | |
| ), | |
| zaxis=dict( | |
| title=selected_z, | |
| autorange=True | |
| ), | |
| camera=dict( | |
| eye=dict(x=1.7, y=-1.7, z=1.2) | |
| ) | |
| ), | |
| legend=dict( | |
| title="Kältemittel / Stufe<br>Click to toggle", | |
| x=1.05, | |
| y=0.9 | |
| ), | |
| autosize=False, | |
| width=950, | |
| height=820, | |
| margin=dict(l=65, r=50, b=65, t=90) | |
| ) | |
| fig2.show( | |
| renderer="notebook", | |
| config={ | |
| "scrollZoom": True, | |
| "displayModeBar": True, | |
| "responsive": True | |
| } | |
| ) | |
| # ============================================================ | |
| # Button functions | |
| # ============================================================ | |
| def select_all_filters(_=None): | |
| select_kalt.value = tuple(kalt_options) | |
| select_komp.value = tuple(komp_options) | |
| def clear_all_filters(_=None): | |
| select_kalt.value = tuple() | |
| select_komp.value = tuple() | |
| button_update.on_click(create_stacked_surface_plot) | |
| button_all.on_click(select_all_filters) | |
| button_clear.on_click(clear_all_filters) | |
| # ============================================================ | |
| # Display UI on top | |
| # ============================================================ | |
| controls = widgets.VBox([ | |
| widgets.HTML( | |
| "<b>Select/deselect filters and choose axes:</b><br>" | |
| "Use Ctrl+Click or Cmd+Click to select multiple Kältemittel/Kompressors." | |
| ), | |
| widgets.HBox([ | |
| select_kalt, | |
| select_komp | |
| ]), | |
| widgets.HBox([ | |
| select_x, | |
| select_y, | |
| select_z | |
| ]), | |
| widgets.HBox([ | |
| button_update, | |
| button_all, | |
| button_clear | |
| ]) | |
| ]) | |
| display(controls, output) | |
| print("Select filters/axes above and click 'Update Plot'.") | |
| #%% | |
| import subprocess | |
| import sys | |
| def export_to_html(script_name="cop_visualization.py"): | |
| """ | |
| Exports the current script/notebook to an HTML file. | |
| Note: If you are using an Interactive Window in VS Code, you can also | |
| just click the 'Export' button in the toolbar at the top of the window! | |
| """ | |
| print(f"Exporting {script_name} to HTML...") | |
| try: | |
| # First, we need to convert the .py script to a .ipynb notebook using jupytext | |
| print("1. Converting .py to .ipynb format...") | |
| subprocess.run([sys.executable, "-m", "jupytext", "--to", "notebook", script_name], check=True) | |
| notebook_name = script_name.replace(".py", ".ipynb") | |
| print("2. Executing notebook and generating HTML...") | |
| result = subprocess.run( | |
| [sys.executable, "-m", "jupyter", "nbconvert", "--to", "html", "--execute", notebook_name], | |
| capture_output=True, text=True | |
| ) | |
| if result.returncode == 0: | |
| print(f"Successfully exported to HTML! Look for {notebook_name.replace('.ipynb', '.html')} in your directory.") | |
| # Optional: Clean up the intermediate .ipynb file | |
| import os | |
| if os.path.exists(notebook_name): | |
| os.remove(notebook_name) | |
| else: | |
| print("Failed to export. Error:") | |
| print(result.stderr) | |
| except Exception as e: | |
| print(f"Error during export: {e}") | |
| print("Make sure you have jupyter, nbconvert, and jupytext installed.") | |
| # Uncomment the line below to automatically export to HTML when you "Run All" | |
| export_to_html("cop_visualization.py") | |
| # %% | |