Spaces:

DAG-UPB
/

HeatTransPlan

Running

App Files Files Community

HeatTransPlan / cop_analysis /join_cop copy.py

drzg15

Initial code commit with LFS for binaries

c993983 30 days ago

raw

history blame contribute delete

9.84 kB


	# %% [markdown]
	# # COP Results Joiner
	# This notebook joins all Excel files from `data/cop_modelling` into a single Parquet file.

	# %%
	import pandas as pd
	import os
	from pathlib import Path

	# %%
	# Define paths
	# Try to resolve data path dynamically based on current working directory
	current_dir = Path.cwd()
	if (current_dir / "data" / "cop_modelling").exists():
	data_path = current_dir / "data" / "cop_modelling"
	elif (current_dir.parent / "data" / "cop_modelling").exists():
	data_path = current_dir.parent / "data" / "cop_modelling"
	else:
	# Fallback
	data_path = Path("..") / "data" / "cop_modelling"

	output_file = data_path / "joined_results.parquet"

	# Configuration
	LOAD_FROM_PARQUET = True # Set to False to rebuild from Excel files

	# %%
	if LOAD_FROM_PARQUET and output_file.exists():
	print(f"Loading data directly from {output_file.name}...")
	joined_df = pd.read_parquet(output_file)
	print(f"Loaded shape: {joined_df.shape}")
	else:
	# Get all Excel files
	excel_files = list(data_path.glob("*.xlsx"))
	print(f"Found {len(excel_files)} files in {data_path.resolve()}: {[f.name for f in excel_files]}")

	# Load and join
	dfs = []
	for f in excel_files:
	try:
	# Results are in 'Results' sheet
	df = pd.read_excel(f, sheet_name='Results')

	# Drop the first row (which usually contains units)
	df = df.iloc[1:].reset_index(drop=True)

	# Add a column to identify the source
	df['source_file'] = f.name

	# Convert columns to numerical if possible, else convert to strings
	for col in df.columns:
	try:
	df[col] = pd.to_numeric(df[col], errors='raise')
	except (ValueError, TypeError):
	df[col] = df[col].astype(str)

	dfs.append(df)
	except Exception as e:
	print(f"Error reading {f}: {e}")

	if not dfs:
	raise ValueError(f"No objects to concatenate. Could not find or read any valid Excel files in {data_path.resolve()}.")

	joined_df = pd.concat(dfs, ignore_index=True)
	print(f"Joined shape: {joined_df.shape}")

	# Save to parquet
	joined_df.to_parquet(output_file)
	print(f"Saved to {output_file}")

	# %%
	# Quick preview
	joined_df


	# %%
	import matplotlib.pyplot as plt
	import seaborn as sns

	# Set visual style
	sns.set_theme(style="whitegrid")

	# 1. Correlation matrix for numerical variables
	# Select only numerical columns
	numerical_cols = joined_df.select_dtypes(include=['number']).columns

	if len(numerical_cols) > 0:
	plt.figure(figsize=(10, 8))
	corr_matrix = joined_df[numerical_cols].corr()
	sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f", vmin=-1, vmax=1)
	plt.title('Correlation Matrix of Numerical Variables')
	plt.tight_layout()
	plt.show()
	else:
	print("No numerical variables found for correlation matrix.")

	# %%
	# 2. Visualize the distribution of every variable
	# Numerical variables - Histograms
	for col in numerical_cols:
	plt.figure(figsize=(8, 4))
	sns.histplot(joined_df[col].dropna(), kde=True, bins=30)
	plt.title(f'Distribution of {col}')
	plt.tight_layout()
	plt.show()

	# Categorical variables - Count plots (limiting to top 20 categories if there are many)
	categorical_cols = joined_df.select_dtypes(exclude=['number']).columns
	for col in categorical_cols:
	plt.figure(figsize=(10, 5))
	# Get top 20 most frequent categories to avoid unreadable plots
	top_categories = joined_df[col].value_counts().nlargest(20).index
	sns.countplot(data=joined_df[joined_df[col].isin(top_categories)], x=col, order=top_categories)
	plt.title(f'Distribution of {col} (Top 20 categories)')
	plt.xticks(rotation=45, ha='right')
	plt.tight_layout()
	plt.show()

	# %%
	import plotly.graph_objects as go
	import pandas as pd
	import numpy as np

	print("Generating 3D Surface Plot...")

	# Find columns case-insensitively or by partial match to ensure it works
	cols = joined_df.columns.tolist()
	cols_lower = [c.lower() for c in cols]

	def find_col(possible_names):
	for name in possible_names:
	for idx, c in enumerate(cols_lower):
	if name.lower() in c:
	return cols[idx]
	return None

	# Attempting to match your exact columns or close variations
	col_quelle = find_col(['t_vorlauf_quelle', 'quelle'])
	col_senke = find_col(['t_vorlauf_senke', 'senke'])
	col_cop = find_col(['cop'])
	col_komp = find_col(['kompressor', 'stufe'])
	col_kalt = find_col(['kältemittel', 'kaltemittel', 'kaeltemittel', 'refrigerant'])

	required_cols = {'Quelle (X)': col_quelle, 'Senke (Y)': col_senke, 'COP (Z)': col_cop, 'Kompressor': col_komp, 'Kältemittel': col_kalt}
	missing = {k: v for k, v in required_cols.items() if v is None}

	if missing:
	print(f"Could not automatically find columns for: {list(missing.keys())}")
	print(f"Available columns are: {cols}")
	print("Please update the column names in the code below.")
	else:
	fig = go.Figure()

	# Drop NaNs for the relevant columns
	plot_df = joined_df.dropna(subset=list(required_cols.values())).copy()

	# Create combinations of Kompressor and Kältemittel
	combinations = plot_df.groupby([col_komp, col_kalt]).size().reset_index()

	traces = []
	buttons = []

	for i, row in combinations.iterrows():
	komp_val = str(row[col_komp])
	kalt_val = str(row[col_kalt])

	# Filter dataframe for this specific combination
	subset = plot_df[(plot_df[col_komp] == row[col_komp]) & (plot_df[col_kalt] == row[col_kalt])]

	if len(subset) < 3:
	continue # Skip if not enough points

	# Create a pivot table to form the 2D grid for the surface
	# Aggregating by mean in case there are duplicate combinations of (Quelle, Senke)
	pivot = subset.pivot_table(values=col_cop,
	index=col_senke,
	columns=col_quelle,
	aggfunc='mean')

	trace_name = f"{komp_val} \| {kalt_val}"

	trace = go.Surface(
	x=pivot.columns.values,
	y=pivot.index.values,
	z=pivot.values,
	name=trace_name,
	visible=(len(traces) == 0) # Only the first trace is visible initially
	)

	traces.append(trace)
	fig.add_trace(trace)

	# Generate the dropdown buttons
	for i, trace in enumerate(traces):
	visibility = [False] * len(traces)
	visibility[i] = True

	button = dict(
	label=trace.name,
	method="update",
	args=[{"visible": visibility},
	{"title": f"COP Surface - {trace.name}"}]
	)
	buttons.append(button)

	if traces:
	# Add the dropdown menu to the layout
	fig.update_layout(
	updatemenus=[
	dict(
	active=0,
	buttons=buttons,
	direction="down",
	pad={"r": 10, "t": 10},
	showactive=True,
	x=0.1,
	xanchor="left",
	y=1.15,
	yanchor="top"
	)
	],
	title=f"COP Surface - {traces[0].name}",
	scene=dict(
	xaxis_title=col_quelle,
	yaxis_title=col_senke,
	zaxis_title=col_cop
	),
	autosize=False,
	width=800,
	height=700,
	margin=dict(l=65, r=50, b=65, t=90)
	)
	# Show in a new browser tab to bypass nbformat issues in VS Code interactive mode
	fig.show(renderer="browser")
	else:
	print("Not enough data to plot surfaces for any combination.")

	# %%
	# Second Visual: Stacked Surfaces for Kältemittel
	print("Generating Stacked 3D Surface Plot by Kältemittel...")

	if not missing:
	fig2 = go.Figure()

	# List of built-in Plotly colorscales to differentiate the Kältemittel
	colorscales = ['Viridis', 'Plasma', 'Inferno', 'Magma', 'Cividis', 'Blues', 'Greens', 'Reds']

	unique_kalt = plot_df[col_kalt].dropna().unique()

	for idx, kalt_val in enumerate(unique_kalt):
	kalt_val = str(kalt_val)
	subset = plot_df[plot_df[col_kalt] == kalt_val]

	if len(subset) < 3:
	continue

	pivot = subset.pivot_table(values=col_cop,
	index=col_senke,
	columns=col_quelle,
	aggfunc='mean')

	cscale = colorscales[idx % len(colorscales)]

	trace = go.Surface(
	x=pivot.columns.values,
	y=pivot.index.values,
	z=pivot.values,
	name=kalt_val,
	showscale=False, # Hide individual colorbars so it's not cluttered
	colorscale=cscale,
	showlegend=True, # Allow selecting/deselecting from legend
	hovertemplate=f"Kältemittel: {kalt_val}<br>Quelle (X): %{{x}}<br>Senke (Y): %{{y}}<br>COP (Z): %{{z}}<extra></extra>"
	)
	fig2.add_trace(trace)

	fig2.update_layout(
	title="Stacked COP Surfaces by Kältemittel",
	scene=dict(
	xaxis_title=col_quelle,
	yaxis_title=col_senke,
	zaxis_title=col_cop
	),
	legend=dict(
	title="Kältemittel<br>(Click to toggle)",
	x=1.05,
	y=0.9
	),
	autosize=False,
	width=900,
	height=800,
	margin=dict(l=65, r=50, b=65, t=90)
	)

	fig2.show(renderer="browser")