Spaces:

atomind
/

mlip-arena

Running

mlip-arena / serve /ranks /combustion.py

github-actions[ci]

Clean sync from main branch - 2025-10-16 23:00:12

afe68b4 2 months ago

4.81 kB

	from pathlib import Path

	import numpy as np
	import pandas as pd
	import streamlit as st

	from mlip_arena.models import REGISTRY as MODELS

	DATA_DIR = Path(__file__).parents[2] / "benchmarks" / "combustion"

	valid_models = [
	model
	for model, metadata in MODELS.items()
	if Path(__file__).stem in metadata.get("gpu-tasks", [])
	]

	@st.cache_data
	def get_data(models):
	dfs = [
	pd.read_json(DATA_DIR / MODELS[model]["family"].lower() / f"{model}_H256O128.json") for model in models
	]
	df = pd.concat(dfs, ignore_index=True)
	df.drop_duplicates(inplace=True, subset=["formula", "method"])
	return df


	df = get_data(valid_models)


	@st.cache_data
	def get_com_drifts(df):
	df_exploded = df.explode(["timestep", "energies", "com_drifts"]).reset_index(
	drop=True
	)

	# Convert the 'com_drifts' column (which are arrays) into separate columns for x, y, and z components
	df_exploded[["com_drift_x", "com_drift_y", "com_drift_z"]] = pd.DataFrame(
	df_exploded["com_drifts"].tolist(), index=df_exploded.index
	)

	# Drop the original 'com_drifts' column
	df_flat = df_exploded.drop(columns=["com_drifts"])

	df_flat["total_com_drift"] = np.sqrt(
	df_flat["com_drift_x"] ** 2
	+ df_flat["com_drift_y"] ** 2
	+ df_flat["com_drift_z"] ** 2
	)

	df_flat = df_flat.drop(columns=["com_drift_x", "com_drift_y", "com_drift_z"])

	return df_flat


	df_exploded = get_com_drifts(df)

	exp_ref = -68.3078 # kcal/mol

	for method, row in df_exploded.groupby("method"):
	# # row = df[df["method"] == method].iloc[0]
	energies = np.array(row["energies"])
	df_exploded.loc[df_exploded["method"] == method, "reaction_enthlapy_diff"] = (
	(energies[-1] - energies[0]) / 128 * 23.0
	) - exp_ref
	df_exploded.loc[df_exploded["method"] == method, "final_com_drift"] = np.array(
	row["total_com_drift"]
	)[-1]


	df_exploded.drop(
	columns=[
	"temperatures",
	"pressures",
	"total_steps",
	"energies",
	"kinetic_energies",
	"timestep",
	"nproducts",
	"total_com_drift",
	"target_steps",
	"reaction",
	"formula",
	"natoms",
	"seconds_per_step",
	"seconds_per_step_per_atom",
	"final_step",
	"total_time_seconds",
	],
	axis=1,
	inplace=True,
	)

	df_exploded.drop_duplicates(inplace=True, subset=["method"])

	print(df_exploded.columns)

	df_exploded.set_index("method", inplace=True)

	df_exploded.rename(columns={"method": "Model"}, inplace=True)


	table = pd.DataFrame()

	for index, row in df_exploded.iterrows():
	new_row = {
	"Model": index,
	"Reaction enthalpy error [kcal/mol]": row["reaction_enthlapy_diff"],
	"Final COM drift [Å]": row["final_com_drift"],
	"Steps per second": row["steps_per_second"],
	"Yield [%]": row["yield"] * 100,
	}

	table = pd.concat([table, pd.DataFrame([new_row])], ignore_index=True)

	table.set_index("Model", inplace=True)

	table.sort_values("Reaction enthalpy error [kcal/mol]", ascending=True, inplace=True)
	table["Rank"] = np.argsort(
	np.abs(table["Reaction enthalpy error [kcal/mol]"].to_numpy())
	)

	table.sort_values("Final COM drift [Å]", ascending=True, inplace=True)
	table["Rank"] += np.argsort(table["Final COM drift [Å]"].to_numpy())

	table.sort_values("Steps per second", ascending=False, inplace=True)
	table["Rank"] += np.argsort(-table["Steps per second"].to_numpy())

	table.sort_values("Yield [%]", ascending=False, inplace=True)
	table["Rank"] += np.argsort(-table["Yield [%]"].to_numpy())

	table["Rank"] += 1

	table.sort_values(["Rank"], ascending=True, inplace=True)

	table["Rank aggr."] = table["Rank"]
	table["Rank"] = table["Rank aggr."].rank(method="min").astype(int)


	table = table.reindex(
	columns=[
	"Rank",
	"Rank aggr.",
	"Reaction enthalpy error [kcal/mol]",
	"Final COM drift [Å]",
	"Steps per second",
	"Yield [%]",
	]
	)


	@st.cache_data
	def get_table():
	return table


	def render():
	s = (
	get_table()
	.style.background_gradient(
	cmap="Oranges",
	subset=["Reaction enthalpy error [kcal/mol]"],
	)
	.background_gradient(
	cmap="Oranges",
	subset=["Final COM drift [Å]"],
	gmap=np.log10(table["Final COM drift [Å]"].to_numpy() + 1e-10),
	)
	.background_gradient(cmap="Oranges_r", subset=["Steps per second", "Yield [%]"])
	.background_gradient(
	cmap="Blues",
	subset=["Rank", "Rank aggr."],
	)
	.format(
	"{:.3e}",
	subset=["Final COM drift [Å]"],
	)
	)

	st.dataframe(
	s,
	use_container_width=True,
	)