# mlip-arena/serve/ranks/combustion.py
# Last commit: github-actions[ci] - "Clean sync from main branch - 2025-10-16 23:00:12" (afe68b4)
from pathlib import Path
import numpy as np
import pandas as pd
import streamlit as st
from mlip_arena.models import REGISTRY as MODELS
# Directory holding the per-family combustion benchmark result files.
DATA_DIR = Path(__file__).parents[2] / "benchmarks" / "combustion"

# A model takes part in this ranking when the name of this file (without
# extension) is listed under its "gpu-tasks" registry entry.
valid_models = [
    name
    for name, info in MODELS.items()
    if Path(__file__).stem in info.get("gpu-tasks", [])
]
@st.cache_data
def get_data(models):
    """Load and merge the H256O128 combustion results for ``models``.

    For every model name the file
    ``DATA_DIR / <family, lower-cased> / "<model>_H256O128.json"`` is read
    with ``pandas.read_json``. The frames are concatenated with a fresh
    index, and rows repeating an earlier ``(formula, method)`` pair are
    dropped.

    Args:
        models: Iterable of model names that are keys of ``MODELS``.

    Returns:
        The merged, de-duplicated ``pandas.DataFrame``.
    """
    frames = []
    for name in models:
        family_dir = DATA_DIR / MODELS[name]["family"].lower()
        frames.append(pd.read_json(family_dir / f"{name}_H256O128.json"))

    merged = pd.concat(frames, ignore_index=True)
    return merged.drop_duplicates(subset=["formula", "method"])
# Merged benchmark results of every participating model, loaded at import time.
df = get_data(valid_models)
@st.cache_data
def get_com_drifts(df):
    """Expand the per-step lists into rows and add the COM drift magnitude.

    The list-valued columns ``timestep``, ``energies`` and ``com_drifts`` are
    exploded together so that each row holds one step. Every ``com_drifts``
    entry is split into three components whose Euclidean norm is stored as
    ``total_com_drift``.

    Args:
        df: Frame with list-valued ``timestep``, ``energies`` and
            ``com_drifts`` columns of matching lengths per row.

    Returns:
        A new frame with a fresh 0..n-1 index, without ``com_drifts`` and with
        an added ``total_com_drift`` column.
    """
    per_step = df.explode(["timestep", "energies", "com_drifts"]).reset_index(
        drop=True
    )

    # One column per Cartesian component of the drift vector.
    components = pd.DataFrame(per_step["com_drifts"].tolist(), index=per_step.index)
    components.columns = ["com_drift_x", "com_drift_y", "com_drift_z"]

    flat = per_step.drop(columns=["com_drifts"])
    flat["total_com_drift"] = np.sqrt(
        components["com_drift_x"] ** 2
        + components["com_drift_y"] ** 2
        + components["com_drift_z"] ** 2
    )
    return flat
# One row per (method, step), with the COM drift magnitude already computed.
df_exploded = get_com_drifts(df)

# Reference reaction enthalpy that the simulated value is compared against.
exp_ref = -68.3078  # kcal/mol

# Reduce every method's trajectory to two scalars: the reaction enthalpy error
# and the COM drift at the last recorded step.
for method, group in df_exploded.groupby("method"):
    energies = np.array(group["energies"])
    drifts = np.array(group["total_com_drift"])
    # Build the row mask once per method instead of once per assignment.
    is_method = df_exploded["method"] == method

    # NOTE(review): 128 is presumably the number of product molecules for the
    # H256O128 system and 23.0 an approximate eV -> kcal/mol factor - confirm.
    # The column name keeps its historical spelling ("enthlapy") because the
    # table construction further down reads it under exactly that name.
    df_exploded.loc[is_method, "reaction_enthlapy_diff"] = (
        (energies[-1] - energies[0]) / 128 * 23.0
    ) - exp_ref
    df_exploded.loc[is_method, "final_com_drift"] = drifts[-1]

# Per-step and bookkeeping columns are not needed once the summaries exist.
df_exploded.drop(
    columns=[
        "temperatures",
        "pressures",
        "total_steps",
        "energies",
        "kinetic_energies",
        "timestep",
        "nproducts",
        "total_com_drift",
        "target_steps",
        "reaction",
        "formula",
        "natoms",
        "seconds_per_step",
        "seconds_per_step_per_atom",
        "final_step",
        "total_time_seconds",
    ],
    inplace=True,
)

# Collapse to one summary row per method and index by the method name.
# (The former debug print and the no-op column rename that followed
# ``set_index`` were removed: "method" is no longer a column at that point.)
df_exploded.drop_duplicates(inplace=True, subset=["method"])
df_exploded.set_index("method", inplace=True)
def _ordinal_rank(values):
    """Return the 0-based ordinal rank of every element (0 = smallest).

    ``np.argsort`` alone yields the indices that would sort ``values``, which
    equals the ranks only when the input is already sorted. Scattering
    ``arange`` through that permutation inverts it and gives real ranks for
    any input order. Ties are broken by original position (stable sort), and
    NaN values sort last.
    """
    order = np.argsort(np.asarray(values), kind="stable")
    ranks = np.empty(len(order), dtype=int)
    ranks[order] = np.arange(len(order))
    return ranks


# Display table: one row per model, built in a single vectorised step from the
# per-method summary frame (indexed by method name).
table = pd.DataFrame(
    {
        "Reaction enthalpy error [kcal/mol]": df_exploded["reaction_enthlapy_diff"],
        "Final COM drift [Å]": df_exploded["final_com_drift"],
        "Steps per second": df_exploded["steps_per_second"],
        "Yield [%]": df_exploded["yield"] * 100,
    }
)
table.index.name = "Model"

# Aggregate rank = sum of the per-metric ordinal ranks, shifted to start at 1.
# Lower is better for |enthalpy error| and COM drift; higher is better for
# speed and yield, so those two are negated before ranking.
table["Rank aggr."] = (
    _ordinal_rank(table["Reaction enthalpy error [kcal/mol]"].abs())
    + _ordinal_rank(table["Final COM drift [Å]"])
    + _ordinal_rank(-table["Steps per second"])
    + _ordinal_rank(-table["Yield [%]"])
    + 1
)
table.sort_values("Rank aggr.", ascending=True, inplace=True, kind="stable")

# Final rank: competition ("min") ranking of the aggregate, so models with the
# same aggregate share a rank.
table["Rank"] = table["Rank aggr."].rank(method="min").astype(int)

table = table.reindex(
    columns=[
        "Rank",
        "Rank aggr.",
        "Reaction enthalpy error [kcal/mol]",
        "Final COM drift [Å]",
        "Steps per second",
        "Yield [%]",
    ]
)
@st.cache_data
def get_table():
    """Return the module-level ranking ``table`` through Streamlit's data cache."""
    ranking = table
    return ranking
def render():
    """Render the combustion ranking table as a styled Streamlit dataframe.

    The table is fetched once from ``get_table()`` and that same frame is used
    for both the styling and the log-scale colour map, so the two can never
    disagree in length or order.
    """
    ranking = get_table()

    # COM drift is coloured on a log10 scale; the 1e-10 offset keeps an exact
    # zero drift from producing -inf.
    drift_log = np.log10(ranking["Final COM drift [Å]"].to_numpy() + 1e-10)

    styled = (
        ranking.style.background_gradient(
            cmap="Oranges",
            subset=["Reaction enthalpy error [kcal/mol]"],
        )
        .background_gradient(
            cmap="Oranges",
            subset=["Final COM drift [Å]"],
            gmap=drift_log,
        )
        # Reversed map: larger values get the lighter colour.
        .background_gradient(cmap="Oranges_r", subset=["Steps per second", "Yield [%]"])
        .background_gradient(
            cmap="Blues",
            subset=["Rank", "Rank aggr."],
        )
        .format(
            "{:.3e}",
            subset=["Final COM drift [Å]"],
        )
    )

    st.dataframe(
        styled,
        use_container_width=True,
    )