Spaces:
Running
Running
| from pathlib import Path | |
| import numpy as np | |
| import pandas as pd | |
| import streamlit as st | |
| from mlip_arena.models import REGISTRY as MODELS | |
# Directory with the combustion benchmark results, resolved relative to this file.
DATA_DIR = Path(__file__).parents[2] / "benchmarks" / "combustion"

# This task is identified by the stem of this file's name; a registry entry
# takes part in the leaderboard when that stem is listed under "gpu-tasks".
_task_name = Path(__file__).stem
valid_models = [
    name for name, info in MODELS.items() if _task_name in info.get("gpu-tasks", [])
]
def get_data(models):
    """Load and merge the H256O128 combustion results of the given models.

    For every name in ``models`` the file
    ``DATA_DIR / <registry "family", lower-cased> / "<name>_H256O128.json"``
    is read with ``pandas.read_json``. The frames are concatenated with a
    fresh integer index, and only the first row of each
    ``("formula", "method")`` pair is kept.

    Args:
        models: Iterable of model names that are keys of ``MODELS``.

    Returns:
        The combined, de-duplicated ``pandas.DataFrame``.
    """
    frames = []
    for name in models:
        family_dir = DATA_DIR / MODELS[name]["family"].lower()
        frames.append(pd.read_json(family_dir / f"{name}_H256O128.json"))

    combined = pd.concat(frames, ignore_index=True)
    return combined.drop_duplicates(subset=["formula", "method"])
# Results of every model registered for this task, loaded once at import time.
df = get_data(models=valid_models)
def get_com_drifts(df):
    """Expand per-run trajectories to one row per entry and add the COM drift norm.

    The list-valued columns ``"timestep"``, ``"energies"`` and ``"com_drifts"``
    are exploded together, so each output row holds a single entry of each.
    Every exploded ``"com_drifts"`` value is unpacked into three components
    and replaced by their Euclidean norm in a new ``"total_com_drift"``
    column. The input frame is not modified.

    Args:
        df: Frame whose ``"timestep"``, ``"energies"`` and ``"com_drifts"``
            cells are equally long sequences; each ``"com_drifts"`` element
            is itself a 3-component sequence.

    Returns:
        A new frame with a fresh integer index, without ``"com_drifts"`` and
        with ``"total_com_drift"`` appended as the last column.
    """
    exploded = df.explode(["timestep", "energies", "com_drifts"]).reset_index(
        drop=True
    )

    # Unpack the 3-vectors into a side frame instead of adding temporary
    # columns to the result and dropping them again afterwards.
    components = pd.DataFrame(
        exploded["com_drifts"].tolist(),
        index=exploded.index,
        columns=["com_drift_x", "com_drift_y", "com_drift_z"],
    )

    flat = exploded.drop(columns=["com_drifts"])
    flat["total_com_drift"] = np.sqrt(
        components["com_drift_x"] ** 2
        + components["com_drift_y"] ** 2
        + components["com_drift_z"] ** 2
    )
    return flat
# One row per (model, trajectory entry), with the COM drift norm attached.
df_exploded = get_com_drifts(df)

exp_ref = -68.3078  # kcal/mol

# Per model: enthalpy error with respect to ``exp_ref`` and the COM drift of the
# last trajectory entry. Both scalars are broadcast onto every row of the
# model; the rows are collapsed to one per model further below.
for method, row in df_exploded.groupby("method"):
    energies = np.array(row["energies"])
    is_method = df_exploded["method"] == method
    # NOTE(review): 128 is presumably the number of product molecules in the
    # H256O128 cell and 23.0 an approximate eV -> kcal/mol factor - confirm.
    df_exploded.loc[is_method, "reaction_enthlapy_diff"] = (
        (energies[-1] - energies[0]) / 128 * 23.0
    ) - exp_ref
    df_exploded.loc[is_method, "final_com_drift"] = np.array(
        row["total_com_drift"]
    )[-1]

# Drop the per-entry and bookkeeping columns, then keep a single row per model.
# (The former debug ``print`` of the columns and a no-op ``rename`` of the
# already-consumed "method" column were removed.)
df_exploded = (
    df_exploded.drop(
        columns=[
            "temperatures",
            "pressures",
            "total_steps",
            "energies",
            "kinetic_energies",
            "timestep",
            "nproducts",
            "total_com_drift",
            "target_steps",
            "reaction",
            "formula",
            "natoms",
            "seconds_per_step",
            "seconds_per_step_per_atom",
            "final_step",
            "total_time_seconds",
        ]
    )
    .drop_duplicates(subset=["method"])
    .set_index("method")
)

# Build the leaderboard in one step instead of concatenating row by row.
table = pd.DataFrame(
    {
        "Reaction enthalpy error [kcal/mol]": df_exploded["reaction_enthlapy_diff"],
        "Final COM drift [Å]": df_exploded["final_com_drift"],
        "Steps per second": df_exploded["steps_per_second"],
        "Yield [%]": df_exploded["yield"] * 100,
    }
).astype(float)
table.index.name = "Model"


def _zero_based_rank(values, ascending=True):
    """Return the 0-based ordinal rank of each entry (0 = best, NaN ranks last)."""
    ranks = values.rank(method="first", ascending=ascending, na_option="bottom")
    return ranks.astype(int) - 1


# Aggregate rank = sum of the per-metric ranks (+1 so the best possible is 1).
# ``np.argsort`` must not be used here: it returns the permutation that sorts
# the values, not the rank of each row, so applied to |error| of a table that
# was sorted by the signed error it credited ranks to the wrong models.
table["Rank aggr."] = (
    _zero_based_rank(table["Reaction enthalpy error [kcal/mol]"].abs())
    + _zero_based_rank(table["Final COM drift [Å]"])
    + _zero_based_rank(table["Steps per second"], ascending=False)
    + _zero_based_rank(table["Yield [%]"], ascending=False)
    + 1
)

# Final rank: competition ranking of the aggregate (ties share the lowest rank).
table["Rank"] = table["Rank aggr."].rank(method="min").astype(int)
table = table.sort_values("Rank aggr.", ascending=True, kind="stable")

table = table.reindex(
    columns=[
        "Rank",
        "Rank aggr.",
        "Reaction enthalpy error [kcal/mol]",
        "Final COM drift [Å]",
        "Steps per second",
        "Yield [%]",
    ]
)
def get_table():
    """Return the module-level leaderboard ``table`` (the same object, not a copy)."""
    leaderboard = table
    return leaderboard
def render():
    """Style the leaderboard and show it as a Streamlit dataframe.

    Colouring applied to the table returned by ``get_table()``:

    * enthalpy error: "Oranges" gradient on the raw values;
    * final COM drift: "Oranges" gradient driven by ``log10(value + 1e-10)``
      and displayed in scientific notation with three decimals;
    * steps per second and yield: reversed "Oranges" gradient;
    * both rank columns: "Blues" gradient.
    """
    enthalpy_col = "Reaction enthalpy error [kcal/mol]"
    drift_col = "Final COM drift [Å]"

    # The small offset keeps log10 finite when a drift is exactly zero.
    drift_gmap = np.log10(table[drift_col].to_numpy() + 1e-10)

    styled = get_table().style
    styled = styled.background_gradient(cmap="Oranges", subset=[enthalpy_col])
    styled = styled.background_gradient(
        cmap="Oranges", subset=[drift_col], gmap=drift_gmap
    )
    styled = styled.background_gradient(
        cmap="Oranges_r", subset=["Steps per second", "Yield [%]"]
    )
    styled = styled.background_gradient(cmap="Blues", subset=["Rank", "Rank aggr."])
    styled = styled.format("{:.3e}", subset=[drift_col])

    st.dataframe(styled, use_container_width=True)