Spaces:
Running
Running
| from itertools import product | |
| from pathlib import Path | |
| import numpy as np | |
| import pandas as pd | |
| from dask.distributed import Client | |
| from dask_jobqueue import SLURMCluster | |
| from mlip_arena.models import MLIPEnum | |
| from mlip_arena.tasks import ELASTICITY, OPT, PHONON | |
| from mlip_arena.tasks.optimize import run as OPT | |
| from mlip_arena.tasks.utils import get_calculator | |
| from numpy import linalg as LA | |
| from prefect import flow, task | |
| from prefect_dask import DaskTaskRunner | |
| from tqdm.auto import tqdm | |
| from ase.db import connect | |
# Names of the models to benchmark; run_all skips every MLIPEnum member whose
# name is not listed here.
select_models = [
    "ALIGNN", "CHGNet", "M3GNet", "MACE-MP(M)",
    "MACE-MPA", "MatterSim", "ORBv2", "SevenNet",
]
def elastic_tensor_to_voigt(C):
    """
    Reduce a rank-4 (3x3x3x3) elastic tensor to a 3x3 in-plane matrix.

    Only the in-plane index pairs are kept: (0, 0) -> 0, (1, 1) -> 1 and
    (0, 1)/(1, 0) -> 2. Every component carrying an index along the third
    axis is discarded, so the result is 3x3 and NOT the 6x6 matrix of the
    full three-dimensional Voigt reduction.

    The shear row and the shear column are each scaled by sqrt(2), so the
    shear-shear entry is scaled by 2. This is the Mandel-style normalisation
    rather than plain Voigt notation (presumably chosen so that the
    eigenvalues of the returned matrix are meaningful for the caller --
    confirm before changing the scaling).

    Parameters:
    C (array-like): A 3x3x3x3 elastic tensor.
    Returns:
    numpy.ndarray: A 3x3 matrix ordered as (xx, yy, xy).
    """
    C = np.asarray(C)

    # One Cartesian index pair per reduced index. The shear entry is read
    # from (1, 0): for a tensor with the usual minor symmetries this equals
    # the (0, 1) component, and it is the component the original
    # last-write-wins loop ended up storing.
    index_pairs = ((0, 0), (1, 1), (1, 0))
    weights = (1, 1, 2**0.5)

    C_voigt = np.zeros((3, 3))
    for alpha, (i, j) in enumerate(index_pairs):
        for beta, (k, m) in enumerate(index_pairs):
            C_voigt[alpha, beta] = C[i, j, k, m] * (weights[alpha] * weights[beta])
    return C_voigt
| # - | |
@task
def run_one(model, row):
    """
    Relax one database structure with one model and record its elastic
    eigenvalues and zone-centre phonon frequencies.

    The function is a Prefect task because run_all schedules it through
    ``run_one.submit``.

    Steps, in order:
    1. Optimise the structure with OPT (FIRE, fmax=0.05, at most 500 steps).
    2. Compute the elastic tensor with ELASTICITY on the optimised structure,
       reduce it with elastic_tensor_to_voigt and diagonalise it.
    3. Save the reduced tensor and its eigenvalues to
       ``<model.name>/<uid or formula>/elastic.npz``.
    4. Run PHONON with a (2, 2, 1) supercell and read the frequencies at
       q = (0, 0, 0).
    5. Insert or replace the (model, uid) row in ``<model.name>.pkl``.

    Parameters:
    model: Member of MLIPEnum; ``model.name`` labels all output files.
    row: ASE database row; must provide ``toatoms()`` and ``key_value_pairs``.
    Returns:
    None. Results are written to disk only.
    """
    results_path = Path(f"{model.name}.pkl")

    atoms = row.toatoms()
    calc = get_calculator(model)

    result_opt = OPT(
        atoms,
        calc,
        optimizer="FIRE",
        criterion=dict(fmax=0.05, steps=500),
        symmetry=True,
    )
    atoms = result_opt["atoms"]

    # The structure was relaxed just above, so no second relaxation is asked for.
    result_elastic = ELASTICITY(
        atoms,
        calc,
        optimizer="FIRE",
        criterion=dict(fmax=0.05, steps=500),
        pre_relax=False,
    )
    elastic_tensor = elastic_tensor_to_voigt(result_elastic["elastic_tensor"])
    # NOTE(review): LA.eig is the general solver and may return complex values
    # for a slightly non-symmetric tensor; kept so stored results stay comparable.
    eigenvalues, _ = LA.eig(elastic_tensor)

    # Fall back to the chemical formula when the row carries no "uid" key.
    outdir = Path(f"{model.name}") / row.key_value_pairs.get(
        "uid", atoms.get_chemical_formula()
    )
    outdir.mkdir(parents=True, exist_ok=True)
    np.savez(outdir / "elastic.npz", tensor=elastic_tensor, eigenvalues=eigenvalues)

    result_phonon = PHONON(
        atoms,
        calc,
        # No replication along the third axis -- presumably the out-of-plane
        # direction of the 2D structures; confirm against the dataset.
        supercell_matrix=(2, 2, 1),
        outdir=outdir,
    )
    frequencies = result_phonon["phonon"].get_frequencies(q=(0, 0, 0))

    new_row = pd.DataFrame(
        [
            {
                "model": model.name,
                "uid": row.key_value_pairs.get("uid", None),
                "eigenvalues": eigenvalues,
                "frequencies": frequencies,
            }
        ]
    )

    # Load the shared results file only now, right before rewriting it, so the
    # read-modify-write window is as short as possible. This is still not
    # atomic: workers finishing at the same moment for the same model can
    # overwrite each other's rows (the per-structure outdir files are safe).
    if results_path.exists():
        df = pd.concat([pd.read_pickle(results_path), new_row], ignore_index=True)
    else:
        df = new_row
    # Keep only the newest entry for each (model, uid) pair.
    df.drop_duplicates(subset=["model", "uid"], keep="last", inplace=True)
    df.to_pickle(results_path)
@flow
def run_all():
    """
    Submit run_one for a fixed random sample of database rows and every
    selected model, then wait for all of them.

    The function is a Prefect flow because the script entry point configures
    it through ``run_all.with_options``.

    Up to 1000 row ids are drawn with a fixed seed (0) so that repeated runs
    process the same structures. Models are taken from MLIPEnum and limited
    to the names in select_models.

    Returns:
    list: One entry per submitted task, obtained with
    ``result(raise_on_failure=False)`` so a failed task does not abort the
    collection of the others.
    """
    import random

    # A private generator yields the same sample as seeding the module-level
    # one with 0, without altering global random state.
    rng = random.Random(0)

    chosen_models = [model for model in MLIPEnum if model.name in select_models]

    with connect("c2db.db") as db:
        n_rows = len(db)
        # Cap the sample size: sampling more ids than exist raises ValueError.
        # NOTE(review): assumes row ids are contiguous from 1 to len(db).
        sampled_ids = set(rng.sample(range(1, n_rows + 1), min(1000, n_rows)))
        rows = list(db.select(filter=lambda r: r["id"] in sampled_ids))

    # Rows are fully loaded, so the database is no longer needed while the
    # (long-running) tasks are submitted and awaited.
    futures = [
        run_one.submit(model, row)
        for row, model in tqdm(
            product(rows, chosen_models), total=len(rows) * len(chosen_models)
        )
    ]
    return [f.result(raise_on_failure=False) for f in futures]
| # + | |
if __name__ == "__main__":
    # Resources requested for each SLURM allocation.
    nodes_per_alloc = 1
    gpus_per_alloc = 1

    cluster_kwargs = dict(
        cores=1,
        memory="64 GB",
        processes=1,
        shebang="#!/bin/bash",
        account="matgen",
        walltime="00:30:00",
        # job_cpu=128,
        job_mem="0",
        # Shell commands run inside each job before the dask worker starts.
        job_script_prologue=[
            "source ~/.bashrc",
            "module load python",
            "source activate /pscratch/sd/c/cyrusyc/.conda/dev",
        ],
        # Suppress these generated directives; the job name and the node/GPU
        # request are supplied explicitly through job_extra_directives.
        job_directives_skip=["-n", "--cpus-per-task", "-J"],
        job_extra_directives=[
            "-J c2db",
            "-q regular",
            f"-N {nodes_per_alloc}",
            "-C gpu",
            f"-G {gpus_per_alloc}",
        ],
    )

    # Context managers make sure the client is disconnected and the cluster
    # (and its SLURM jobs) is shut down even when the flow raises.
    with SLURMCluster(**cluster_kwargs) as cluster:
        print(cluster.job_script())
        # Scale between 25 and 50 SLURM jobs depending on the pending work.
        cluster.adapt(minimum_jobs=25, maximum_jobs=50)

        with Client(cluster) as client:
            # Run the flow with its tasks dispatched to the cluster above.
            run_all.with_options(
                task_runner=DaskTaskRunner(address=client.scheduler.address),
                log_prints=True,
            )()
# -