| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | from pathlib import Path |
| | from typing import Union |
| |
|
| | |
| | |
# Names of the subdirectories / metadata file that live inside a checkpoint
# directory. CONTEXT_PATH is the component appended by ckpt_to_context_subdir().
WEIGHTS_PATH: str = "weights"
CONTEXT_PATH: str = "context"
ADAPTER_META_FILENAME: str = "adapter_metadata.json"

# "hf_"-prefixed counterparts (presumably for Hugging Face-format checkpoints
# -- confirm against the code that consumes them).
HF_WEIGHTS_PATH: str = "hf_weights"
HF_ADAPTER_PATH: str = "hf_adapter"
HF_ADAPTER_CONFIG_FILENAME: str = "adapter_config.json"
| |
|
| |
|
def idempotent_path_append(base_dir: Union[str, Path], suffix: str) -> Path:
    """Append ``suffix`` to ``base_dir`` unless it is already the last component.

    ``base_dir`` is first converted to a path object: a multi-storage-client
    path when ``is_multistorageclient_url`` accepts it, a ``pathlib.Path``
    otherwise. ``suffix`` is then joined on only if the final path component
    differs from it, so applying the function twice gives the same result as
    applying it once.

    If the resulting object is an ``AdapterPath``, the same rule is applied to
    its ``base_model_path`` attribute.

    Args:
        base_dir: The base directory, as a string or path object.
        suffix: The single path component to append.

    Returns:
        The path object ending in ``suffix``.
    """
    # Imported at call time rather than at module level (presumably to avoid
    # an import cycle -- TODO confirm).
    from nemo.lightning.resume import AdapterPath
    from nemo.utils.msc_utils import import_multistorageclient, is_multistorageclient_url

    if is_multistorageclient_url(base_dir):
        msc = import_multistorageclient()
        base_dir = msc.Path(base_dir)
    else:
        base_dir = Path(base_dir)

    # A path with no components ("" or ".") has an empty ``parts`` tuple;
    # guard the lookup so it cannot raise IndexError and simply append.
    components = base_dir.parts
    if not components or components[-1] != suffix:
        base_dir = base_dir / suffix

    # NOTE(review): ``base_dir`` was rebuilt through Path(...) / msc.Path(...)
    # above. If that conversion does not preserve the AdapterPath subclass,
    # this branch never runs -- confirm against AdapterPath's definition.
    if isinstance(base_dir, AdapterPath):
        base_model_components = base_dir.base_model_path.parts
        if not base_model_components or base_model_components[-1] != suffix:
            base_dir.base_model_path = base_dir.base_model_path / suffix
    return base_dir
| |
|
| |
|
def ckpt_to_context_subdir(filepath: Union[str, Path]) -> Path:
    """Return the context subdirectory for the checkpoint at ``filepath``.

    The input is normalised with ``ckpt_to_dir`` and ``CONTEXT_PATH`` is then
    appended via ``idempotent_path_append``, i.e. only when the directory does
    not already end in that component.
    """
    return idempotent_path_append(ckpt_to_dir(filepath=filepath), CONTEXT_PATH)
| |
|
| |
|
def ckpt_to_dir(filepath: Union[str, Path]) -> Path:
    """Return the directory path that corresponds to a checkpoint path.

    PTL considers checkpoints as ``.ckpt`` files. This function drops a final
    ``.ckpt`` extension so the result can be used as a directory for
    distributed checkpoints. A path without that extension is returned
    unchanged (after conversion to a path object).

    Args:
        filepath: Checkpoint location as a string or path object. An
            ``AdapterPath`` instance is returned as-is, without conversion.

    Returns:
        The path with any final ``.ckpt`` extension removed.
    """
    # Imported at call time rather than at module level (presumably to avoid
    # an import cycle -- TODO confirm).
    from nemo.lightning.resume import AdapterPath
    from nemo.utils.msc_utils import import_multistorageclient, is_multistorageclient_url

    if isinstance(filepath, AdapterPath):
        return filepath

    if is_multistorageclient_url(filepath):
        msc = import_multistorageclient()
        filepath = msc.Path(filepath)
    else:
        filepath = Path(filepath)

    # Only the final extension matters: "a/b.ckpt" -> "a/b", while "a/b" and
    # "a/b.tmp" stay as they are. Checking the suffix directly, instead of
    # appending ".ckpt" and stripping it again, gives the same result and
    # avoids the ValueError that with_suffix() raises for paths whose final
    # component is empty (such as "." or "/").
    if filepath.suffix == ".ckpt":
        return filepath.with_name(filepath.stem)
    return filepath
| |
|