Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .venv/lib/python3.11/site-packages/ray/tune/examples/__init__.py +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/async_hyperband_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/ax_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/bayesopt_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/bohb_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/cifar10_pytorch.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/custom_func_checkpointing.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/hyperband_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/hyperband_function_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/hyperopt_conditional_search_space_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/lightgbm_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/logging_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mlflow_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mlflow_ptl.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mnist_ptl_mini.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mnist_pytorch.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mnist_pytorch_trainable.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/nevergrad_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/optuna_define_by_run_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/optuna_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/optuna_multiobjective_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pb2_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pb2_ppo_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_convnet_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_convnet_function_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_function.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_memnn_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_ppo_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_tune_cifar10_with_keras.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/tf_mnist_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/tune_basic_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/tune_mnist_keras.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/xgboost_dynamic_resources_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/xgboost_example.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/async_hyperband_example.py +56 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/ax_example.py +97 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/bayesopt_example.py +63 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/bohb_example.py +108 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/custom_func_checkpointing.py +70 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/hyperband_example.py +44 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/hyperband_function_example.py +76 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/hyperopt_conditional_search_space_example.py +110 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/logging_example.py +64 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/mlflow_example.py +128 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/mlflow_ptl.py +105 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/mnist_ptl_mini.py +166 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/mnist_pytorch.py +161 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/mnist_pytorch_trainable.py +98 -0
- .venv/lib/python3.11/site-packages/ray/tune/examples/nevergrad_example.py +77 -0
.venv/lib/python3.11/site-packages/ray/tune/examples/__init__.py
ADDED
|
File without changes
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/async_hyperband_example.cpython-311.pyc
ADDED
|
Binary file (2.63 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/ax_example.cpython-311.pyc
ADDED
|
Binary file (4.72 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/bayesopt_example.cpython-311.pyc
ADDED
|
Binary file (3 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/bohb_example.cpython-311.pyc
ADDED
|
Binary file (4.82 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/cifar10_pytorch.cpython-311.pyc
ADDED
|
Binary file (15.6 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/custom_func_checkpointing.cpython-311.pyc
ADDED
|
Binary file (4.07 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/hyperband_example.cpython-311.pyc
ADDED
|
Binary file (2.05 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/hyperband_function_example.cpython-311.pyc
ADDED
|
Binary file (4.16 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/hyperopt_conditional_search_space_example.cpython-311.pyc
ADDED
|
Binary file (4.67 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/lightgbm_example.cpython-311.pyc
ADDED
|
Binary file (3.68 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/logging_example.cpython-311.pyc
ADDED
|
Binary file (3.27 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mlflow_example.cpython-311.pyc
ADDED
|
Binary file (5.31 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mlflow_ptl.cpython-311.pyc
ADDED
|
Binary file (4.85 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mnist_ptl_mini.cpython-311.pyc
ADDED
|
Binary file (12 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mnist_pytorch.cpython-311.pyc
ADDED
|
Binary file (9.18 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mnist_pytorch_trainable.cpython-311.pyc
ADDED
|
Binary file (5.36 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/nevergrad_example.cpython-311.pyc
ADDED
|
Binary file (3.1 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/optuna_define_by_run_example.cpython-311.pyc
ADDED
|
Binary file (4.48 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/optuna_example.cpython-311.pyc
ADDED
|
Binary file (3.37 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/optuna_multiobjective_example.cpython-311.pyc
ADDED
|
Binary file (3.44 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pb2_example.cpython-311.pyc
ADDED
|
Binary file (2.01 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pb2_ppo_example.cpython-311.pyc
ADDED
|
Binary file (7.29 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_convnet_example.cpython-311.pyc
ADDED
|
Binary file (6.59 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_convnet_function_example.cpython-311.pyc
ADDED
|
Binary file (6.61 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_example.cpython-311.pyc
ADDED
|
Binary file (5.46 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_function.cpython-311.pyc
ADDED
|
Binary file (6.4 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_memnn_example.cpython-311.pyc
ADDED
|
Binary file (16.7 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_ppo_example.cpython-311.pyc
ADDED
|
Binary file (3.44 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_tune_cifar10_with_keras.cpython-311.pyc
ADDED
|
Binary file (9.54 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/tf_mnist_example.cpython-311.pyc
ADDED
|
Binary file (9.18 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/tune_basic_example.cpython-311.pyc
ADDED
|
Binary file (2.48 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/tune_mnist_keras.cpython-311.pyc
ADDED
|
Binary file (4.68 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/xgboost_dynamic_resources_example.cpython-311.pyc
ADDED
|
Binary file (6.95 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/xgboost_example.cpython-311.pyc
ADDED
|
Binary file (5.56 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/ray/tune/examples/async_hyperband_example.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import time
|
| 5 |
+
|
| 6 |
+
from ray import train, tune
|
| 7 |
+
from ray.tune.schedulers import AsyncHyperBandScheduler
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def evaluation_fn(step, width, height):
|
| 11 |
+
time.sleep(0.1)
|
| 12 |
+
return (0.1 + width * step / 100) ** (-1) + height * 0.1
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def easy_objective(config):
|
| 16 |
+
# Hyperparameters
|
| 17 |
+
width, height = config["width"], config["height"]
|
| 18 |
+
|
| 19 |
+
for step in range(config["steps"]):
|
| 20 |
+
# Iterative training function - can be an arbitrary training procedure
|
| 21 |
+
intermediate_score = evaluation_fn(step, width, height)
|
| 22 |
+
# Feed the score back back to Tune.
|
| 23 |
+
train.report({"iterations": step, "mean_loss": intermediate_score})
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
if __name__ == "__main__":
|
| 27 |
+
parser = argparse.ArgumentParser()
|
| 28 |
+
parser.add_argument(
|
| 29 |
+
"--smoke-test", action="store_true", help="Finish quickly for testing"
|
| 30 |
+
)
|
| 31 |
+
args, _ = parser.parse_known_args()
|
| 32 |
+
|
| 33 |
+
# AsyncHyperBand enables aggressive early stopping of bad trials.
|
| 34 |
+
scheduler = AsyncHyperBandScheduler(grace_period=5, max_t=100)
|
| 35 |
+
|
| 36 |
+
# 'training_iteration' is incremented every time `trainable.step` is called
|
| 37 |
+
stopping_criteria = {"training_iteration": 1 if args.smoke_test else 9999}
|
| 38 |
+
|
| 39 |
+
tuner = tune.Tuner(
|
| 40 |
+
tune.with_resources(easy_objective, {"cpu": 1, "gpu": 0}),
|
| 41 |
+
run_config=train.RunConfig(
|
| 42 |
+
name="asynchyperband_test",
|
| 43 |
+
stop=stopping_criteria,
|
| 44 |
+
verbose=1,
|
| 45 |
+
),
|
| 46 |
+
tune_config=tune.TuneConfig(
|
| 47 |
+
metric="mean_loss", mode="min", scheduler=scheduler, num_samples=20
|
| 48 |
+
),
|
| 49 |
+
param_space={ # Hyperparameter space
|
| 50 |
+
"steps": 100,
|
| 51 |
+
"width": tune.uniform(10, 100),
|
| 52 |
+
"height": tune.uniform(0, 100),
|
| 53 |
+
},
|
| 54 |
+
)
|
| 55 |
+
results = tuner.fit()
|
| 56 |
+
print("Best hyperparameters found were: ", results.get_best_result().config)
|
.venv/lib/python3.11/site-packages/ray/tune/examples/ax_example.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""This example demonstrates the usage of AxSearch with Ray Tune.
|
| 2 |
+
|
| 3 |
+
It also checks that it is usable with a separate scheduler.
|
| 4 |
+
|
| 5 |
+
Requires the Ax library to be installed (`pip install ax-platform`).
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import time
|
| 9 |
+
|
| 10 |
+
import numpy as np
|
| 11 |
+
|
| 12 |
+
from ray import train, tune
|
| 13 |
+
from ray.tune.schedulers import AsyncHyperBandScheduler
|
| 14 |
+
from ray.tune.search.ax import AxSearch
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def hartmann6(x):
|
| 18 |
+
alpha = np.array([1.0, 1.2, 3.0, 3.2])
|
| 19 |
+
A = np.array(
|
| 20 |
+
[
|
| 21 |
+
[10, 3, 17, 3.5, 1.7, 8],
|
| 22 |
+
[0.05, 10, 17, 0.1, 8, 14],
|
| 23 |
+
[3, 3.5, 1.7, 10, 17, 8],
|
| 24 |
+
[17, 8, 0.05, 10, 0.1, 14],
|
| 25 |
+
]
|
| 26 |
+
)
|
| 27 |
+
P = 10 ** (-4) * np.array(
|
| 28 |
+
[
|
| 29 |
+
[1312, 1696, 5569, 124, 8283, 5886],
|
| 30 |
+
[2329, 4135, 8307, 3736, 1004, 9991],
|
| 31 |
+
[2348, 1451, 3522, 2883, 3047, 6650],
|
| 32 |
+
[4047, 8828, 8732, 5743, 1091, 381],
|
| 33 |
+
]
|
| 34 |
+
)
|
| 35 |
+
y = 0.0
|
| 36 |
+
for j, alpha_j in enumerate(alpha):
|
| 37 |
+
t = 0
|
| 38 |
+
for k in range(6):
|
| 39 |
+
t += A[j, k] * ((x[k] - P[j, k]) ** 2)
|
| 40 |
+
y -= alpha_j * np.exp(-t)
|
| 41 |
+
return y
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def easy_objective(config):
|
| 45 |
+
for i in range(config["iterations"]):
|
| 46 |
+
x = np.array([config.get("x{}".format(i + 1)) for i in range(6)])
|
| 47 |
+
train.report(
|
| 48 |
+
{
|
| 49 |
+
"timesteps_total": i,
|
| 50 |
+
"hartmann6": hartmann6(x),
|
| 51 |
+
"l2norm": np.sqrt((x**2).sum()),
|
| 52 |
+
}
|
| 53 |
+
)
|
| 54 |
+
time.sleep(0.02)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
if __name__ == "__main__":
|
| 58 |
+
import argparse
|
| 59 |
+
|
| 60 |
+
parser = argparse.ArgumentParser()
|
| 61 |
+
parser.add_argument(
|
| 62 |
+
"--smoke-test", action="store_true", help="Finish quickly for testing"
|
| 63 |
+
)
|
| 64 |
+
args, _ = parser.parse_known_args()
|
| 65 |
+
|
| 66 |
+
algo = AxSearch(
|
| 67 |
+
parameter_constraints=["x1 + x2 <= 2.0"], # Optional.
|
| 68 |
+
outcome_constraints=["l2norm <= 1.25"], # Optional.
|
| 69 |
+
)
|
| 70 |
+
# Limit to 4 concurrent trials
|
| 71 |
+
algo = tune.search.ConcurrencyLimiter(algo, max_concurrent=4)
|
| 72 |
+
scheduler = AsyncHyperBandScheduler()
|
| 73 |
+
tuner = tune.Tuner(
|
| 74 |
+
easy_objective,
|
| 75 |
+
run_config=train.RunConfig(
|
| 76 |
+
name="ax",
|
| 77 |
+
stop={"timesteps_total": 100},
|
| 78 |
+
),
|
| 79 |
+
tune_config=tune.TuneConfig(
|
| 80 |
+
metric="hartmann6", # provided in the 'easy_objective' function
|
| 81 |
+
mode="min",
|
| 82 |
+
search_alg=algo,
|
| 83 |
+
scheduler=scheduler,
|
| 84 |
+
num_samples=10 if args.smoke_test else 50,
|
| 85 |
+
),
|
| 86 |
+
param_space={
|
| 87 |
+
"iterations": 100,
|
| 88 |
+
"x1": tune.uniform(0.0, 1.0),
|
| 89 |
+
"x2": tune.uniform(0.0, 1.0),
|
| 90 |
+
"x3": tune.uniform(0.0, 1.0),
|
| 91 |
+
"x4": tune.uniform(0.0, 1.0),
|
| 92 |
+
"x5": tune.uniform(0.0, 1.0),
|
| 93 |
+
"x6": tune.uniform(0.0, 1.0),
|
| 94 |
+
},
|
| 95 |
+
)
|
| 96 |
+
results = tuner.fit()
|
| 97 |
+
print("Best hyperparameters found were: ", results.get_best_result().config)
|
.venv/lib/python3.11/site-packages/ray/tune/examples/bayesopt_example.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""This example demonstrates the usage of BayesOpt with Ray Tune.
|
| 2 |
+
|
| 3 |
+
It also checks that it is usable with a separate scheduler.
|
| 4 |
+
|
| 5 |
+
Requires the BayesOpt library to be installed (`pip install bayesian-optimization`).
|
| 6 |
+
"""
|
| 7 |
+
import time
|
| 8 |
+
|
| 9 |
+
from ray import train, tune
|
| 10 |
+
from ray.tune.schedulers import AsyncHyperBandScheduler
|
| 11 |
+
from ray.tune.search import ConcurrencyLimiter
|
| 12 |
+
from ray.tune.search.bayesopt import BayesOptSearch
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def evaluation_fn(step, width, height):
|
| 16 |
+
return (0.1 + width * step / 100) ** (-1) + height * 0.1
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def easy_objective(config):
|
| 20 |
+
# Hyperparameters
|
| 21 |
+
width, height = config["width"], config["height"]
|
| 22 |
+
|
| 23 |
+
for step in range(config["steps"]):
|
| 24 |
+
# Iterative training function - can be any arbitrary training procedure
|
| 25 |
+
intermediate_score = evaluation_fn(step, width, height)
|
| 26 |
+
# Feed the score back back to Tune.
|
| 27 |
+
train.report({"iterations": step, "mean_loss": intermediate_score})
|
| 28 |
+
time.sleep(0.1)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
if __name__ == "__main__":
|
| 32 |
+
import argparse
|
| 33 |
+
|
| 34 |
+
parser = argparse.ArgumentParser()
|
| 35 |
+
parser.add_argument(
|
| 36 |
+
"--smoke-test", action="store_true", help="Finish quickly for testing"
|
| 37 |
+
)
|
| 38 |
+
args, _ = parser.parse_known_args()
|
| 39 |
+
|
| 40 |
+
algo = BayesOptSearch(utility_kwargs={"kind": "ucb", "kappa": 2.5, "xi": 0.0})
|
| 41 |
+
algo = ConcurrencyLimiter(algo, max_concurrent=4)
|
| 42 |
+
scheduler = AsyncHyperBandScheduler()
|
| 43 |
+
tuner = tune.Tuner(
|
| 44 |
+
easy_objective,
|
| 45 |
+
tune_config=tune.TuneConfig(
|
| 46 |
+
metric="mean_loss",
|
| 47 |
+
mode="min",
|
| 48 |
+
search_alg=algo,
|
| 49 |
+
scheduler=scheduler,
|
| 50 |
+
num_samples=10 if args.smoke_test else 1000,
|
| 51 |
+
),
|
| 52 |
+
run_config=train.RunConfig(
|
| 53 |
+
name="my_exp",
|
| 54 |
+
),
|
| 55 |
+
param_space={
|
| 56 |
+
"steps": 100,
|
| 57 |
+
"width": tune.uniform(0, 20),
|
| 58 |
+
"height": tune.uniform(-100, 100),
|
| 59 |
+
},
|
| 60 |
+
)
|
| 61 |
+
results = tuner.fit()
|
| 62 |
+
|
| 63 |
+
print("Best hyperparameters found were: ", results.get_best_result().config)
|
.venv/lib/python3.11/site-packages/ray/tune/examples/bohb_example.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
|
| 3 |
+
"""This example demonstrates the usage of BOHB with Ray Tune.
|
| 4 |
+
|
| 5 |
+
Requires the HpBandSter and ConfigSpace libraries to be installed
|
| 6 |
+
(`pip install hpbandster ConfigSpace`).
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import json
|
| 10 |
+
import os
|
| 11 |
+
import time
|
| 12 |
+
|
| 13 |
+
import numpy as np
|
| 14 |
+
|
| 15 |
+
import ray
|
| 16 |
+
from ray import train, tune
|
| 17 |
+
from ray.tune import Trainable
|
| 18 |
+
from ray.tune.schedulers.hb_bohb import HyperBandForBOHB
|
| 19 |
+
from ray.tune.search.bohb import TuneBOHB
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class MyTrainableClass(Trainable):
|
| 23 |
+
"""Example agent whose learning curve is a random sigmoid.
|
| 24 |
+
|
| 25 |
+
The dummy hyperparameters "width" and "height" determine the slope and
|
| 26 |
+
maximum reward value reached.
|
| 27 |
+
"""
|
| 28 |
+
|
| 29 |
+
def setup(self, config):
|
| 30 |
+
self.timestep = 0
|
| 31 |
+
|
| 32 |
+
def step(self):
|
| 33 |
+
self.timestep += 1
|
| 34 |
+
v = np.tanh(float(self.timestep) / self.config.get("width", 1))
|
| 35 |
+
v *= self.config.get("height", 1)
|
| 36 |
+
time.sleep(0.1)
|
| 37 |
+
# Here we use `episode_reward_mean`, but you can also report other
|
| 38 |
+
# objectives such as loss or accuracy.
|
| 39 |
+
return {"episode_reward_mean": v}
|
| 40 |
+
|
| 41 |
+
def save_checkpoint(self, checkpoint_dir):
|
| 42 |
+
path = os.path.join(checkpoint_dir, "checkpoint")
|
| 43 |
+
with open(path, "w") as f:
|
| 44 |
+
f.write(json.dumps({"timestep": self.timestep}))
|
| 45 |
+
|
| 46 |
+
def load_checkpoint(self, checkpoint_dir):
|
| 47 |
+
path = os.path.join(checkpoint_dir, "checkpoint")
|
| 48 |
+
with open(path, "r") as f:
|
| 49 |
+
self.timestep = json.loads(f.read())["timestep"]
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
if __name__ == "__main__":
|
| 53 |
+
import sys
|
| 54 |
+
|
| 55 |
+
if sys.version_info >= (3, 12):
|
| 56 |
+
# TuneBOHB is not compatible with Python 3.12
|
| 57 |
+
sys.exit(0)
|
| 58 |
+
|
| 59 |
+
ray.init(num_cpus=8)
|
| 60 |
+
|
| 61 |
+
config = {
|
| 62 |
+
"iterations": 100,
|
| 63 |
+
"width": tune.uniform(0, 20),
|
| 64 |
+
"height": tune.uniform(-100, 100),
|
| 65 |
+
"activation": tune.choice(["relu", "tanh"]),
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
# Optional: Pass the parameter space yourself
|
| 69 |
+
# import ConfigSpace as CS
|
| 70 |
+
# config_space = CS.ConfigurationSpace()
|
| 71 |
+
# config_space.add_hyperparameter(
|
| 72 |
+
# CS.UniformFloatHyperparameter("width", lower=0, upper=20))
|
| 73 |
+
# config_space.add_hyperparameter(
|
| 74 |
+
# CS.UniformFloatHyperparameter("height", lower=-100, upper=100))
|
| 75 |
+
# config_space.add_hyperparameter(
|
| 76 |
+
# CS.CategoricalHyperparameter(
|
| 77 |
+
# "activation", choices=["relu", "tanh"]))
|
| 78 |
+
|
| 79 |
+
max_iterations = 10
|
| 80 |
+
bohb_hyperband = HyperBandForBOHB(
|
| 81 |
+
time_attr="training_iteration",
|
| 82 |
+
max_t=max_iterations,
|
| 83 |
+
reduction_factor=2,
|
| 84 |
+
stop_last_trials=False,
|
| 85 |
+
)
|
| 86 |
+
|
| 87 |
+
bohb_search = TuneBOHB(
|
| 88 |
+
# space=config_space, # If you want to set the space manually
|
| 89 |
+
)
|
| 90 |
+
bohb_search = tune.search.ConcurrencyLimiter(bohb_search, max_concurrent=4)
|
| 91 |
+
|
| 92 |
+
tuner = tune.Tuner(
|
| 93 |
+
MyTrainableClass,
|
| 94 |
+
run_config=train.RunConfig(
|
| 95 |
+
name="bohb_test", stop={"training_iteration": max_iterations}
|
| 96 |
+
),
|
| 97 |
+
tune_config=tune.TuneConfig(
|
| 98 |
+
metric="episode_reward_mean",
|
| 99 |
+
mode="max",
|
| 100 |
+
scheduler=bohb_hyperband,
|
| 101 |
+
search_alg=bohb_search,
|
| 102 |
+
num_samples=32,
|
| 103 |
+
),
|
| 104 |
+
param_space=config,
|
| 105 |
+
)
|
| 106 |
+
results = tuner.fit()
|
| 107 |
+
|
| 108 |
+
print("Best hyperparameters found were: ", results.get_best_result().config)
|
.venv/lib/python3.11/site-packages/ray/tune/examples/custom_func_checkpointing.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# If want to use checkpointing with a custom training function (not a Ray
|
| 2 |
+
# integration like PyTorch or Tensorflow), your function can read/write
|
| 3 |
+
# checkpoint through the ``ray.train.report(metrics, checkpoint=...)`` API.
|
| 4 |
+
import argparse
|
| 5 |
+
import json
|
| 6 |
+
import os
|
| 7 |
+
import tempfile
|
| 8 |
+
import time
|
| 9 |
+
|
| 10 |
+
from ray import train, tune
|
| 11 |
+
from ray.train import Checkpoint
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def evaluation_fn(step, width, height):
|
| 15 |
+
time.sleep(0.1)
|
| 16 |
+
return (0.1 + width * step / 100) ** (-1) + height * 0.1
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def train_func(config):
|
| 20 |
+
step = 0
|
| 21 |
+
width, height = config["width"], config["height"]
|
| 22 |
+
|
| 23 |
+
checkpoint = train.get_checkpoint()
|
| 24 |
+
if checkpoint:
|
| 25 |
+
with checkpoint.as_directory() as checkpoint_dir:
|
| 26 |
+
with open(os.path.join(checkpoint_dir, "checkpoint.json")) as f:
|
| 27 |
+
state = json.load(f)
|
| 28 |
+
step = state["step"] + 1
|
| 29 |
+
|
| 30 |
+
for current_step in range(step, 100):
|
| 31 |
+
intermediate_score = evaluation_fn(current_step, width, height)
|
| 32 |
+
|
| 33 |
+
with tempfile.TemporaryDirectory() as temp_checkpoint_dir:
|
| 34 |
+
with open(os.path.join(temp_checkpoint_dir, "checkpoint.json"), "w") as f:
|
| 35 |
+
json.dump({"step": current_step}, f)
|
| 36 |
+
train.report(
|
| 37 |
+
{"iterations": current_step, "mean_loss": intermediate_score},
|
| 38 |
+
checkpoint=Checkpoint.from_directory(temp_checkpoint_dir),
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
if __name__ == "__main__":
|
| 43 |
+
parser = argparse.ArgumentParser()
|
| 44 |
+
parser.add_argument(
|
| 45 |
+
"--smoke-test", action="store_true", help="Finish quickly for testing"
|
| 46 |
+
)
|
| 47 |
+
args, _ = parser.parse_known_args()
|
| 48 |
+
|
| 49 |
+
tuner = tune.Tuner(
|
| 50 |
+
train_func,
|
| 51 |
+
run_config=train.RunConfig(
|
| 52 |
+
name="hyperband_test",
|
| 53 |
+
stop={"training_iteration": 1 if args.smoke_test else 10},
|
| 54 |
+
),
|
| 55 |
+
tune_config=tune.TuneConfig(
|
| 56 |
+
metric="mean_loss",
|
| 57 |
+
mode="min",
|
| 58 |
+
num_samples=5,
|
| 59 |
+
),
|
| 60 |
+
param_space={
|
| 61 |
+
"steps": 10,
|
| 62 |
+
"width": tune.randint(10, 100),
|
| 63 |
+
"height": tune.loguniform(10, 100),
|
| 64 |
+
},
|
| 65 |
+
)
|
| 66 |
+
results = tuner.fit()
|
| 67 |
+
best_result = results.get_best_result()
|
| 68 |
+
print("Best hyperparameters: ", best_result.config)
|
| 69 |
+
best_checkpoint = best_result.checkpoint
|
| 70 |
+
print("Best checkpoint: ", best_checkpoint)
|
.venv/lib/python3.11/site-packages/ray/tune/examples/hyperband_example.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
|
| 5 |
+
import ray
|
| 6 |
+
from ray import train, tune
|
| 7 |
+
from ray.tune.schedulers import HyperBandScheduler
|
| 8 |
+
from ray.tune.utils.mock_trainable import MyTrainableClass
|
| 9 |
+
|
| 10 |
+
if __name__ == "__main__":
|
| 11 |
+
parser = argparse.ArgumentParser()
|
| 12 |
+
parser.add_argument(
|
| 13 |
+
"--smoke-test", action="store_true", help="Finish quickly for testing"
|
| 14 |
+
)
|
| 15 |
+
args, _ = parser.parse_known_args()
|
| 16 |
+
|
| 17 |
+
ray.init(num_cpus=4 if args.smoke_test else None)
|
| 18 |
+
|
| 19 |
+
# Hyperband early stopping, configured with `episode_reward_mean` as the
|
| 20 |
+
# objective and `training_iteration` as the time unit,
|
| 21 |
+
# which is automatically filled by Tune.
|
| 22 |
+
hyperband = HyperBandScheduler(time_attr="training_iteration", max_t=200)
|
| 23 |
+
|
| 24 |
+
tuner = tune.Tuner(
|
| 25 |
+
MyTrainableClass,
|
| 26 |
+
run_config=train.RunConfig(
|
| 27 |
+
name="hyperband_test",
|
| 28 |
+
stop={"training_iteration": 1 if args.smoke_test else 200},
|
| 29 |
+
verbose=1,
|
| 30 |
+
failure_config=train.FailureConfig(
|
| 31 |
+
fail_fast=True,
|
| 32 |
+
),
|
| 33 |
+
),
|
| 34 |
+
tune_config=tune.TuneConfig(
|
| 35 |
+
num_samples=20 if args.smoke_test else 200,
|
| 36 |
+
metric="episode_reward_mean",
|
| 37 |
+
mode="max",
|
| 38 |
+
scheduler=hyperband,
|
| 39 |
+
),
|
| 40 |
+
param_space={"width": tune.randint(10, 90), "height": tune.randint(0, 100)},
|
| 41 |
+
)
|
| 42 |
+
results = tuner.fit()
|
| 43 |
+
|
| 44 |
+
print("Best hyperparameters found were: ", results.get_best_result().config)
|
.venv/lib/python3.11/site-packages/ray/tune/examples/hyperband_function_example.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import json
|
| 5 |
+
import os
|
| 6 |
+
import tempfile
|
| 7 |
+
|
| 8 |
+
import numpy as np
|
| 9 |
+
|
| 10 |
+
import ray
|
| 11 |
+
from ray import train, tune
|
| 12 |
+
from ray.train import Checkpoint
|
| 13 |
+
from ray.tune.schedulers import HyperBandScheduler
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def train_func(config):
|
| 17 |
+
step = 0
|
| 18 |
+
checkpoint = train.get_checkpoint()
|
| 19 |
+
if checkpoint:
|
| 20 |
+
with checkpoint.as_directory() as checkpoint_dir:
|
| 21 |
+
with open(os.path.join(checkpoint_dir, "checkpoint.json")) as f:
|
| 22 |
+
step = json.load(f)["timestep"] + 1
|
| 23 |
+
|
| 24 |
+
for timestep in range(step, 100):
|
| 25 |
+
v = np.tanh(float(timestep) / config.get("width", 1))
|
| 26 |
+
v *= config.get("height", 1)
|
| 27 |
+
|
| 28 |
+
# Checkpoint the state of the training every 3 steps
|
| 29 |
+
# Note that this is only required for certain schedulers
|
| 30 |
+
with tempfile.TemporaryDirectory() as temp_checkpoint_dir:
|
| 31 |
+
checkpoint = None
|
| 32 |
+
if timestep % 3 == 0:
|
| 33 |
+
with open(
|
| 34 |
+
os.path.join(temp_checkpoint_dir, "checkpoint.json"), "w"
|
| 35 |
+
) as f:
|
| 36 |
+
json.dump({"timestep": timestep}, f)
|
| 37 |
+
checkpoint = Checkpoint.from_directory(temp_checkpoint_dir)
|
| 38 |
+
|
| 39 |
+
# Here we use `episode_reward_mean`, but you can also report other
|
| 40 |
+
# objectives such as loss or accuracy.
|
| 41 |
+
train.report({"episode_reward_mean": v}, checkpoint=checkpoint)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
if __name__ == "__main__":
|
| 45 |
+
parser = argparse.ArgumentParser()
|
| 46 |
+
parser.add_argument(
|
| 47 |
+
"--smoke-test", action="store_true", help="Finish quickly for testing"
|
| 48 |
+
)
|
| 49 |
+
args, _ = parser.parse_known_args()
|
| 50 |
+
|
| 51 |
+
ray.init(num_cpus=4 if args.smoke_test else None)
|
| 52 |
+
|
| 53 |
+
# Hyperband early stopping, configured with `episode_reward_mean` as the
|
| 54 |
+
# objective and `training_iteration` as the time unit,
|
| 55 |
+
# which is automatically filled by Tune.
|
| 56 |
+
hyperband = HyperBandScheduler(max_t=200)
|
| 57 |
+
|
| 58 |
+
tuner = tune.Tuner(
|
| 59 |
+
train_func,
|
| 60 |
+
run_config=train.RunConfig(
|
| 61 |
+
name="hyperband_test",
|
| 62 |
+
stop={"training_iteration": 10 if args.smoke_test else 99999},
|
| 63 |
+
failure_config=train.FailureConfig(
|
| 64 |
+
fail_fast=True,
|
| 65 |
+
),
|
| 66 |
+
),
|
| 67 |
+
tune_config=tune.TuneConfig(
|
| 68 |
+
num_samples=20,
|
| 69 |
+
metric="episode_reward_mean",
|
| 70 |
+
mode="max",
|
| 71 |
+
scheduler=hyperband,
|
| 72 |
+
),
|
| 73 |
+
param_space={"height": tune.uniform(0, 100)},
|
| 74 |
+
)
|
| 75 |
+
results = tuner.fit()
|
| 76 |
+
print("Best hyperparameters found were: ", results.get_best_result().config)
|
.venv/lib/python3.11/site-packages/ray/tune/examples/hyperopt_conditional_search_space_example.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""This example demonstrates the usage of conditional search spaces with Tune.
|
| 2 |
+
|
| 3 |
+
It also checks that it is usable with a separate scheduler.
|
| 4 |
+
|
| 5 |
+
Requires the HyperOpt library to be installed (`pip install hyperopt`).
|
| 6 |
+
|
| 7 |
+
For an example of using a Tune search space, see
|
| 8 |
+
:doc:`/tune/examples/hyperopt_example`.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import time
|
| 12 |
+
|
| 13 |
+
from hyperopt import hp
|
| 14 |
+
|
| 15 |
+
import ray
|
| 16 |
+
from ray import train, tune
|
| 17 |
+
from ray.tune.schedulers import AsyncHyperBandScheduler
|
| 18 |
+
from ray.tune.search import ConcurrencyLimiter
|
| 19 |
+
from ray.tune.search.hyperopt import HyperOptSearch
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def f_unpack_dict(dct):
|
| 23 |
+
"""
|
| 24 |
+
Unpacks all sub-dictionaries in given dictionary recursively.
|
| 25 |
+
There should be no duplicated keys across all nested
|
| 26 |
+
subdictionaries, or some instances will be lost without warning
|
| 27 |
+
|
| 28 |
+
Source: https://www.kaggle.com/fanvacoolt/tutorial-on-hyperopt
|
| 29 |
+
|
| 30 |
+
Parameters:
|
| 31 |
+
----------------
|
| 32 |
+
dct : dictionary to unpack
|
| 33 |
+
|
| 34 |
+
Returns:
|
| 35 |
+
----------------
|
| 36 |
+
: unpacked dictionary
|
| 37 |
+
"""
|
| 38 |
+
|
| 39 |
+
res = {}
|
| 40 |
+
for k, v in dct.items():
|
| 41 |
+
if isinstance(v, dict):
|
| 42 |
+
res = {**res, **f_unpack_dict(v)}
|
| 43 |
+
else:
|
| 44 |
+
res[k] = v
|
| 45 |
+
|
| 46 |
+
return res
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def evaluation_fn(step, width, height, mult=1):
|
| 50 |
+
return (0.1 + width * step / 100) ** (-1) + height * 0.1 * mult
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def easy_objective(config_in):
|
| 54 |
+
# Hyperparameters
|
| 55 |
+
config = f_unpack_dict(config_in)
|
| 56 |
+
width, height, mult = config["width"], config["height"], config.get("mult", 1)
|
| 57 |
+
print(config)
|
| 58 |
+
|
| 59 |
+
for step in range(config["steps"]):
|
| 60 |
+
# Iterative training function - can be any arbitrary training procedure
|
| 61 |
+
intermediate_score = evaluation_fn(step, width, height, mult)
|
| 62 |
+
# Feed the score back back to Tune.
|
| 63 |
+
train.report({"iterations": step, "mean_loss": intermediate_score})
|
| 64 |
+
time.sleep(0.1)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
config_space = {
|
| 68 |
+
"activation": hp.choice(
|
| 69 |
+
"activation",
|
| 70 |
+
[
|
| 71 |
+
{"activation": "relu", "mult": hp.uniform("mult", 1, 2)},
|
| 72 |
+
{"activation": "tanh"},
|
| 73 |
+
],
|
| 74 |
+
),
|
| 75 |
+
"width": hp.uniform("width", 0, 20),
|
| 76 |
+
"height": hp.uniform("heright", -100, 100),
|
| 77 |
+
"steps": 100,
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def run_hyperopt_tune(config_dict=config_space, smoke_test=False):
|
| 82 |
+
algo = HyperOptSearch(space=config_dict, metric="mean_loss", mode="min")
|
| 83 |
+
algo = ConcurrencyLimiter(algo, max_concurrent=4)
|
| 84 |
+
scheduler = AsyncHyperBandScheduler()
|
| 85 |
+
tuner = tune.Tuner(
|
| 86 |
+
easy_objective,
|
| 87 |
+
tune_config=tune.TuneConfig(
|
| 88 |
+
metric="mean_loss",
|
| 89 |
+
mode="min",
|
| 90 |
+
search_alg=algo,
|
| 91 |
+
scheduler=scheduler,
|
| 92 |
+
num_samples=10 if smoke_test else 100,
|
| 93 |
+
),
|
| 94 |
+
)
|
| 95 |
+
results = tuner.fit()
|
| 96 |
+
print("Best hyperparameters found were: ", results.get_best_result().config)
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
if __name__ == "__main__":
|
| 100 |
+
import argparse
|
| 101 |
+
|
| 102 |
+
parser = argparse.ArgumentParser()
|
| 103 |
+
parser.add_argument(
|
| 104 |
+
"--smoke-test", action="store_true", help="Finish quickly for testing"
|
| 105 |
+
)
|
| 106 |
+
args, _ = parser.parse_known_args()
|
| 107 |
+
|
| 108 |
+
ray.init(configure_logging=False)
|
| 109 |
+
|
| 110 |
+
run_hyperopt_tune(smoke_test=args.smoke_test)
|
.venv/lib/python3.11/site-packages/ray/tune/examples/logging_example.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import time
|
| 5 |
+
|
| 6 |
+
from ray import train, tune
|
| 7 |
+
from ray.tune.logger import LoggerCallback
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class TestLoggerCallback(LoggerCallback):
|
| 11 |
+
def on_trial_result(self, iteration, trials, trial, result, **info):
|
| 12 |
+
print(f"TestLogger for trial {trial}: {result}")
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def trial_str_creator(trial):
|
| 16 |
+
return "{}_{}_123".format(trial.trainable_name, trial.trial_id)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def evaluation_fn(step, width, height):
|
| 20 |
+
time.sleep(0.1)
|
| 21 |
+
return (0.1 + width * step / 100) ** (-1) + height * 0.1
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def easy_objective(config):
|
| 25 |
+
# Hyperparameters
|
| 26 |
+
width, height = config["width"], config["height"]
|
| 27 |
+
|
| 28 |
+
for step in range(config["steps"]):
|
| 29 |
+
# Iterative training function - can be any arbitrary training procedure
|
| 30 |
+
intermediate_score = evaluation_fn(step, width, height)
|
| 31 |
+
# Feed the score back back to Tune.
|
| 32 |
+
train.report({"iterations": step, "mean_loss": intermediate_score})
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
if __name__ == "__main__":
|
| 36 |
+
parser = argparse.ArgumentParser()
|
| 37 |
+
parser.add_argument(
|
| 38 |
+
"--smoke-test", action="store_true", help="Finish quickly for testing"
|
| 39 |
+
)
|
| 40 |
+
args, _ = parser.parse_known_args()
|
| 41 |
+
|
| 42 |
+
tuner = tune.Tuner(
|
| 43 |
+
easy_objective,
|
| 44 |
+
run_config=train.RunConfig(
|
| 45 |
+
name="hyperband_test",
|
| 46 |
+
callbacks=[TestLoggerCallback()],
|
| 47 |
+
stop={"training_iteration": 1 if args.smoke_test else 100},
|
| 48 |
+
),
|
| 49 |
+
tune_config=tune.TuneConfig(
|
| 50 |
+
metric="mean_loss",
|
| 51 |
+
mode="min",
|
| 52 |
+
num_samples=5,
|
| 53 |
+
trial_name_creator=trial_str_creator,
|
| 54 |
+
trial_dirname_creator=trial_str_creator,
|
| 55 |
+
),
|
| 56 |
+
param_space={
|
| 57 |
+
"steps": 100,
|
| 58 |
+
"width": tune.randint(10, 100),
|
| 59 |
+
"height": tune.loguniform(10, 100),
|
| 60 |
+
},
|
| 61 |
+
)
|
| 62 |
+
results = tuner.fit()
|
| 63 |
+
|
| 64 |
+
print("Best hyperparameters: ", results.get_best_result().config)
|
.venv/lib/python3.11/site-packages/ray/tune/examples/mlflow_example.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
"""Examples using MLfowLoggerCallback and setup_mlflow.
|
| 3 |
+
"""
|
| 4 |
+
import os
|
| 5 |
+
import tempfile
|
| 6 |
+
import time
|
| 7 |
+
|
| 8 |
+
import mlflow
|
| 9 |
+
|
| 10 |
+
from ray import train, tune
|
| 11 |
+
from ray.air.integrations.mlflow import MLflowLoggerCallback, setup_mlflow
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def evaluation_fn(step, width, height):
|
| 15 |
+
return (0.1 + width * step / 100) ** (-1) + height * 0.1
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def train_function(config):
|
| 19 |
+
# Hyperparameters
|
| 20 |
+
width, height = config["width"], config["height"]
|
| 21 |
+
|
| 22 |
+
for step in range(config.get("steps", 100)):
|
| 23 |
+
# Iterative training function - can be any arbitrary training procedure
|
| 24 |
+
intermediate_score = evaluation_fn(step, width, height)
|
| 25 |
+
# Feed the score back to Tune.
|
| 26 |
+
train.report({"iterations": step, "mean_loss": intermediate_score})
|
| 27 |
+
time.sleep(0.1)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def tune_with_callback(mlflow_tracking_uri, finish_fast=False):
|
| 31 |
+
|
| 32 |
+
tuner = tune.Tuner(
|
| 33 |
+
train_function,
|
| 34 |
+
run_config=train.RunConfig(
|
| 35 |
+
name="mlflow",
|
| 36 |
+
callbacks=[
|
| 37 |
+
MLflowLoggerCallback(
|
| 38 |
+
tracking_uri=mlflow_tracking_uri,
|
| 39 |
+
experiment_name="example",
|
| 40 |
+
save_artifact=True,
|
| 41 |
+
)
|
| 42 |
+
],
|
| 43 |
+
),
|
| 44 |
+
tune_config=tune.TuneConfig(
|
| 45 |
+
num_samples=5,
|
| 46 |
+
),
|
| 47 |
+
param_space={
|
| 48 |
+
"width": tune.randint(10, 100),
|
| 49 |
+
"height": tune.randint(0, 100),
|
| 50 |
+
"steps": 5 if finish_fast else 100,
|
| 51 |
+
},
|
| 52 |
+
)
|
| 53 |
+
tuner.fit()
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def train_function_mlflow(config):
|
| 57 |
+
setup_mlflow(config)
|
| 58 |
+
|
| 59 |
+
# Hyperparameters
|
| 60 |
+
width, height = config["width"], config["height"]
|
| 61 |
+
|
| 62 |
+
for step in range(config.get("steps", 100)):
|
| 63 |
+
# Iterative training function - can be any arbitrary training procedure
|
| 64 |
+
intermediate_score = evaluation_fn(step, width, height)
|
| 65 |
+
# Log the metrics to mlflow
|
| 66 |
+
mlflow.log_metrics(dict(mean_loss=intermediate_score), step=step)
|
| 67 |
+
# Feed the score back to Tune.
|
| 68 |
+
train.report({"iterations": step, "mean_loss": intermediate_score})
|
| 69 |
+
time.sleep(0.1)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def tune_with_setup(mlflow_tracking_uri, finish_fast=False):
|
| 73 |
+
# Set the experiment, or create a new one if does not exist yet.
|
| 74 |
+
mlflow.set_tracking_uri(mlflow_tracking_uri)
|
| 75 |
+
mlflow.set_experiment(experiment_name="mixin_example")
|
| 76 |
+
tuner = tune.Tuner(
|
| 77 |
+
train_function_mlflow,
|
| 78 |
+
run_config=train.RunConfig(
|
| 79 |
+
name="mlflow",
|
| 80 |
+
),
|
| 81 |
+
tune_config=tune.TuneConfig(
|
| 82 |
+
num_samples=5,
|
| 83 |
+
),
|
| 84 |
+
param_space={
|
| 85 |
+
"width": tune.randint(10, 100),
|
| 86 |
+
"height": tune.randint(0, 100),
|
| 87 |
+
"steps": 5 if finish_fast else 100,
|
| 88 |
+
"mlflow": {
|
| 89 |
+
"experiment_name": "mixin_example",
|
| 90 |
+
"tracking_uri": mlflow.get_tracking_uri(),
|
| 91 |
+
},
|
| 92 |
+
},
|
| 93 |
+
)
|
| 94 |
+
tuner.fit()
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
if __name__ == "__main__":
|
| 98 |
+
import argparse
|
| 99 |
+
|
| 100 |
+
parser = argparse.ArgumentParser()
|
| 101 |
+
parser.add_argument(
|
| 102 |
+
"--smoke-test", action="store_true", help="Finish quickly for testing"
|
| 103 |
+
)
|
| 104 |
+
parser.add_argument(
|
| 105 |
+
"--tracking-uri",
|
| 106 |
+
type=str,
|
| 107 |
+
help="The tracking URI for the MLflow tracking server.",
|
| 108 |
+
)
|
| 109 |
+
args, _ = parser.parse_known_args()
|
| 110 |
+
|
| 111 |
+
if args.smoke_test:
|
| 112 |
+
mlflow_tracking_uri = os.path.join(tempfile.gettempdir(), "mlruns")
|
| 113 |
+
else:
|
| 114 |
+
mlflow_tracking_uri = args.tracking_uri
|
| 115 |
+
|
| 116 |
+
tune_with_callback(mlflow_tracking_uri, finish_fast=args.smoke_test)
|
| 117 |
+
if not args.smoke_test:
|
| 118 |
+
df = mlflow.search_runs(
|
| 119 |
+
[mlflow.get_experiment_by_name("example").experiment_id]
|
| 120 |
+
)
|
| 121 |
+
print(df)
|
| 122 |
+
|
| 123 |
+
tune_with_setup(mlflow_tracking_uri, finish_fast=args.smoke_test)
|
| 124 |
+
if not args.smoke_test:
|
| 125 |
+
df = mlflow.search_runs(
|
| 126 |
+
[mlflow.get_experiment_by_name("mixin_example").experiment_id]
|
| 127 |
+
)
|
| 128 |
+
print(df)
|
.venv/lib/python3.11/site-packages/ray/tune/examples/mlflow_ptl.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""An example showing how to use Pytorch Lightning training, Ray Tune
|
| 2 |
+
HPO, and MLflow autologging all together."""
|
| 3 |
+
|
| 4 |
+
import os
|
| 5 |
+
import tempfile
|
| 6 |
+
|
| 7 |
+
import mlflow
|
| 8 |
+
import pytorch_lightning as pl
|
| 9 |
+
|
| 10 |
+
from ray import train, tune
|
| 11 |
+
from ray.air.integrations.mlflow import setup_mlflow
|
| 12 |
+
from ray.tune.examples.mnist_ptl_mini import LightningMNISTClassifier, MNISTDataModule
|
| 13 |
+
from ray.tune.integration.pytorch_lightning import TuneReportCallback
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def train_mnist_tune(config, data_dir=None, num_epochs=10, num_gpus=0):
|
| 17 |
+
setup_mlflow(
|
| 18 |
+
config,
|
| 19 |
+
experiment_name=config.get("experiment_name", None),
|
| 20 |
+
tracking_uri=config.get("tracking_uri", None),
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
model = LightningMNISTClassifier(config, data_dir)
|
| 24 |
+
dm = MNISTDataModule(
|
| 25 |
+
data_dir=data_dir, num_workers=1, batch_size=config["batch_size"]
|
| 26 |
+
)
|
| 27 |
+
metrics = {"loss": "ptl/val_loss", "acc": "ptl/val_accuracy"}
|
| 28 |
+
mlflow.pytorch.autolog()
|
| 29 |
+
trainer = pl.Trainer(
|
| 30 |
+
max_epochs=num_epochs,
|
| 31 |
+
gpus=num_gpus,
|
| 32 |
+
progress_bar_refresh_rate=0,
|
| 33 |
+
callbacks=[TuneReportCallback(metrics, on="validation_end")],
|
| 34 |
+
)
|
| 35 |
+
trainer.fit(model, dm)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def tune_mnist(
|
| 39 |
+
num_samples=10,
|
| 40 |
+
num_epochs=10,
|
| 41 |
+
gpus_per_trial=0,
|
| 42 |
+
tracking_uri=None,
|
| 43 |
+
experiment_name="ptl_autologging_example",
|
| 44 |
+
):
|
| 45 |
+
data_dir = os.path.join(tempfile.gettempdir(), "mnist_data_")
|
| 46 |
+
# Download data
|
| 47 |
+
MNISTDataModule(data_dir=data_dir, batch_size=32).prepare_data()
|
| 48 |
+
|
| 49 |
+
# Set the MLflow experiment, or create it if it does not exist.
|
| 50 |
+
mlflow.set_tracking_uri(tracking_uri)
|
| 51 |
+
mlflow.set_experiment(experiment_name)
|
| 52 |
+
|
| 53 |
+
config = {
|
| 54 |
+
"layer_1": tune.choice([32, 64, 128]),
|
| 55 |
+
"layer_2": tune.choice([64, 128, 256]),
|
| 56 |
+
"lr": tune.loguniform(1e-4, 1e-1),
|
| 57 |
+
"batch_size": tune.choice([32, 64, 128]),
|
| 58 |
+
"experiment_name": experiment_name,
|
| 59 |
+
"tracking_uri": mlflow.get_tracking_uri(),
|
| 60 |
+
"data_dir": os.path.join(tempfile.gettempdir(), "mnist_data_"),
|
| 61 |
+
"num_epochs": num_epochs,
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
trainable = tune.with_parameters(
|
| 65 |
+
train_mnist_tune,
|
| 66 |
+
data_dir=data_dir,
|
| 67 |
+
num_epochs=num_epochs,
|
| 68 |
+
num_gpus=gpus_per_trial,
|
| 69 |
+
)
|
| 70 |
+
|
| 71 |
+
tuner = tune.Tuner(
|
| 72 |
+
tune.with_resources(trainable, resources={"cpu": 1, "gpu": gpus_per_trial}),
|
| 73 |
+
tune_config=tune.TuneConfig(
|
| 74 |
+
metric="loss",
|
| 75 |
+
mode="min",
|
| 76 |
+
num_samples=num_samples,
|
| 77 |
+
),
|
| 78 |
+
run_config=train.RunConfig(
|
| 79 |
+
name="tune_mnist",
|
| 80 |
+
),
|
| 81 |
+
param_space=config,
|
| 82 |
+
)
|
| 83 |
+
results = tuner.fit()
|
| 84 |
+
|
| 85 |
+
print("Best hyperparameters found were: ", results.get_best_result().config)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
if __name__ == "__main__":
|
| 89 |
+
import argparse
|
| 90 |
+
|
| 91 |
+
parser = argparse.ArgumentParser()
|
| 92 |
+
parser.add_argument(
|
| 93 |
+
"--smoke-test", action="store_true", help="Finish quickly for testing"
|
| 94 |
+
)
|
| 95 |
+
args, _ = parser.parse_known_args()
|
| 96 |
+
|
| 97 |
+
if args.smoke_test:
|
| 98 |
+
tune_mnist(
|
| 99 |
+
num_samples=1,
|
| 100 |
+
num_epochs=1,
|
| 101 |
+
gpus_per_trial=0,
|
| 102 |
+
tracking_uri=os.path.join(tempfile.gettempdir(), "mlruns"),
|
| 103 |
+
)
|
| 104 |
+
else:
|
| 105 |
+
tune_mnist(num_samples=10, num_epochs=10, gpus_per_trial=0)
|
.venv/lib/python3.11/site-packages/ray/tune/examples/mnist_ptl_mini.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
import pytorch_lightning as pl
|
| 5 |
+
import torch
|
| 6 |
+
from datasets import load_dataset
|
| 7 |
+
from filelock import FileLock
|
| 8 |
+
from torch.nn import functional as F
|
| 9 |
+
from torch.utils.data import DataLoader
|
| 10 |
+
from torchmetrics import Accuracy
|
| 11 |
+
from torchvision import transforms
|
| 12 |
+
|
| 13 |
+
from ray import train, tune
|
| 14 |
+
from ray.tune.integration.pytorch_lightning import TuneReportCheckpointCallback
|
| 15 |
+
|
| 16 |
+
PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class MNISTDataModule(pl.LightningDataModule):
|
| 20 |
+
def __init__(self, batch_size: int, data_dir: str = PATH_DATASETS):
|
| 21 |
+
super().__init__()
|
| 22 |
+
self.data_dir = data_dir
|
| 23 |
+
self.transform = transforms.Compose(
|
| 24 |
+
[
|
| 25 |
+
transforms.ToTensor(),
|
| 26 |
+
transforms.Normalize((0.1307,), (0.3081,)),
|
| 27 |
+
]
|
| 28 |
+
)
|
| 29 |
+
self.batch_size = batch_size
|
| 30 |
+
self.dims = (1, 28, 28)
|
| 31 |
+
self.num_classes = 10
|
| 32 |
+
|
| 33 |
+
def prepare_data(self):
|
| 34 |
+
# download
|
| 35 |
+
with FileLock(os.path.expanduser("~/.data.lock")):
|
| 36 |
+
load_dataset("ylecun/mnist", cache_dir=self.data_dir)
|
| 37 |
+
|
| 38 |
+
def setup(self, stage=None):
|
| 39 |
+
dataset = load_dataset("ylecun/mnist", cache_dir=self.data_dir)
|
| 40 |
+
|
| 41 |
+
def transform_fn(sample):
|
| 42 |
+
return (self.transform(sample["image"]), sample["label"])
|
| 43 |
+
|
| 44 |
+
self.mnist_train = [transform_fn(sample) for sample in dataset["train"]]
|
| 45 |
+
self.mnist_val = [transform_fn(sample) for sample in dataset["test"]]
|
| 46 |
+
|
| 47 |
+
def train_dataloader(self):
|
| 48 |
+
return DataLoader(self.mnist_train, batch_size=self.batch_size)
|
| 49 |
+
|
| 50 |
+
def val_dataloader(self):
|
| 51 |
+
return DataLoader(self.mnist_val, batch_size=self.batch_size)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
class LightningMNISTClassifier(pl.LightningModule):
|
| 55 |
+
def __init__(self, config, data_dir=None):
|
| 56 |
+
super(LightningMNISTClassifier, self).__init__()
|
| 57 |
+
|
| 58 |
+
self.data_dir = data_dir or os.getcwd()
|
| 59 |
+
self.lr = config["lr"]
|
| 60 |
+
layer_1, layer_2 = config["layer_1"], config["layer_2"]
|
| 61 |
+
self.batch_size = config["batch_size"]
|
| 62 |
+
|
| 63 |
+
# mnist images are (1, 28, 28) (channels, width, height)
|
| 64 |
+
self.layer_1 = torch.nn.Linear(28 * 28, layer_1)
|
| 65 |
+
self.layer_2 = torch.nn.Linear(layer_1, layer_2)
|
| 66 |
+
self.layer_3 = torch.nn.Linear(layer_2, 10)
|
| 67 |
+
self.accuracy = Accuracy(task="multiclass", num_classes=10, top_k=1)
|
| 68 |
+
|
| 69 |
+
def forward(self, x):
|
| 70 |
+
batch_size, channels, width, height = x.size()
|
| 71 |
+
x = x.view(batch_size, -1)
|
| 72 |
+
x = self.layer_1(x)
|
| 73 |
+
x = torch.relu(x)
|
| 74 |
+
x = self.layer_2(x)
|
| 75 |
+
x = torch.relu(x)
|
| 76 |
+
x = self.layer_3(x)
|
| 77 |
+
x = torch.log_softmax(x, dim=1)
|
| 78 |
+
return x
|
| 79 |
+
|
| 80 |
+
def configure_optimizers(self):
|
| 81 |
+
return torch.optim.Adam(self.parameters(), lr=self.lr)
|
| 82 |
+
|
| 83 |
+
def training_step(self, train_batch, batch_idx):
|
| 84 |
+
x, y = train_batch
|
| 85 |
+
logits = self.forward(x)
|
| 86 |
+
loss = F.nll_loss(logits, y)
|
| 87 |
+
acc = self.accuracy(logits, y)
|
| 88 |
+
self.log("ptl/train_loss", loss)
|
| 89 |
+
self.log("ptl/train_accuracy", acc)
|
| 90 |
+
return loss
|
| 91 |
+
|
| 92 |
+
def validation_step(self, val_batch, batch_idx):
|
| 93 |
+
x, y = val_batch
|
| 94 |
+
logits = self.forward(x)
|
| 95 |
+
loss = F.nll_loss(logits, y)
|
| 96 |
+
acc = self.accuracy(logits, y)
|
| 97 |
+
return {"val_loss": loss, "val_accuracy": acc}
|
| 98 |
+
|
| 99 |
+
def validation_epoch_end(self, outputs):
|
| 100 |
+
avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
|
| 101 |
+
avg_acc = torch.stack([x["val_accuracy"] for x in outputs]).mean()
|
| 102 |
+
self.log("ptl/val_loss", avg_loss)
|
| 103 |
+
self.log("ptl/val_accuracy", avg_acc)
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def train_mnist_tune(config, num_epochs=10, num_gpus=0):
|
| 107 |
+
data_dir = os.path.abspath("./data")
|
| 108 |
+
model = LightningMNISTClassifier(config, data_dir)
|
| 109 |
+
with FileLock(os.path.expanduser("~/.data.lock")):
|
| 110 |
+
dm = MNISTDataModule(data_dir=data_dir, batch_size=config["batch_size"])
|
| 111 |
+
metrics = {"loss": "ptl/val_loss", "acc": "ptl/val_accuracy"}
|
| 112 |
+
trainer = pl.Trainer(
|
| 113 |
+
max_epochs=num_epochs,
|
| 114 |
+
# If fractional GPUs passed in, convert to int.
|
| 115 |
+
gpus=math.ceil(num_gpus),
|
| 116 |
+
enable_progress_bar=False,
|
| 117 |
+
callbacks=[
|
| 118 |
+
TuneReportCheckpointCallback(
|
| 119 |
+
metrics, on="validation_end", save_checkpoints=False
|
| 120 |
+
)
|
| 121 |
+
],
|
| 122 |
+
)
|
| 123 |
+
trainer.fit(model, dm)
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def tune_mnist(num_samples=10, num_epochs=10, gpus_per_trial=0):
|
| 127 |
+
config = {
|
| 128 |
+
"layer_1": tune.choice([32, 64, 128]),
|
| 129 |
+
"layer_2": tune.choice([64, 128, 256]),
|
| 130 |
+
"lr": tune.loguniform(1e-4, 1e-1),
|
| 131 |
+
"batch_size": tune.choice([32, 64, 128]),
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
trainable = tune.with_parameters(
|
| 135 |
+
train_mnist_tune, num_epochs=num_epochs, num_gpus=gpus_per_trial
|
| 136 |
+
)
|
| 137 |
+
tuner = tune.Tuner(
|
| 138 |
+
tune.with_resources(trainable, resources={"cpu": 1, "gpu": gpus_per_trial}),
|
| 139 |
+
tune_config=tune.TuneConfig(
|
| 140 |
+
metric="loss",
|
| 141 |
+
mode="min",
|
| 142 |
+
num_samples=num_samples,
|
| 143 |
+
),
|
| 144 |
+
run_config=train.RunConfig(
|
| 145 |
+
name="tune_mnist",
|
| 146 |
+
),
|
| 147 |
+
param_space=config,
|
| 148 |
+
)
|
| 149 |
+
results = tuner.fit()
|
| 150 |
+
|
| 151 |
+
print("Best hyperparameters found were: ", results.get_best_result().config)
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
if __name__ == "__main__":
|
| 155 |
+
import argparse
|
| 156 |
+
|
| 157 |
+
parser = argparse.ArgumentParser()
|
| 158 |
+
parser.add_argument(
|
| 159 |
+
"--smoke-test", action="store_true", help="Finish quickly for testing"
|
| 160 |
+
)
|
| 161 |
+
args, _ = parser.parse_known_args()
|
| 162 |
+
|
| 163 |
+
if args.smoke_test:
|
| 164 |
+
tune_mnist(num_samples=1, num_epochs=1, gpus_per_trial=0)
|
| 165 |
+
else:
|
| 166 |
+
tune_mnist(num_samples=10, num_epochs=10, gpus_per_trial=0)
|
.venv/lib/python3.11/site-packages/ray/tune/examples/mnist_pytorch.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Original Code here:
|
| 2 |
+
# https://github.com/pytorch/examples/blob/master/mnist/main.py
|
| 3 |
+
|
| 4 |
+
import argparse
|
| 5 |
+
import os
|
| 6 |
+
import tempfile
|
| 7 |
+
|
| 8 |
+
import torch
|
| 9 |
+
import torch.nn as nn
|
| 10 |
+
import torch.nn.functional as F
|
| 11 |
+
import torch.optim as optim
|
| 12 |
+
from filelock import FileLock
|
| 13 |
+
from torchvision import datasets, transforms
|
| 14 |
+
|
| 15 |
+
import ray
|
| 16 |
+
from ray import train, tune
|
| 17 |
+
from ray.train import Checkpoint
|
| 18 |
+
from ray.tune.schedulers import AsyncHyperBandScheduler
|
| 19 |
+
|
| 20 |
+
# Change these values if you want the training to run quicker or slower.
|
| 21 |
+
EPOCH_SIZE = 512
|
| 22 |
+
TEST_SIZE = 256
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class ConvNet(nn.Module):
|
| 26 |
+
def __init__(self):
|
| 27 |
+
super(ConvNet, self).__init__()
|
| 28 |
+
self.conv1 = nn.Conv2d(1, 3, kernel_size=3)
|
| 29 |
+
self.fc = nn.Linear(192, 10)
|
| 30 |
+
|
| 31 |
+
def forward(self, x):
|
| 32 |
+
x = F.relu(F.max_pool2d(self.conv1(x), 3))
|
| 33 |
+
x = x.view(-1, 192)
|
| 34 |
+
x = self.fc(x)
|
| 35 |
+
return F.log_softmax(x, dim=1)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def train_func(model, optimizer, train_loader, device=None):
|
| 39 |
+
device = device or torch.device("cpu")
|
| 40 |
+
model.train()
|
| 41 |
+
for batch_idx, (data, target) in enumerate(train_loader):
|
| 42 |
+
if batch_idx * len(data) > EPOCH_SIZE:
|
| 43 |
+
return
|
| 44 |
+
data, target = data.to(device), target.to(device)
|
| 45 |
+
optimizer.zero_grad()
|
| 46 |
+
output = model(data)
|
| 47 |
+
loss = F.nll_loss(output, target)
|
| 48 |
+
loss.backward()
|
| 49 |
+
optimizer.step()
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def test_func(model, data_loader, device=None):
|
| 53 |
+
device = device or torch.device("cpu")
|
| 54 |
+
model.eval()
|
| 55 |
+
correct = 0
|
| 56 |
+
total = 0
|
| 57 |
+
with torch.no_grad():
|
| 58 |
+
for batch_idx, (data, target) in enumerate(data_loader):
|
| 59 |
+
if batch_idx * len(data) > TEST_SIZE:
|
| 60 |
+
break
|
| 61 |
+
data, target = data.to(device), target.to(device)
|
| 62 |
+
outputs = model(data)
|
| 63 |
+
_, predicted = torch.max(outputs.data, 1)
|
| 64 |
+
total += target.size(0)
|
| 65 |
+
correct += (predicted == target).sum().item()
|
| 66 |
+
|
| 67 |
+
return correct / total
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def get_data_loaders(batch_size=64):
|
| 71 |
+
mnist_transforms = transforms.Compose(
|
| 72 |
+
[transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
|
| 73 |
+
)
|
| 74 |
+
|
| 75 |
+
# We add FileLock here because multiple workers will want to
|
| 76 |
+
# download data, and this may cause overwrites since
|
| 77 |
+
# DataLoader is not threadsafe.
|
| 78 |
+
with FileLock(os.path.expanduser("~/data.lock")):
|
| 79 |
+
train_loader = torch.utils.data.DataLoader(
|
| 80 |
+
datasets.MNIST(
|
| 81 |
+
"~/data", train=True, download=True, transform=mnist_transforms
|
| 82 |
+
),
|
| 83 |
+
batch_size=batch_size,
|
| 84 |
+
shuffle=True,
|
| 85 |
+
)
|
| 86 |
+
test_loader = torch.utils.data.DataLoader(
|
| 87 |
+
datasets.MNIST(
|
| 88 |
+
"~/data", train=False, download=True, transform=mnist_transforms
|
| 89 |
+
),
|
| 90 |
+
batch_size=batch_size,
|
| 91 |
+
shuffle=True,
|
| 92 |
+
)
|
| 93 |
+
return train_loader, test_loader
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def train_mnist(config):
|
| 97 |
+
should_checkpoint = config.get("should_checkpoint", False)
|
| 98 |
+
use_cuda = torch.cuda.is_available()
|
| 99 |
+
device = torch.device("cuda" if use_cuda else "cpu")
|
| 100 |
+
train_loader, test_loader = get_data_loaders()
|
| 101 |
+
model = ConvNet().to(device)
|
| 102 |
+
|
| 103 |
+
optimizer = optim.SGD(
|
| 104 |
+
model.parameters(), lr=config["lr"], momentum=config["momentum"]
|
| 105 |
+
)
|
| 106 |
+
|
| 107 |
+
while True:
|
| 108 |
+
train_func(model, optimizer, train_loader, device)
|
| 109 |
+
acc = test_func(model, test_loader, device)
|
| 110 |
+
metrics = {"mean_accuracy": acc}
|
| 111 |
+
|
| 112 |
+
# Report metrics (and possibly a checkpoint)
|
| 113 |
+
if should_checkpoint:
|
| 114 |
+
with tempfile.TemporaryDirectory() as tempdir:
|
| 115 |
+
torch.save(model.state_dict(), os.path.join(tempdir, "model.pt"))
|
| 116 |
+
train.report(metrics, checkpoint=Checkpoint.from_directory(tempdir))
|
| 117 |
+
else:
|
| 118 |
+
train.report(metrics)
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
if __name__ == "__main__":
|
| 122 |
+
parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
|
| 123 |
+
parser.add_argument(
|
| 124 |
+
"--cuda", action="store_true", default=False, help="Enables GPU training"
|
| 125 |
+
)
|
| 126 |
+
parser.add_argument(
|
| 127 |
+
"--smoke-test", action="store_true", help="Finish quickly for testing"
|
| 128 |
+
)
|
| 129 |
+
args, _ = parser.parse_known_args()
|
| 130 |
+
|
| 131 |
+
ray.init(num_cpus=2 if args.smoke_test else None)
|
| 132 |
+
|
| 133 |
+
# for early stopping
|
| 134 |
+
sched = AsyncHyperBandScheduler()
|
| 135 |
+
|
| 136 |
+
resources_per_trial = {"cpu": 2, "gpu": int(args.cuda)} # set this for GPUs
|
| 137 |
+
tuner = tune.Tuner(
|
| 138 |
+
tune.with_resources(train_mnist, resources=resources_per_trial),
|
| 139 |
+
tune_config=tune.TuneConfig(
|
| 140 |
+
metric="mean_accuracy",
|
| 141 |
+
mode="max",
|
| 142 |
+
scheduler=sched,
|
| 143 |
+
num_samples=1 if args.smoke_test else 50,
|
| 144 |
+
),
|
| 145 |
+
run_config=train.RunConfig(
|
| 146 |
+
name="exp",
|
| 147 |
+
stop={
|
| 148 |
+
"mean_accuracy": 0.98,
|
| 149 |
+
"training_iteration": 5 if args.smoke_test else 100,
|
| 150 |
+
},
|
| 151 |
+
),
|
| 152 |
+
param_space={
|
| 153 |
+
"lr": tune.loguniform(1e-4, 1e-2),
|
| 154 |
+
"momentum": tune.uniform(0.1, 0.9),
|
| 155 |
+
},
|
| 156 |
+
)
|
| 157 |
+
results = tuner.fit()
|
| 158 |
+
|
| 159 |
+
print("Best config is:", results.get_best_result().config)
|
| 160 |
+
|
| 161 |
+
assert not results.errors
|
.venv/lib/python3.11/site-packages/ray/tune/examples/mnist_pytorch_trainable.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Original Code here:
|
| 2 |
+
# https://github.com/pytorch/examples/blob/master/mnist/main.py
|
| 3 |
+
from __future__ import print_function
|
| 4 |
+
|
| 5 |
+
import argparse
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
import torch
|
| 9 |
+
import torch.optim as optim
|
| 10 |
+
|
| 11 |
+
import ray
|
| 12 |
+
from ray import train, tune
|
| 13 |
+
from ray.tune.examples.mnist_pytorch import (
|
| 14 |
+
ConvNet,
|
| 15 |
+
get_data_loaders,
|
| 16 |
+
test_func,
|
| 17 |
+
train_func,
|
| 18 |
+
)
|
| 19 |
+
from ray.tune.schedulers import ASHAScheduler
|
| 20 |
+
|
| 21 |
+
# Change these values if you want the training to run quicker or slower.
# NOTE(review): EPOCH_SIZE / TEST_SIZE appear unused in this module's visible
# code — possibly kept for parity with mnist_pytorch.py; confirm before removal.
EPOCH_SIZE = 512
TEST_SIZE = 256

# Training settings.
# The parser lives at module level so the __main__ block below can call
# parser.parse_args() directly.
parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
parser.add_argument(
    "--use-gpu", action="store_true", default=False, help="enables CUDA training"
)
parser.add_argument("--ray-address", type=str, help="The Redis address of the cluster.")
parser.add_argument(
    "--smoke-test", action="store_true", help="Finish quickly for testing"
)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# Below comments are for documentation purposes only.
|
| 37 |
+
# fmt: off
|
| 38 |
+
# __trainable_example_begin__
|
| 39 |
+
class TrainMNIST(tune.Trainable):
    """Class-API Tune trainable wrapping the MNIST ConvNet example."""

    def setup(self, config):
        """Build device, data loaders, model, and optimizer from ``config``."""
        # Use the GPU only when requested *and* one is actually available.
        gpu_ok = config.get("use_gpu") and torch.cuda.is_available()
        self.device = torch.device("cuda") if gpu_ok else torch.device("cpu")
        self.train_loader, self.test_loader = get_data_loaders()
        self.model = ConvNet().to(self.device)
        self.optimizer = optim.SGD(
            self.model.parameters(),
            lr=config.get("lr", 0.01),
            momentum=config.get("momentum", 0.9),
        )

    def step(self):
        """Train for one iteration and report the resulting test accuracy."""
        train_func(self.model, self.optimizer, self.train_loader, device=self.device)
        accuracy = test_func(self.model, self.test_loader, self.device)
        return {"mean_accuracy": accuracy}

    def save_checkpoint(self, checkpoint_dir):
        """Persist the model weights into ``checkpoint_dir``."""
        torch.save(
            self.model.state_dict(), os.path.join(checkpoint_dir, "model.pth")
        )

    def load_checkpoint(self, checkpoint_dir):
        """Restore the model weights from ``checkpoint_dir``."""
        state = torch.load(os.path.join(checkpoint_dir, "model.pth"))
        self.model.load_state_dict(state)
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
# __trainable_example_end__
|
| 66 |
+
# fmt: on
|
| 67 |
+
|
| 68 |
+
if __name__ == "__main__":
    args = parser.parse_args()
    # Connect to an existing cluster when --ray-address is given; cap CPUs
    # during the smoke test so it fits on a small machine.
    ray.init(address=args.ray_address, num_cpus=6 if args.smoke_test else None)

    asha = ASHAScheduler()

    tuner = tune.Tuner(
        tune.with_resources(TrainMNIST, resources={"cpu": 3, "gpu": int(args.use_gpu)}),
        tune_config=tune.TuneConfig(
            metric="mean_accuracy",
            mode="max",
            scheduler=asha,
            num_samples=1 if args.smoke_test else 20,
        ),
        run_config=train.RunConfig(
            stop={
                "mean_accuracy": 0.95,
                "training_iteration": 3 if args.smoke_test else 20,
            },
            # Checkpoint every 3 iterations and once more at trial end.
            checkpoint_config=train.CheckpointConfig(
                checkpoint_at_end=True, checkpoint_frequency=3
            ),
        ),
        param_space={
            "args": args,
            "lr": tune.uniform(0.001, 0.1),
            "momentum": tune.uniform(0.1, 0.9),
        },
    )
    results = tuner.fit()

    print("Best config is:", results.get_best_result().config)
|
.venv/lib/python3.11/site-packages/ray/tune/examples/nevergrad_example.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""This example demonstrates the usage of Nevergrad with Ray Tune.
|
| 2 |
+
|
| 3 |
+
It also checks that it is usable with a separate scheduler.
|
| 4 |
+
|
| 5 |
+
Requires the Nevergrad library to be installed (`pip install nevergrad`).
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import time
|
| 9 |
+
|
| 10 |
+
from ray import train, tune
|
| 11 |
+
from ray.tune.schedulers import AsyncHyperBandScheduler
|
| 12 |
+
from ray.tune.search import ConcurrencyLimiter
|
| 13 |
+
from ray.tune.search.nevergrad import NevergradSearch
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def evaluation_fn(step, width, height):
    """Toy objective: a term that decays with ``width * step`` plus a
    constant offset proportional to ``height``."""
    decay_term = (0.1 + width * step / 100) ** (-1)
    offset_term = height * 0.1
    return decay_term + offset_term
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def easy_objective(config):
    """Trainable function: evaluate the toy objective once per step and
    report each intermediate score to Tune."""
    # Hyperparameters under optimization.
    width, height = config["width"], config["height"]

    for step in range(config["steps"]):
        # Stand-in for an arbitrary iterative training procedure.
        score = evaluation_fn(step, width, height)
        # Feed the score back to Tune.
        train.report({"iterations": step, "mean_loss": score})
        time.sleep(0.1)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
if __name__ == "__main__":
    import argparse

    import nevergrad as ng

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing"
    )
    args, _ = parser.parse_known_args()

    # Optional: define the Nevergrad space yourself instead of letting Tune
    # translate ``param_space``:
    # space = ng.p.Dict(
    #     width=ng.p.Scalar(lower=0, upper=20),
    #     height=ng.p.Scalar(lower=-100, upper=100),
    #     activation=ng.p.Choice(choices=["relu", "tanh"])
    # )

    searcher = NevergradSearch(
        optimizer=ng.optimizers.OnePlusOne,
        # space=space,  # If you want to set the space manually
    )
    # Cap parallelism so the sequential (1+1) optimizer stays effective.
    searcher = ConcurrencyLimiter(searcher, max_concurrent=4)

    tuner = tune.Tuner(
        easy_objective,
        run_config=train.RunConfig(name="nevergrad"),
        tune_config=tune.TuneConfig(
            metric="mean_loss",
            mode="min",
            search_alg=searcher,
            scheduler=AsyncHyperBandScheduler(),
            num_samples=10 if args.smoke_test else 50,
        ),
        param_space={
            "steps": 100,
            "width": tune.uniform(0, 20),
            "height": tune.uniform(-100, 100),
            "activation": tune.choice(["relu", "tanh"]),
        },
    )
    results = tuner.fit()

    print("Best hyperparameters found were: ", results.get_best_result().config)
|