diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__init__.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/async_hyperband_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/async_hyperband_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..87386062462fddd4a9d901d73350a1657eb28dae Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/async_hyperband_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/ax_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/ax_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d57e1c50c51cbf2af333b4994f5f5eed3275bd1a Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/ax_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/bayesopt_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/bayesopt_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b66f9c12d731a5907753463f342f086523f7188c Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/bayesopt_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/bohb_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/bohb_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..212a2610335cf377e00df56648418f2d088c69e0 Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/bohb_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/cifar10_pytorch.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/cifar10_pytorch.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e9849d6ef7bde40d1225a284eb1aec8f0eedef75 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/cifar10_pytorch.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/custom_func_checkpointing.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/custom_func_checkpointing.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b8f20aba44b71287714abe9a7acc8ef18220f872 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/custom_func_checkpointing.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/hyperband_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/hyperband_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b220240cce7dac3163d577c71dd2bba5d307af7 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/hyperband_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/hyperband_function_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/hyperband_function_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f2f289c225996a1768c7fa00ea62b924e784be6f Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/hyperband_function_example.cpython-311.pyc differ diff --git 
a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/hyperopt_conditional_search_space_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/hyperopt_conditional_search_space_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c4c19018b01aac6e8c2c050290b3be6f175bd2c9 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/hyperopt_conditional_search_space_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/lightgbm_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/lightgbm_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ed8af924ac2d85f422262a579401282160458160 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/lightgbm_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/logging_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/logging_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0b30a44574da6d8ae7ac4e5714608bce1483e5cf Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/logging_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mlflow_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mlflow_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b025baf8e24019c38e9168a1ea61556199b734fa Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mlflow_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mlflow_ptl.cpython-311.pyc 
b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mlflow_ptl.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff45bc6aeb32ef9b8255a4353c240ee13c2c77ec Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mlflow_ptl.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mnist_ptl_mini.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mnist_ptl_mini.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f23c55053f611cdbb4c5888e41d070e49772c320 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mnist_ptl_mini.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mnist_pytorch.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mnist_pytorch.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a6c91e843574fd1ba84bed03e95d31363e763162 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mnist_pytorch.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mnist_pytorch_trainable.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mnist_pytorch_trainable.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..028458fe4fa439c727e6a0c96170b25a6b1eb246 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/mnist_pytorch_trainable.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/nevergrad_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/nevergrad_example.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..1ef33b122b8824c636f566e490b6b1fa3c4c77be Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/nevergrad_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/optuna_define_by_run_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/optuna_define_by_run_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6372f8f163b03ce116584e9353de7da351fc0e16 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/optuna_define_by_run_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/optuna_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/optuna_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..86d4a625118db1afdc46cf20c909bf44de5fdda2 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/optuna_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/optuna_multiobjective_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/optuna_multiobjective_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5fcd5632c829bba29d9ff7995799922bfc081946 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/optuna_multiobjective_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pb2_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pb2_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e7b3d770a43c6690d6cf46e08bf2b951b5f96549 Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pb2_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pb2_ppo_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pb2_ppo_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0996c56e0c0d96f20ff1745d7a17f9144887367d Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pb2_ppo_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_convnet_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_convnet_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1e89e54e74412d99ce1812083f63d2c7ae9463ce Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_convnet_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_convnet_function_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_convnet_function_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3d6bfdad9d2ee0af5e8f7ef75ed67f7f6037b917 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_convnet_function_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3021c4e8bed9d29dfc8c075ad460df4757d4c0bd Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_example.cpython-311.pyc differ diff --git 
a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_function.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_function.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4474d37a1eb309ce823afb31b5a90d73f3a88738 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_function.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_memnn_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_memnn_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..92875bed1fc16c48cc37262a36ff118da459797c Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_memnn_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_ppo_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_ppo_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1f7f421a6aba429470bdf9ccc72f4e3b53a1ba50 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_ppo_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_tune_cifar10_with_keras.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_tune_cifar10_with_keras.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c9c4bb76b181330f420b9c45d666bf8cff2a9c4 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/pbt_tune_cifar10_with_keras.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/tf_mnist_example.cpython-311.pyc 
b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/tf_mnist_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1a1dca2e0fa15f214d88e5e4c18df6776c4d2c56 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/tf_mnist_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/tune_basic_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/tune_basic_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2cfd0f00921fc8645b6e61da7166caa6e30e24a8 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/tune_basic_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/tune_mnist_keras.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/tune_mnist_keras.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f0253b844a701c1025962748f50c02b19052f8e Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/tune_mnist_keras.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/xgboost_dynamic_resources_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/xgboost_dynamic_resources_example.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5349f838a976e567f634c425a31a2f00882b1ee0 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/xgboost_dynamic_resources_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/xgboost_example.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/xgboost_example.cpython-311.pyc new file mode 100644 
index 0000000000000000000000000000000000000000..fca5cf6e0951db7e16eafd9cda986dbd5e95b946 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/__pycache__/xgboost_example.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/async_hyperband_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/async_hyperband_example.py new file mode 100644 index 0000000000000000000000000000000000000000..821a201eef8c38c094d4cf0644ece484ccbd418c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/async_hyperband_example.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python + +import argparse +import time + +from ray import train, tune +from ray.tune.schedulers import AsyncHyperBandScheduler + + +def evaluation_fn(step, width, height): + time.sleep(0.1) + return (0.1 + width * step / 100) ** (-1) + height * 0.1 + + +def easy_objective(config): + # Hyperparameters + width, height = config["width"], config["height"] + + for step in range(config["steps"]): + # Iterative training function - can be an arbitrary training procedure + intermediate_score = evaluation_fn(step, width, height) + # Feed the score back back to Tune. + train.report({"iterations": step, "mean_loss": intermediate_score}) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + # AsyncHyperBand enables aggressive early stopping of bad trials. 
+ scheduler = AsyncHyperBandScheduler(grace_period=5, max_t=100) + + # 'training_iteration' is incremented every time `trainable.step` is called + stopping_criteria = {"training_iteration": 1 if args.smoke_test else 9999} + + tuner = tune.Tuner( + tune.with_resources(easy_objective, {"cpu": 1, "gpu": 0}), + run_config=train.RunConfig( + name="asynchyperband_test", + stop=stopping_criteria, + verbose=1, + ), + tune_config=tune.TuneConfig( + metric="mean_loss", mode="min", scheduler=scheduler, num_samples=20 + ), + param_space={ # Hyperparameter space + "steps": 100, + "width": tune.uniform(10, 100), + "height": tune.uniform(0, 100), + }, + ) + results = tuner.fit() + print("Best hyperparameters found were: ", results.get_best_result().config) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/ax_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/ax_example.py new file mode 100644 index 0000000000000000000000000000000000000000..feccb2f9b4014322f78e6d393e8356352c203129 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/ax_example.py @@ -0,0 +1,97 @@ +"""This example demonstrates the usage of AxSearch with Ray Tune. + +It also checks that it is usable with a separate scheduler. + +Requires the Ax library to be installed (`pip install ax-platform`). 
+""" + +import time + +import numpy as np + +from ray import train, tune +from ray.tune.schedulers import AsyncHyperBandScheduler +from ray.tune.search.ax import AxSearch + + +def hartmann6(x): + alpha = np.array([1.0, 1.2, 3.0, 3.2]) + A = np.array( + [ + [10, 3, 17, 3.5, 1.7, 8], + [0.05, 10, 17, 0.1, 8, 14], + [3, 3.5, 1.7, 10, 17, 8], + [17, 8, 0.05, 10, 0.1, 14], + ] + ) + P = 10 ** (-4) * np.array( + [ + [1312, 1696, 5569, 124, 8283, 5886], + [2329, 4135, 8307, 3736, 1004, 9991], + [2348, 1451, 3522, 2883, 3047, 6650], + [4047, 8828, 8732, 5743, 1091, 381], + ] + ) + y = 0.0 + for j, alpha_j in enumerate(alpha): + t = 0 + for k in range(6): + t += A[j, k] * ((x[k] - P[j, k]) ** 2) + y -= alpha_j * np.exp(-t) + return y + + +def easy_objective(config): + for i in range(config["iterations"]): + x = np.array([config.get("x{}".format(i + 1)) for i in range(6)]) + train.report( + { + "timesteps_total": i, + "hartmann6": hartmann6(x), + "l2norm": np.sqrt((x**2).sum()), + } + ) + time.sleep(0.02) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + algo = AxSearch( + parameter_constraints=["x1 + x2 <= 2.0"], # Optional. + outcome_constraints=["l2norm <= 1.25"], # Optional. 
+ ) + # Limit to 4 concurrent trials + algo = tune.search.ConcurrencyLimiter(algo, max_concurrent=4) + scheduler = AsyncHyperBandScheduler() + tuner = tune.Tuner( + easy_objective, + run_config=train.RunConfig( + name="ax", + stop={"timesteps_total": 100}, + ), + tune_config=tune.TuneConfig( + metric="hartmann6", # provided in the 'easy_objective' function + mode="min", + search_alg=algo, + scheduler=scheduler, + num_samples=10 if args.smoke_test else 50, + ), + param_space={ + "iterations": 100, + "x1": tune.uniform(0.0, 1.0), + "x2": tune.uniform(0.0, 1.0), + "x3": tune.uniform(0.0, 1.0), + "x4": tune.uniform(0.0, 1.0), + "x5": tune.uniform(0.0, 1.0), + "x6": tune.uniform(0.0, 1.0), + }, + ) + results = tuner.fit() + print("Best hyperparameters found were: ", results.get_best_result().config) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/bayesopt_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/bayesopt_example.py new file mode 100644 index 0000000000000000000000000000000000000000..90399ea08a0fb1c2d3ed6ad0b76650e5fa92c69e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/bayesopt_example.py @@ -0,0 +1,63 @@ +"""This example demonstrates the usage of BayesOpt with Ray Tune. + +It also checks that it is usable with a separate scheduler. + +Requires the BayesOpt library to be installed (`pip install bayesian-optimization`). 
+""" +import time + +from ray import train, tune +from ray.tune.schedulers import AsyncHyperBandScheduler +from ray.tune.search import ConcurrencyLimiter +from ray.tune.search.bayesopt import BayesOptSearch + + +def evaluation_fn(step, width, height): + return (0.1 + width * step / 100) ** (-1) + height * 0.1 + + +def easy_objective(config): + # Hyperparameters + width, height = config["width"], config["height"] + + for step in range(config["steps"]): + # Iterative training function - can be any arbitrary training procedure + intermediate_score = evaluation_fn(step, width, height) + # Feed the score back back to Tune. + train.report({"iterations": step, "mean_loss": intermediate_score}) + time.sleep(0.1) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + algo = BayesOptSearch(utility_kwargs={"kind": "ucb", "kappa": 2.5, "xi": 0.0}) + algo = ConcurrencyLimiter(algo, max_concurrent=4) + scheduler = AsyncHyperBandScheduler() + tuner = tune.Tuner( + easy_objective, + tune_config=tune.TuneConfig( + metric="mean_loss", + mode="min", + search_alg=algo, + scheduler=scheduler, + num_samples=10 if args.smoke_test else 1000, + ), + run_config=train.RunConfig( + name="my_exp", + ), + param_space={ + "steps": 100, + "width": tune.uniform(0, 20), + "height": tune.uniform(-100, 100), + }, + ) + results = tuner.fit() + + print("Best hyperparameters found were: ", results.get_best_result().config) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/bohb_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/bohb_example.py new file mode 100644 index 0000000000000000000000000000000000000000..2bf409327b740deb1e789a242b31d2995827f5ca --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/bohb_example.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python + +"""This example 
demonstrates the usage of BOHB with Ray Tune. + +Requires the HpBandSter and ConfigSpace libraries to be installed +(`pip install hpbandster ConfigSpace`). +""" + +import json +import os +import time + +import numpy as np + +import ray +from ray import train, tune +from ray.tune import Trainable +from ray.tune.schedulers.hb_bohb import HyperBandForBOHB +from ray.tune.search.bohb import TuneBOHB + + +class MyTrainableClass(Trainable): + """Example agent whose learning curve is a random sigmoid. + + The dummy hyperparameters "width" and "height" determine the slope and + maximum reward value reached. + """ + + def setup(self, config): + self.timestep = 0 + + def step(self): + self.timestep += 1 + v = np.tanh(float(self.timestep) / self.config.get("width", 1)) + v *= self.config.get("height", 1) + time.sleep(0.1) + # Here we use `episode_reward_mean`, but you can also report other + # objectives such as loss or accuracy. + return {"episode_reward_mean": v} + + def save_checkpoint(self, checkpoint_dir): + path = os.path.join(checkpoint_dir, "checkpoint") + with open(path, "w") as f: + f.write(json.dumps({"timestep": self.timestep})) + + def load_checkpoint(self, checkpoint_dir): + path = os.path.join(checkpoint_dir, "checkpoint") + with open(path, "r") as f: + self.timestep = json.loads(f.read())["timestep"] + + +if __name__ == "__main__": + import sys + + if sys.version_info >= (3, 12): + # TuneBOHB is not compatible with Python 3.12 + sys.exit(0) + + ray.init(num_cpus=8) + + config = { + "iterations": 100, + "width": tune.uniform(0, 20), + "height": tune.uniform(-100, 100), + "activation": tune.choice(["relu", "tanh"]), + } + + # Optional: Pass the parameter space yourself + # import ConfigSpace as CS + # config_space = CS.ConfigurationSpace() + # config_space.add_hyperparameter( + # CS.UniformFloatHyperparameter("width", lower=0, upper=20)) + # config_space.add_hyperparameter( + # CS.UniformFloatHyperparameter("height", lower=-100, upper=100)) + # 
config_space.add_hyperparameter( + # CS.CategoricalHyperparameter( + # "activation", choices=["relu", "tanh"])) + + max_iterations = 10 + bohb_hyperband = HyperBandForBOHB( + time_attr="training_iteration", + max_t=max_iterations, + reduction_factor=2, + stop_last_trials=False, + ) + + bohb_search = TuneBOHB( + # space=config_space, # If you want to set the space manually + ) + bohb_search = tune.search.ConcurrencyLimiter(bohb_search, max_concurrent=4) + + tuner = tune.Tuner( + MyTrainableClass, + run_config=train.RunConfig( + name="bohb_test", stop={"training_iteration": max_iterations} + ), + tune_config=tune.TuneConfig( + metric="episode_reward_mean", + mode="max", + scheduler=bohb_hyperband, + search_alg=bohb_search, + num_samples=32, + ), + param_space=config, + ) + results = tuner.fit() + + print("Best hyperparameters found were: ", results.get_best_result().config) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/custom_func_checkpointing.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/custom_func_checkpointing.py new file mode 100644 index 0000000000000000000000000000000000000000..39531ad52802b3bed1d89ebecf1f79298f1af30a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/custom_func_checkpointing.py @@ -0,0 +1,70 @@ +# If want to use checkpointing with a custom training function (not a Ray +# integration like PyTorch or Tensorflow), your function can read/write +# checkpoint through the ``ray.train.report(metrics, checkpoint=...)`` API. 
+import argparse +import json +import os +import tempfile +import time + +from ray import train, tune +from ray.train import Checkpoint + + +def evaluation_fn(step, width, height): + time.sleep(0.1) + return (0.1 + width * step / 100) ** (-1) + height * 0.1 + + +def train_func(config): + step = 0 + width, height = config["width"], config["height"] + + checkpoint = train.get_checkpoint() + if checkpoint: + with checkpoint.as_directory() as checkpoint_dir: + with open(os.path.join(checkpoint_dir, "checkpoint.json")) as f: + state = json.load(f) + step = state["step"] + 1 + + for current_step in range(step, 100): + intermediate_score = evaluation_fn(current_step, width, height) + + with tempfile.TemporaryDirectory() as temp_checkpoint_dir: + with open(os.path.join(temp_checkpoint_dir, "checkpoint.json"), "w") as f: + json.dump({"step": current_step}, f) + train.report( + {"iterations": current_step, "mean_loss": intermediate_score}, + checkpoint=Checkpoint.from_directory(temp_checkpoint_dir), + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + tuner = tune.Tuner( + train_func, + run_config=train.RunConfig( + name="hyperband_test", + stop={"training_iteration": 1 if args.smoke_test else 10}, + ), + tune_config=tune.TuneConfig( + metric="mean_loss", + mode="min", + num_samples=5, + ), + param_space={ + "steps": 10, + "width": tune.randint(10, 100), + "height": tune.loguniform(10, 100), + }, + ) + results = tuner.fit() + best_result = results.get_best_result() + print("Best hyperparameters: ", best_result.config) + best_checkpoint = best_result.checkpoint + print("Best checkpoint: ", best_checkpoint) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/hyperband_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/hyperband_example.py new file mode 100644 index 
0000000000000000000000000000000000000000..4e3e8e6759900907959be3cb1cf8cb08462253ec --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/hyperband_example.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python + +import argparse + +import ray +from ray import train, tune +from ray.tune.schedulers import HyperBandScheduler +from ray.tune.utils.mock_trainable import MyTrainableClass + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + ray.init(num_cpus=4 if args.smoke_test else None) + + # Hyperband early stopping, configured with `episode_reward_mean` as the + # objective and `training_iteration` as the time unit, + # which is automatically filled by Tune. + hyperband = HyperBandScheduler(time_attr="training_iteration", max_t=200) + + tuner = tune.Tuner( + MyTrainableClass, + run_config=train.RunConfig( + name="hyperband_test", + stop={"training_iteration": 1 if args.smoke_test else 200}, + verbose=1, + failure_config=train.FailureConfig( + fail_fast=True, + ), + ), + tune_config=tune.TuneConfig( + num_samples=20 if args.smoke_test else 200, + metric="episode_reward_mean", + mode="max", + scheduler=hyperband, + ), + param_space={"width": tune.randint(10, 90), "height": tune.randint(0, 100)}, + ) + results = tuner.fit() + + print("Best hyperparameters found were: ", results.get_best_result().config) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/hyperband_function_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/hyperband_function_example.py new file mode 100644 index 0000000000000000000000000000000000000000..b7ac7935a2eb86bc2342610d0f807f2140746575 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/hyperband_function_example.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python + +import argparse +import json +import os +import tempfile + +import numpy as np + 
+import ray +from ray import train, tune +from ray.train import Checkpoint +from ray.tune.schedulers import HyperBandScheduler + + +def train_func(config): + step = 0 + checkpoint = train.get_checkpoint() + if checkpoint: + with checkpoint.as_directory() as checkpoint_dir: + with open(os.path.join(checkpoint_dir, "checkpoint.json")) as f: + step = json.load(f)["timestep"] + 1 + + for timestep in range(step, 100): + v = np.tanh(float(timestep) / config.get("width", 1)) + v *= config.get("height", 1) + + # Checkpoint the state of the training every 3 steps + # Note that this is only required for certain schedulers + with tempfile.TemporaryDirectory() as temp_checkpoint_dir: + checkpoint = None + if timestep % 3 == 0: + with open( + os.path.join(temp_checkpoint_dir, "checkpoint.json"), "w" + ) as f: + json.dump({"timestep": timestep}, f) + checkpoint = Checkpoint.from_directory(temp_checkpoint_dir) + + # Here we use `episode_reward_mean`, but you can also report other + # objectives such as loss or accuracy. + train.report({"episode_reward_mean": v}, checkpoint=checkpoint) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + ray.init(num_cpus=4 if args.smoke_test else None) + + # Hyperband early stopping, configured with `episode_reward_mean` as the + # objective and `training_iteration` as the time unit, + # which is automatically filled by Tune. 
+ hyperband = HyperBandScheduler(max_t=200) + + tuner = tune.Tuner( + train_func, + run_config=train.RunConfig( + name="hyperband_test", + stop={"training_iteration": 10 if args.smoke_test else 99999}, + failure_config=train.FailureConfig( + fail_fast=True, + ), + ), + tune_config=tune.TuneConfig( + num_samples=20, + metric="episode_reward_mean", + mode="max", + scheduler=hyperband, + ), + param_space={"height": tune.uniform(0, 100)}, + ) + results = tuner.fit() + print("Best hyperparameters found were: ", results.get_best_result().config) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/hyperopt_conditional_search_space_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/hyperopt_conditional_search_space_example.py new file mode 100644 index 0000000000000000000000000000000000000000..741f9fe23be0ec67bcd730c69d233de1b8ad9d7b --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/hyperopt_conditional_search_space_example.py @@ -0,0 +1,110 @@ +"""This example demonstrates the usage of conditional search spaces with Tune. + +It also checks that it is usable with a separate scheduler. + +Requires the HyperOpt library to be installed (`pip install hyperopt`). + +For an example of using a Tune search space, see +:doc:`/tune/examples/hyperopt_example`. +""" + +import time + +from hyperopt import hp + +import ray +from ray import train, tune +from ray.tune.schedulers import AsyncHyperBandScheduler +from ray.tune.search import ConcurrencyLimiter +from ray.tune.search.hyperopt import HyperOptSearch + + +def f_unpack_dict(dct): + """ + Unpacks all sub-dictionaries in given dictionary recursively. 
+ There should be no duplicated keys across all nested + subdictionaries, or some instances will be lost without warning + + Source: https://www.kaggle.com/fanvacoolt/tutorial-on-hyperopt + + Parameters: + ---------------- + dct : dictionary to unpack + + Returns: + ---------------- + : unpacked dictionary + """ + + res = {} + for k, v in dct.items(): + if isinstance(v, dict): + res = {**res, **f_unpack_dict(v)} + else: + res[k] = v + + return res + + +def evaluation_fn(step, width, height, mult=1): + return (0.1 + width * step / 100) ** (-1) + height * 0.1 * mult + + +def easy_objective(config_in): + # Hyperparameters + config = f_unpack_dict(config_in) + width, height, mult = config["width"], config["height"], config.get("mult", 1) + print(config) + + for step in range(config["steps"]): + # Iterative training function - can be any arbitrary training procedure + intermediate_score = evaluation_fn(step, width, height, mult) + # Feed the score back back to Tune. + train.report({"iterations": step, "mean_loss": intermediate_score}) + time.sleep(0.1) + + +config_space = { + "activation": hp.choice( + "activation", + [ + {"activation": "relu", "mult": hp.uniform("mult", 1, 2)}, + {"activation": "tanh"}, + ], + ), + "width": hp.uniform("width", 0, 20), + "height": hp.uniform("heright", -100, 100), + "steps": 100, +} + + +def run_hyperopt_tune(config_dict=config_space, smoke_test=False): + algo = HyperOptSearch(space=config_dict, metric="mean_loss", mode="min") + algo = ConcurrencyLimiter(algo, max_concurrent=4) + scheduler = AsyncHyperBandScheduler() + tuner = tune.Tuner( + easy_objective, + tune_config=tune.TuneConfig( + metric="mean_loss", + mode="min", + search_alg=algo, + scheduler=scheduler, + num_samples=10 if smoke_test else 100, + ), + ) + results = tuner.fit() + print("Best hyperparameters found were: ", results.get_best_result().config) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + 
"--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + ray.init(configure_logging=False) + + run_hyperopt_tune(smoke_test=args.smoke_test) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/logging_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/logging_example.py new file mode 100644 index 0000000000000000000000000000000000000000..44cb4a68d6abd96cbb32aa1bd105cbf1501272d9 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/logging_example.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python + +import argparse +import time + +from ray import train, tune +from ray.tune.logger import LoggerCallback + + +class TestLoggerCallback(LoggerCallback): + def on_trial_result(self, iteration, trials, trial, result, **info): + print(f"TestLogger for trial {trial}: {result}") + + +def trial_str_creator(trial): + return "{}_{}_123".format(trial.trainable_name, trial.trial_id) + + +def evaluation_fn(step, width, height): + time.sleep(0.1) + return (0.1 + width * step / 100) ** (-1) + height * 0.1 + + +def easy_objective(config): + # Hyperparameters + width, height = config["width"], config["height"] + + for step in range(config["steps"]): + # Iterative training function - can be any arbitrary training procedure + intermediate_score = evaluation_fn(step, width, height) + # Feed the score back back to Tune. 
+ train.report({"iterations": step, "mean_loss": intermediate_score}) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + tuner = tune.Tuner( + easy_objective, + run_config=train.RunConfig( + name="hyperband_test", + callbacks=[TestLoggerCallback()], + stop={"training_iteration": 1 if args.smoke_test else 100}, + ), + tune_config=tune.TuneConfig( + metric="mean_loss", + mode="min", + num_samples=5, + trial_name_creator=trial_str_creator, + trial_dirname_creator=trial_str_creator, + ), + param_space={ + "steps": 100, + "width": tune.randint(10, 100), + "height": tune.loguniform(10, 100), + }, + ) + results = tuner.fit() + + print("Best hyperparameters: ", results.get_best_result().config) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/mlflow_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/mlflow_example.py new file mode 100644 index 0000000000000000000000000000000000000000..6de85b82eac2b9682db41d5395c1f9cf81df3fac --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/mlflow_example.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python +"""Examples using MLfowLoggerCallback and setup_mlflow. +""" +import os +import tempfile +import time + +import mlflow + +from ray import train, tune +from ray.air.integrations.mlflow import MLflowLoggerCallback, setup_mlflow + + +def evaluation_fn(step, width, height): + return (0.1 + width * step / 100) ** (-1) + height * 0.1 + + +def train_function(config): + # Hyperparameters + width, height = config["width"], config["height"] + + for step in range(config.get("steps", 100)): + # Iterative training function - can be any arbitrary training procedure + intermediate_score = evaluation_fn(step, width, height) + # Feed the score back to Tune. 
+ train.report({"iterations": step, "mean_loss": intermediate_score}) + time.sleep(0.1) + + +def tune_with_callback(mlflow_tracking_uri, finish_fast=False): + + tuner = tune.Tuner( + train_function, + run_config=train.RunConfig( + name="mlflow", + callbacks=[ + MLflowLoggerCallback( + tracking_uri=mlflow_tracking_uri, + experiment_name="example", + save_artifact=True, + ) + ], + ), + tune_config=tune.TuneConfig( + num_samples=5, + ), + param_space={ + "width": tune.randint(10, 100), + "height": tune.randint(0, 100), + "steps": 5 if finish_fast else 100, + }, + ) + tuner.fit() + + +def train_function_mlflow(config): + setup_mlflow(config) + + # Hyperparameters + width, height = config["width"], config["height"] + + for step in range(config.get("steps", 100)): + # Iterative training function - can be any arbitrary training procedure + intermediate_score = evaluation_fn(step, width, height) + # Log the metrics to mlflow + mlflow.log_metrics(dict(mean_loss=intermediate_score), step=step) + # Feed the score back to Tune. + train.report({"iterations": step, "mean_loss": intermediate_score}) + time.sleep(0.1) + + +def tune_with_setup(mlflow_tracking_uri, finish_fast=False): + # Set the experiment, or create a new one if does not exist yet. 
+ mlflow.set_tracking_uri(mlflow_tracking_uri) + mlflow.set_experiment(experiment_name="mixin_example") + tuner = tune.Tuner( + train_function_mlflow, + run_config=train.RunConfig( + name="mlflow", + ), + tune_config=tune.TuneConfig( + num_samples=5, + ), + param_space={ + "width": tune.randint(10, 100), + "height": tune.randint(0, 100), + "steps": 5 if finish_fast else 100, + "mlflow": { + "experiment_name": "mixin_example", + "tracking_uri": mlflow.get_tracking_uri(), + }, + }, + ) + tuner.fit() + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + parser.add_argument( + "--tracking-uri", + type=str, + help="The tracking URI for the MLflow tracking server.", + ) + args, _ = parser.parse_known_args() + + if args.smoke_test: + mlflow_tracking_uri = os.path.join(tempfile.gettempdir(), "mlruns") + else: + mlflow_tracking_uri = args.tracking_uri + + tune_with_callback(mlflow_tracking_uri, finish_fast=args.smoke_test) + if not args.smoke_test: + df = mlflow.search_runs( + [mlflow.get_experiment_by_name("example").experiment_id] + ) + print(df) + + tune_with_setup(mlflow_tracking_uri, finish_fast=args.smoke_test) + if not args.smoke_test: + df = mlflow.search_runs( + [mlflow.get_experiment_by_name("mixin_example").experiment_id] + ) + print(df) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/mlflow_ptl.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/mlflow_ptl.py new file mode 100644 index 0000000000000000000000000000000000000000..e1828661b98cd8e2013152c3f9aeeaf7cb51aa6d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/mlflow_ptl.py @@ -0,0 +1,105 @@ +"""An example showing how to use Pytorch Lightning training, Ray Tune +HPO, and MLflow autologging all together.""" + +import os +import tempfile + +import mlflow +import pytorch_lightning as pl + +from ray import train, tune +from 
ray.air.integrations.mlflow import setup_mlflow +from ray.tune.examples.mnist_ptl_mini import LightningMNISTClassifier, MNISTDataModule +from ray.tune.integration.pytorch_lightning import TuneReportCallback + + +def train_mnist_tune(config, data_dir=None, num_epochs=10, num_gpus=0): + setup_mlflow( + config, + experiment_name=config.get("experiment_name", None), + tracking_uri=config.get("tracking_uri", None), + ) + + model = LightningMNISTClassifier(config, data_dir) + dm = MNISTDataModule( + data_dir=data_dir, num_workers=1, batch_size=config["batch_size"] + ) + metrics = {"loss": "ptl/val_loss", "acc": "ptl/val_accuracy"} + mlflow.pytorch.autolog() + trainer = pl.Trainer( + max_epochs=num_epochs, + gpus=num_gpus, + progress_bar_refresh_rate=0, + callbacks=[TuneReportCallback(metrics, on="validation_end")], + ) + trainer.fit(model, dm) + + +def tune_mnist( + num_samples=10, + num_epochs=10, + gpus_per_trial=0, + tracking_uri=None, + experiment_name="ptl_autologging_example", +): + data_dir = os.path.join(tempfile.gettempdir(), "mnist_data_") + # Download data + MNISTDataModule(data_dir=data_dir, batch_size=32).prepare_data() + + # Set the MLflow experiment, or create it if it does not exist. 
+ mlflow.set_tracking_uri(tracking_uri) + mlflow.set_experiment(experiment_name) + + config = { + "layer_1": tune.choice([32, 64, 128]), + "layer_2": tune.choice([64, 128, 256]), + "lr": tune.loguniform(1e-4, 1e-1), + "batch_size": tune.choice([32, 64, 128]), + "experiment_name": experiment_name, + "tracking_uri": mlflow.get_tracking_uri(), + "data_dir": os.path.join(tempfile.gettempdir(), "mnist_data_"), + "num_epochs": num_epochs, + } + + trainable = tune.with_parameters( + train_mnist_tune, + data_dir=data_dir, + num_epochs=num_epochs, + num_gpus=gpus_per_trial, + ) + + tuner = tune.Tuner( + tune.with_resources(trainable, resources={"cpu": 1, "gpu": gpus_per_trial}), + tune_config=tune.TuneConfig( + metric="loss", + mode="min", + num_samples=num_samples, + ), + run_config=train.RunConfig( + name="tune_mnist", + ), + param_space=config, + ) + results = tuner.fit() + + print("Best hyperparameters found were: ", results.get_best_result().config) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + if args.smoke_test: + tune_mnist( + num_samples=1, + num_epochs=1, + gpus_per_trial=0, + tracking_uri=os.path.join(tempfile.gettempdir(), "mlruns"), + ) + else: + tune_mnist(num_samples=10, num_epochs=10, gpus_per_trial=0) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/mnist_ptl_mini.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/mnist_ptl_mini.py new file mode 100644 index 0000000000000000000000000000000000000000..2c60dc578c14388909014c867a515ced3263cdff --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/mnist_ptl_mini.py @@ -0,0 +1,166 @@ +import math +import os + +import pytorch_lightning as pl +import torch +from datasets import load_dataset +from filelock import FileLock +from torch.nn import functional as F +from torch.utils.data import 
DataLoader +from torchmetrics import Accuracy +from torchvision import transforms + +from ray import train, tune +from ray.tune.integration.pytorch_lightning import TuneReportCheckpointCallback + +PATH_DATASETS = os.environ.get("PATH_DATASETS", ".") + + +class MNISTDataModule(pl.LightningDataModule): + def __init__(self, batch_size: int, data_dir: str = PATH_DATASETS): + super().__init__() + self.data_dir = data_dir + self.transform = transforms.Compose( + [ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)), + ] + ) + self.batch_size = batch_size + self.dims = (1, 28, 28) + self.num_classes = 10 + + def prepare_data(self): + # download + with FileLock(os.path.expanduser("~/.data.lock")): + load_dataset("ylecun/mnist", cache_dir=self.data_dir) + + def setup(self, stage=None): + dataset = load_dataset("ylecun/mnist", cache_dir=self.data_dir) + + def transform_fn(sample): + return (self.transform(sample["image"]), sample["label"]) + + self.mnist_train = [transform_fn(sample) for sample in dataset["train"]] + self.mnist_val = [transform_fn(sample) for sample in dataset["test"]] + + def train_dataloader(self): + return DataLoader(self.mnist_train, batch_size=self.batch_size) + + def val_dataloader(self): + return DataLoader(self.mnist_val, batch_size=self.batch_size) + + +class LightningMNISTClassifier(pl.LightningModule): + def __init__(self, config, data_dir=None): + super(LightningMNISTClassifier, self).__init__() + + self.data_dir = data_dir or os.getcwd() + self.lr = config["lr"] + layer_1, layer_2 = config["layer_1"], config["layer_2"] + self.batch_size = config["batch_size"] + + # mnist images are (1, 28, 28) (channels, width, height) + self.layer_1 = torch.nn.Linear(28 * 28, layer_1) + self.layer_2 = torch.nn.Linear(layer_1, layer_2) + self.layer_3 = torch.nn.Linear(layer_2, 10) + self.accuracy = Accuracy(task="multiclass", num_classes=10, top_k=1) + + def forward(self, x): + batch_size, channels, width, height = x.size() + x = 
x.view(batch_size, -1) + x = self.layer_1(x) + x = torch.relu(x) + x = self.layer_2(x) + x = torch.relu(x) + x = self.layer_3(x) + x = torch.log_softmax(x, dim=1) + return x + + def configure_optimizers(self): + return torch.optim.Adam(self.parameters(), lr=self.lr) + + def training_step(self, train_batch, batch_idx): + x, y = train_batch + logits = self.forward(x) + loss = F.nll_loss(logits, y) + acc = self.accuracy(logits, y) + self.log("ptl/train_loss", loss) + self.log("ptl/train_accuracy", acc) + return loss + + def validation_step(self, val_batch, batch_idx): + x, y = val_batch + logits = self.forward(x) + loss = F.nll_loss(logits, y) + acc = self.accuracy(logits, y) + return {"val_loss": loss, "val_accuracy": acc} + + def validation_epoch_end(self, outputs): + avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean() + avg_acc = torch.stack([x["val_accuracy"] for x in outputs]).mean() + self.log("ptl/val_loss", avg_loss) + self.log("ptl/val_accuracy", avg_acc) + + +def train_mnist_tune(config, num_epochs=10, num_gpus=0): + data_dir = os.path.abspath("./data") + model = LightningMNISTClassifier(config, data_dir) + with FileLock(os.path.expanduser("~/.data.lock")): + dm = MNISTDataModule(data_dir=data_dir, batch_size=config["batch_size"]) + metrics = {"loss": "ptl/val_loss", "acc": "ptl/val_accuracy"} + trainer = pl.Trainer( + max_epochs=num_epochs, + # If fractional GPUs passed in, convert to int. 
+ gpus=math.ceil(num_gpus), + enable_progress_bar=False, + callbacks=[ + TuneReportCheckpointCallback( + metrics, on="validation_end", save_checkpoints=False + ) + ], + ) + trainer.fit(model, dm) + + +def tune_mnist(num_samples=10, num_epochs=10, gpus_per_trial=0): + config = { + "layer_1": tune.choice([32, 64, 128]), + "layer_2": tune.choice([64, 128, 256]), + "lr": tune.loguniform(1e-4, 1e-1), + "batch_size": tune.choice([32, 64, 128]), + } + + trainable = tune.with_parameters( + train_mnist_tune, num_epochs=num_epochs, num_gpus=gpus_per_trial + ) + tuner = tune.Tuner( + tune.with_resources(trainable, resources={"cpu": 1, "gpu": gpus_per_trial}), + tune_config=tune.TuneConfig( + metric="loss", + mode="min", + num_samples=num_samples, + ), + run_config=train.RunConfig( + name="tune_mnist", + ), + param_space=config, + ) + results = tuner.fit() + + print("Best hyperparameters found were: ", results.get_best_result().config) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + if args.smoke_test: + tune_mnist(num_samples=1, num_epochs=1, gpus_per_trial=0) + else: + tune_mnist(num_samples=10, num_epochs=10, gpus_per_trial=0) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/mnist_pytorch.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/mnist_pytorch.py new file mode 100644 index 0000000000000000000000000000000000000000..e4962b185e36010df95581feea742feb62bf4f20 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/mnist_pytorch.py @@ -0,0 +1,161 @@ +# Original Code here: +# https://github.com/pytorch/examples/blob/master/mnist/main.py + +import argparse +import os +import tempfile + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from filelock import FileLock +from torchvision import datasets, 
transforms + +import ray +from ray import train, tune +from ray.train import Checkpoint +from ray.tune.schedulers import AsyncHyperBandScheduler + +# Change these values if you want the training to run quicker or slower. +EPOCH_SIZE = 512 +TEST_SIZE = 256 + + +class ConvNet(nn.Module): + def __init__(self): + super(ConvNet, self).__init__() + self.conv1 = nn.Conv2d(1, 3, kernel_size=3) + self.fc = nn.Linear(192, 10) + + def forward(self, x): + x = F.relu(F.max_pool2d(self.conv1(x), 3)) + x = x.view(-1, 192) + x = self.fc(x) + return F.log_softmax(x, dim=1) + + +def train_func(model, optimizer, train_loader, device=None): + device = device or torch.device("cpu") + model.train() + for batch_idx, (data, target) in enumerate(train_loader): + if batch_idx * len(data) > EPOCH_SIZE: + return + data, target = data.to(device), target.to(device) + optimizer.zero_grad() + output = model(data) + loss = F.nll_loss(output, target) + loss.backward() + optimizer.step() + + +def test_func(model, data_loader, device=None): + device = device or torch.device("cpu") + model.eval() + correct = 0 + total = 0 + with torch.no_grad(): + for batch_idx, (data, target) in enumerate(data_loader): + if batch_idx * len(data) > TEST_SIZE: + break + data, target = data.to(device), target.to(device) + outputs = model(data) + _, predicted = torch.max(outputs.data, 1) + total += target.size(0) + correct += (predicted == target).sum().item() + + return correct / total + + +def get_data_loaders(batch_size=64): + mnist_transforms = transforms.Compose( + [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] + ) + + # We add FileLock here because multiple workers will want to + # download data, and this may cause overwrites since + # DataLoader is not threadsafe. 
+ with FileLock(os.path.expanduser("~/data.lock")): + train_loader = torch.utils.data.DataLoader( + datasets.MNIST( + "~/data", train=True, download=True, transform=mnist_transforms + ), + batch_size=batch_size, + shuffle=True, + ) + test_loader = torch.utils.data.DataLoader( + datasets.MNIST( + "~/data", train=False, download=True, transform=mnist_transforms + ), + batch_size=batch_size, + shuffle=True, + ) + return train_loader, test_loader + + +def train_mnist(config): + should_checkpoint = config.get("should_checkpoint", False) + use_cuda = torch.cuda.is_available() + device = torch.device("cuda" if use_cuda else "cpu") + train_loader, test_loader = get_data_loaders() + model = ConvNet().to(device) + + optimizer = optim.SGD( + model.parameters(), lr=config["lr"], momentum=config["momentum"] + ) + + while True: + train_func(model, optimizer, train_loader, device) + acc = test_func(model, test_loader, device) + metrics = {"mean_accuracy": acc} + + # Report metrics (and possibly a checkpoint) + if should_checkpoint: + with tempfile.TemporaryDirectory() as tempdir: + torch.save(model.state_dict(), os.path.join(tempdir, "model.pt")) + train.report(metrics, checkpoint=Checkpoint.from_directory(tempdir)) + else: + train.report(metrics) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="PyTorch MNIST Example") + parser.add_argument( + "--cuda", action="store_true", default=False, help="Enables GPU training" + ) + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + ray.init(num_cpus=2 if args.smoke_test else None) + + # for early stopping + sched = AsyncHyperBandScheduler() + + resources_per_trial = {"cpu": 2, "gpu": int(args.cuda)} # set this for GPUs + tuner = tune.Tuner( + tune.with_resources(train_mnist, resources=resources_per_trial), + tune_config=tune.TuneConfig( + metric="mean_accuracy", + mode="max", + scheduler=sched, + num_samples=1 if 
args.smoke_test else 50, + ), + run_config=train.RunConfig( + name="exp", + stop={ + "mean_accuracy": 0.98, + "training_iteration": 5 if args.smoke_test else 100, + }, + ), + param_space={ + "lr": tune.loguniform(1e-4, 1e-2), + "momentum": tune.uniform(0.1, 0.9), + }, + ) + results = tuner.fit() + + print("Best config is:", results.get_best_result().config) + + assert not results.errors diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/mnist_pytorch_trainable.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/mnist_pytorch_trainable.py new file mode 100644 index 0000000000000000000000000000000000000000..9bcf34ffba245fe1f803d9c3530a509cd77de9c9 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/mnist_pytorch_trainable.py @@ -0,0 +1,98 @@ +# Original Code here: +# https://github.com/pytorch/examples/blob/master/mnist/main.py +from __future__ import print_function + +import argparse +import os + +import torch +import torch.optim as optim + +import ray +from ray import train, tune +from ray.tune.examples.mnist_pytorch import ( + ConvNet, + get_data_loaders, + test_func, + train_func, +) +from ray.tune.schedulers import ASHAScheduler + +# Change these values if you want the training to run quicker or slower. +EPOCH_SIZE = 512 +TEST_SIZE = 256 + +# Training settings +parser = argparse.ArgumentParser(description="PyTorch MNIST Example") +parser.add_argument( + "--use-gpu", action="store_true", default=False, help="enables CUDA training" +) +parser.add_argument("--ray-address", type=str, help="The Redis address of the cluster.") +parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" +) + + +# Below comments are for documentation purposes only. 
+# fmt: off +# __trainable_example_begin__ +class TrainMNIST(tune.Trainable): + def setup(self, config): + use_cuda = config.get("use_gpu") and torch.cuda.is_available() + self.device = torch.device("cuda" if use_cuda else "cpu") + self.train_loader, self.test_loader = get_data_loaders() + self.model = ConvNet().to(self.device) + self.optimizer = optim.SGD( + self.model.parameters(), + lr=config.get("lr", 0.01), + momentum=config.get("momentum", 0.9)) + + def step(self): + train_func( + self.model, self.optimizer, self.train_loader, device=self.device) + acc = test_func(self.model, self.test_loader, self.device) + return {"mean_accuracy": acc} + + def save_checkpoint(self, checkpoint_dir): + checkpoint_path = os.path.join(checkpoint_dir, "model.pth") + torch.save(self.model.state_dict(), checkpoint_path) + + def load_checkpoint(self, checkpoint_dir): + checkpoint_path = os.path.join(checkpoint_dir, "model.pth") + self.model.load_state_dict(torch.load(checkpoint_path)) + + +# __trainable_example_end__ +# fmt: on + +if __name__ == "__main__": + args = parser.parse_args() + ray.init(address=args.ray_address, num_cpus=6 if args.smoke_test else None) + sched = ASHAScheduler() + + tuner = tune.Tuner( + tune.with_resources(TrainMNIST, resources={"cpu": 3, "gpu": int(args.use_gpu)}), + run_config=train.RunConfig( + stop={ + "mean_accuracy": 0.95, + "training_iteration": 3 if args.smoke_test else 20, + }, + checkpoint_config=train.CheckpointConfig( + checkpoint_at_end=True, checkpoint_frequency=3 + ), + ), + tune_config=tune.TuneConfig( + metric="mean_accuracy", + mode="max", + scheduler=sched, + num_samples=1 if args.smoke_test else 20, + ), + param_space={ + "args": args, + "lr": tune.uniform(0.001, 0.1), + "momentum": tune.uniform(0.1, 0.9), + }, + ) + results = tuner.fit() + + print("Best config is:", results.get_best_result().config) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/nevergrad_example.py 
b/.venv/lib/python3.11/site-packages/ray/tune/examples/nevergrad_example.py new file mode 100644 index 0000000000000000000000000000000000000000..e579b781c857d1fd7ec3f0345a811d0b2c235989 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/nevergrad_example.py @@ -0,0 +1,77 @@ +"""This example demonstrates the usage of Nevergrad with Ray Tune. + +It also checks that it is usable with a separate scheduler. + +Requires the Nevergrad library to be installed (`pip install nevergrad`). +""" + +import time + +from ray import train, tune +from ray.tune.schedulers import AsyncHyperBandScheduler +from ray.tune.search import ConcurrencyLimiter +from ray.tune.search.nevergrad import NevergradSearch + + +def evaluation_fn(step, width, height): + return (0.1 + width * step / 100) ** (-1) + height * 0.1 + + +def easy_objective(config): + # Hyperparameters + width, height = config["width"], config["height"] + + for step in range(config["steps"]): + # Iterative training function - can be any arbitrary training procedure + intermediate_score = evaluation_fn(step, width, height) + # Feed the score back back to Tune. 
+ train.report({"iterations": step, "mean_loss": intermediate_score}) + time.sleep(0.1) + + +if __name__ == "__main__": + import argparse + + import nevergrad as ng + + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + # Optional: Pass the parameter space yourself + # space = ng.p.Dict( + # width=ng.p.Scalar(lower=0, upper=20), + # height=ng.p.Scalar(lower=-100, upper=100), + # activation=ng.p.Choice(choices=["relu", "tanh"]) + # ) + + algo = NevergradSearch( + optimizer=ng.optimizers.OnePlusOne, + # space=space, # If you want to set the space manually + ) + algo = ConcurrencyLimiter(algo, max_concurrent=4) + + scheduler = AsyncHyperBandScheduler() + + tuner = tune.Tuner( + easy_objective, + tune_config=tune.TuneConfig( + metric="mean_loss", + mode="min", + search_alg=algo, + scheduler=scheduler, + num_samples=10 if args.smoke_test else 50, + ), + run_config=train.RunConfig(name="nevergrad"), + param_space={ + "steps": 100, + "width": tune.uniform(0, 20), + "height": tune.uniform(-100, 100), + "activation": tune.choice(["relu", "tanh"]), + }, + ) + results = tuner.fit() + + print("Best hyperparameters found were: ", results.get_best_result().config) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/optuna_define_by_run_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/optuna_define_by_run_example.py new file mode 100644 index 0000000000000000000000000000000000000000..443fa3549ca15ba5c0f30f6fd3bd5bfe9729e3b5 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/optuna_define_by_run_example.py @@ -0,0 +1,93 @@ +"""This example demonstrates the usage of Optuna define-by-run with Ray Tune. + +It also checks that it is usable with a separate scheduler. + +Requires the Optuna library to be installed (`pip install optuna`). 
+ +For an example of using a Tune search space, see +:doc:`/tune/examples/optuna_example`. +""" + +import time +from typing import Any, Dict, Optional + +import ray +from ray import train, tune +from ray.tune.schedulers import AsyncHyperBandScheduler +from ray.tune.search import ConcurrencyLimiter +from ray.tune.search.optuna import OptunaSearch + + +def evaluation_fn(step, width, height, mult=1): + return (0.1 + width * step / 100) ** (-1) + height * 0.1 * mult + + +def easy_objective(config): + # Hyperparameters + width, height, mult = config["width"], config["height"], config.get("mult", 1) + print(config) + + for step in range(config["steps"]): + # Iterative training function - can be any arbitrary training procedure + intermediate_score = evaluation_fn(step, width, height, mult) + # Feed the score back back to Tune. + train.report({"iterations": step, "mean_loss": intermediate_score}) + time.sleep(0.1) + + +def define_by_run_func(trial) -> Optional[Dict[str, Any]]: + """Define-by-run function to create the search space. + + Ensure no actual computation takes place here. That should go into + the trainable passed to ``Tuner`` (in this example, that's + ``easy_objective``). + + For more information, see https://optuna.readthedocs.io/en/stable\ +/tutorial/10_key_features/002_configurations.html + + This function should either return None or a dict with constant values. + """ + # This param is not used in the objective function. + activation = trial.suggest_categorical("activation", ["relu", "tanh"]) + trial.suggest_float("width", 0, 20) + trial.suggest_float("height", -100, 100) + + # Define-by-run allows for conditional search spaces. + if activation == "relu": + trial.suggest_float("mult", 1, 2) + + # Return all constants in a dictionary. 
+ return {"steps": 100} + + +def run_optuna_tune(smoke_test=False): + algo = OptunaSearch(space=define_by_run_func, metric="mean_loss", mode="min") + algo = ConcurrencyLimiter(algo, max_concurrent=4) + scheduler = AsyncHyperBandScheduler() + tuner = tune.Tuner( + easy_objective, + tune_config=tune.TuneConfig( + metric="mean_loss", + mode="min", + search_alg=algo, + scheduler=scheduler, + num_samples=10 if smoke_test else 100, + ), + ) + results = tuner.fit() + + print("Best hyperparameters found were: ", results.get_best_result().config) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + ray.init(configure_logging=False) + + run_optuna_tune(smoke_test=args.smoke_test) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/optuna_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/optuna_example.py new file mode 100644 index 0000000000000000000000000000000000000000..17a7dfc7fe7209c4f615274a21e7214c49d5857d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/optuna_example.py @@ -0,0 +1,73 @@ +"""This example demonstrates the usage of Optuna with Ray Tune. + +It also checks that it is usable with a separate scheduler. + +Requires the Optuna library to be installed (`pip install optuna`). + +For an example of using an Optuna define-by-run function, see +:doc:`/tune/examples/optuna_define_by_run_example`. 
+""" + +import time + +import ray +from ray import train, tune +from ray.tune.schedulers import AsyncHyperBandScheduler +from ray.tune.search import ConcurrencyLimiter +from ray.tune.search.optuna import OptunaSearch + + +def evaluation_fn(step, width, height): + return (0.1 + width * step / 100) ** (-1) + height * 0.1 + + +def easy_objective(config): + # Hyperparameters + width, height = config["width"], config["height"] + + for step in range(config["steps"]): + # Iterative training function - can be any arbitrary training procedure + intermediate_score = evaluation_fn(step, width, height) + # Feed the score back back to Tune. + train.report({"iterations": step, "mean_loss": intermediate_score}) + time.sleep(0.1) + + +def run_optuna_tune(smoke_test=False): + algo = OptunaSearch() + algo = ConcurrencyLimiter(algo, max_concurrent=4) + scheduler = AsyncHyperBandScheduler() + tuner = tune.Tuner( + easy_objective, + tune_config=tune.TuneConfig( + metric="mean_loss", + mode="min", + search_alg=algo, + scheduler=scheduler, + num_samples=10 if smoke_test else 100, + ), + param_space={ + "steps": 100, + "width": tune.uniform(0, 20), + "height": tune.uniform(-100, 100), + # This is an ignored parameter. 
+ "activation": tune.choice(["relu", "tanh"]), + }, + ) + results = tuner.fit() + + print("Best hyperparameters found were: ", results.get_best_result().config) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + ray.init(configure_logging=False) + + run_optuna_tune(smoke_test=args.smoke_test) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/optuna_multiobjective_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/optuna_multiobjective_example.py new file mode 100644 index 0000000000000000000000000000000000000000..8b686c8db2741edef273ee303a4618a331132fa7 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/optuna_multiobjective_example.py @@ -0,0 +1,79 @@ +"""This example demonstrates the usage of Optuna with Ray Tune for +multi-objective optimization. + +Please note that schedulers may not work correctly with multi-objective +optimization. + +Requires the Optuna library to be installed (`pip install optuna`). +""" +import time + +import ray +from ray import train, tune +from ray.tune.search import ConcurrencyLimiter +from ray.tune.search.optuna import OptunaSearch + + +def evaluation_fn(step, width, height): + return (0.1 + width * step / 100) ** (-1) + height * 0.1 + + +def easy_objective(config): + # Hyperparameters + width, height = config["width"], config["height"] + + for step in range(config["steps"]): + # Iterative training function - can be any arbitrary training procedure + intermediate_score = evaluation_fn(step, width, height) + # Feed the score back back to Tune. 
+ train.report( + { + "iterations": step, + "loss": intermediate_score, + "gain": intermediate_score * width, + } + ) + time.sleep(0.1) + + +def run_optuna_tune(smoke_test=False): + algo = OptunaSearch(metric=["loss", "gain"], mode=["min", "max"]) + algo = ConcurrencyLimiter(algo, max_concurrent=4) + tuner = tune.Tuner( + easy_objective, + tune_config=tune.TuneConfig( + search_alg=algo, + num_samples=10 if smoke_test else 100, + ), + param_space={ + "steps": 100, + "width": tune.uniform(0, 20), + "height": tune.uniform(-100, 100), + # This is an ignored parameter. + "activation": tune.choice(["relu", "tanh"]), + }, + ) + results = tuner.fit() + + print( + "Best hyperparameters for loss found were: ", + results.get_best_result("loss", "min").config, + ) + print( + "Best hyperparameters for gain found were: ", + results.get_best_result("gain", "max").config, + ) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + ray.init(configure_logging=False) + + run_optuna_tune(smoke_test=args.smoke_test) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/pb2_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/pb2_example.py new file mode 100644 index 0000000000000000000000000000000000000000..ba1e94fb3d021d6c6114aab12ed37eb674df94ee --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/pb2_example.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python + +import argparse + +import ray +from ray import train, tune +from ray.tune.examples.pbt_function import pbt_function +from ray.tune.schedulers.pb2 import PB2 + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + if args.smoke_test: + ray.init(num_cpus=2) # force 
pausing to happen for test + + perturbation_interval = 5 + pbt = PB2( + time_attr="training_iteration", + perturbation_interval=perturbation_interval, + hyperparam_bounds={ + # hyperparameter bounds. + "lr": [0.0001, 0.02], + }, + ) + + tuner = tune.Tuner( + pbt_function, + run_config=train.RunConfig( + name="pbt_test", + verbose=False, + stop={ + "training_iteration": 30, + }, + failure_config=train.FailureConfig( + fail_fast=True, + ), + ), + tune_config=tune.TuneConfig( + scheduler=pbt, + metric="mean_accuracy", + mode="max", + num_samples=8, + reuse_actors=True, + ), + param_space={ + "lr": 0.0001, + # note: this parameter is perturbed but has no effect on + # the model training in this example + "some_other_factor": 1, + # This parameter is not perturbed and is used to determine + # checkpoint frequency. We set checkpoints and perturbations + # to happen at the same frequency. + "checkpoint_interval": perturbation_interval, + }, + ) + results = tuner.fit() + + print("Best hyperparameters found were: ", results.get_best_result().config) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/pb2_ppo_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/pb2_ppo_example.py new file mode 100644 index 0000000000000000000000000000000000000000..75d184beefdedf412bb48d48dd628164ac8518c1 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/pb2_ppo_example.py @@ -0,0 +1,157 @@ +import argparse +import os +import random +from datetime import datetime + +import pandas as pd + +from ray.tune import run, sample_from +from ray.tune.schedulers import PopulationBasedTraining +from ray.tune.schedulers.pb2 import PB2 + + +# Postprocess the perturbed config to ensure it's still valid used if PBT. +def explore(config): + # Ensure we collect enough timesteps to do sgd. + if config["train_batch_size"] < config["sgd_minibatch_size"] * 2: + config["train_batch_size"] = config["sgd_minibatch_size"] * 2 + # Ensure we run at least one sgd iter. 
+ if config["lambda"] > 1: + config["lambda"] = 1 + config["train_batch_size"] = int(config["train_batch_size"]) + return config + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--max", type=int, default=1000000) + parser.add_argument("--algo", type=str, default="PPO") + parser.add_argument("--num_workers", type=int, default=4) + parser.add_argument("--num_samples", type=int, default=4) + parser.add_argument("--t_ready", type=int, default=50000) + parser.add_argument("--seed", type=int, default=0) + parser.add_argument( + "--horizon", type=int, default=1600 + ) # make this 1000 for other envs + parser.add_argument("--perturb", type=float, default=0.25) # if using PBT + parser.add_argument("--env_name", type=str, default="BipedalWalker-v2") + parser.add_argument( + "--criteria", type=str, default="timesteps_total" + ) # "training_iteration", "time_total_s" + parser.add_argument( + "--net", type=str, default="32_32" + ) # May be important to use a larger network for bigger tasks. 
+ parser.add_argument("--filename", type=str, default="") + parser.add_argument("--method", type=str, default="pb2") # ['pbt', 'pb2'] + parser.add_argument("--save_csv", type=bool, default=False) + + args = parser.parse_args() + + # bipedalwalker needs 1600 + if args.env_name in ["BipedalWalker-v2", "BipedalWalker-v3"]: + horizon = 1600 + else: + horizon = 1000 + + pbt = PopulationBasedTraining( + time_attr=args.criteria, + metric="episode_reward_mean", + mode="max", + perturbation_interval=args.t_ready, + resample_probability=args.perturb, + quantile_fraction=args.perturb, # copy bottom % with top % + # Specifies the search space for these hyperparams + hyperparam_mutations={ + "lambda": lambda: random.uniform(0.9, 1.0), + "clip_param": lambda: random.uniform(0.1, 0.5), + "lr": lambda: random.uniform(1e-3, 1e-5), + "train_batch_size": lambda: random.randint(1000, 60000), + }, + custom_explore_fn=explore, + ) + + pb2 = PB2( + time_attr=args.criteria, + metric="episode_reward_mean", + mode="max", + perturbation_interval=args.t_ready, + quantile_fraction=args.perturb, # copy bottom % with top % + # Specifies the hyperparam search space + hyperparam_bounds={ + "lambda": [0.9, 1.0], + "clip_param": [0.1, 0.5], + "lr": [1e-5, 1e-3], + "train_batch_size": [1000, 60000], + }, + ) + + methods = {"pbt": pbt, "pb2": pb2} + + timelog = ( + str(datetime.date(datetime.now())) + "_" + str(datetime.time(datetime.now())) + ) + + args.dir = "{}_{}_{}_Size{}_{}_{}".format( + args.algo, + args.filename, + args.method, + str(args.num_samples), + args.env_name, + args.criteria, + ) + + analysis = run( + args.algo, + name="{}_{}_{}_seed{}_{}".format( + timelog, args.method, args.env_name, str(args.seed), args.filename + ), + scheduler=methods[args.method], + verbose=1, + num_samples=args.num_samples, + reuse_actors=True, + stop={args.criteria: args.max}, + config={ + "env": args.env_name, + "log_level": "INFO", + "seed": args.seed, + "kl_coeff": 1.0, + "num_gpus": 0, + "horizon": 
horizon, + "observation_filter": "MeanStdFilter", + "model": { + "fcnet_hiddens": [ + int(args.net.split("_")[0]), + int(args.net.split("_")[1]), + ], + "free_log_std": True, + }, + "num_sgd_iter": 10, + "sgd_minibatch_size": 128, + "lambda": sample_from(lambda spec: random.uniform(0.9, 1.0)), + "clip_param": sample_from(lambda spec: random.uniform(0.1, 0.5)), + "lr": sample_from(lambda spec: random.uniform(1e-3, 1e-5)), + "train_batch_size": sample_from(lambda spec: random.randint(1000, 60000)), + }, + ) + + all_dfs = list(analysis.trial_dataframes.values()) + + results = pd.DataFrame() + for i in range(args.num_samples): + df = all_dfs[i] + df = df[ + [ + "timesteps_total", + "episodes_total", + "episode_reward_mean", + "info/learner/default_policy/cur_kl_coeff", + ] + ] + df["Agent"] = i + results = pd.concat([results, df]).reset_index(drop=True) + + if args.save_csv: + if not (os.path.exists("data/" + args.dir)): + os.makedirs("data/" + args.dir) + + results.to_csv("data/{}/seed{}.csv".format(args.dir, str(args.seed))) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_convnet_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_convnet_example.py new file mode 100644 index 0000000000000000000000000000000000000000..e9d7a3936a595875e86b92b68c7d706afdd204dc --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_convnet_example.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python + +# ruff: noqa +# fmt: off + +# __tutorial_imports_begin__ +import argparse +import os + +import numpy as np +import torch +import torch.optim as optim +from torchvision import datasets + +import ray +from ray import train, tune +from ray.tune.examples.mnist_pytorch import ( + ConvNet, + get_data_loaders, + test_func, + train_func, +) +from ray.tune.schedulers import PopulationBasedTraining +from ray.tune.utils import validate_save_restore + +# __tutorial_imports_end__ + + +# __trainable_begin__ +class PytorchTrainable(tune.Trainable): + 
"""Train a Pytorch ConvNet with Trainable and PopulationBasedTraining + scheduler. The example reuse some of the functions in mnist_pytorch, + and is a good demo for how to add the tuning function without + changing the original training code. + """ + + def setup(self, config): + self.train_loader, self.test_loader = get_data_loaders() + self.model = ConvNet() + self.optimizer = optim.SGD( + self.model.parameters(), + lr=config.get("lr", 0.01), + momentum=config.get("momentum", 0.9)) + + def step(self): + train_func(self.model, self.optimizer, self.train_loader) + acc = test_func(self.model, self.test_loader) + return {"mean_accuracy": acc} + + def save_checkpoint(self, checkpoint_dir): + checkpoint_path = os.path.join(checkpoint_dir, "model.pth") + torch.save(self.model.state_dict(), checkpoint_path) + + def load_checkpoint(self, checkpoint_dir): + checkpoint_path = os.path.join(checkpoint_dir, "model.pth") + self.model.load_state_dict(torch.load(checkpoint_path)) + + def reset_config(self, new_config): + for param_group in self.optimizer.param_groups: + if "lr" in new_config: + param_group["lr"] = new_config["lr"] + if "momentum" in new_config: + param_group["momentum"] = new_config["momentum"] + + self.config = new_config + return True +# __trainable_end__ + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing") + args, _ = parser.parse_known_args() + + ray.init(num_cpus=2) + datasets.MNIST("~/data", train=True, download=True) + + # check if PytorchTrainble will save/restore correctly before execution + validate_save_restore(PytorchTrainable) + + # __pbt_begin__ + scheduler = PopulationBasedTraining( + time_attr="training_iteration", + perturbation_interval=5, + hyperparam_mutations={ + # distribution for resampling + "lr": lambda: np.random.uniform(0.0001, 1), + # allow perturbations within this set of categorical values + "momentum": [0.8, 0.9, 0.99], + 
}) + # __pbt_end__ + + # __tune_begin__ + class CustomStopper(tune.Stopper): + def __init__(self): + self.should_stop = False + + def __call__(self, trial_id, result): + max_iter = 5 if args.smoke_test else 100 + if not self.should_stop and result["mean_accuracy"] > 0.96: + self.should_stop = True + return self.should_stop or result["training_iteration"] >= max_iter + + def stop_all(self): + return self.should_stop + + stopper = CustomStopper() + + tuner = tune.Tuner( + PytorchTrainable, + run_config=train.RunConfig( + name="pbt_test", + stop=stopper, + verbose=1, + checkpoint_config=train.CheckpointConfig( + checkpoint_score_attribute="mean_accuracy", + checkpoint_frequency=5, + num_to_keep=4, + ), + ), + tune_config=tune.TuneConfig( + scheduler=scheduler, + metric="mean_accuracy", + mode="max", + num_samples=4, + reuse_actors=True, + ), + param_space={ + "lr": tune.uniform(0.001, 1), + "momentum": tune.uniform(0.001, 1), + }, + ) + results = tuner.fit() + # __tune_end__ + + best_result = results.get_best_result() + best_checkpoint = best_result.checkpoint diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_convnet_function_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_convnet_function_example.py new file mode 100644 index 0000000000000000000000000000000000000000..b4ef69507ba6fb66d0c69833595f09922c001768 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_convnet_function_example.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python + +# __tutorial_imports_begin__ +import argparse +import os + +import numpy as np +import torch +import torch.optim as optim + +import ray +from ray import train, tune +from ray.train import Checkpoint +from ray.tune.examples.mnist_pytorch import ConvNet, get_data_loaders, test_func +from ray.tune.schedulers import PopulationBasedTraining + +# __tutorial_imports_end__ + + +# __train_begin__ +def train_convnet(config): + # Create our data loaders, model, and optmizer. 
+ step = 0 + train_loader, test_loader = get_data_loaders() + model = ConvNet() + optimizer = optim.SGD( + model.parameters(), + lr=config.get("lr", 0.01), + momentum=config.get("momentum", 0.9), + ) + + # If `get_checkpoint()` is not None, then we are resuming from a checkpoint. + # Load model state and iteration step from checkpoint. + if train.get_checkpoint(): + print("Loading from checkpoint.") + loaded_checkpoint = train.get_checkpoint() + with loaded_checkpoint.as_directory() as loaded_checkpoint_dir: + path = os.path.join(loaded_checkpoint_dir, "checkpoint.pt") + checkpoint = torch.load(path) + model.load_state_dict(checkpoint["model"]) + step = checkpoint["step"] + + while True: + ray.tune.examples.mnist_pytorch.train_func(model, optimizer, train_loader) + acc = test_func(model, test_loader) + checkpoint = None + if step % 5 == 0: + # Every 5 steps, checkpoint our current state. + # First get the checkpoint directory from tune. + # Need to create a directory under current working directory + # to construct checkpoint object from. + os.makedirs("my_model", exist_ok=True) + torch.save( + { + "step": step, + "model": model.state_dict(), + }, + "my_model/checkpoint.pt", + ) + checkpoint = Checkpoint.from_directory("my_model") + + step += 1 + train.report({"mean_accuracy": acc}, checkpoint=checkpoint) + + +# __train_end__ + + +def eval_best_model(results: tune.ResultGrid): + """Test the best model given output of tuner.fit().""" + with results.get_best_result().checkpoint.as_directory() as best_checkpoint_path: + best_model = ConvNet() + best_checkpoint = torch.load( + os.path.join(best_checkpoint_path, "checkpoint.pt") + ) + best_model.load_state_dict(best_checkpoint["model"]) + # Note that test only runs on a small random set of the test data, thus the + # accuracy may be different from metrics shown in tuning process. 
+ test_acc = test_func(best_model, get_data_loaders()[1]) + print("best model accuracy: ", test_acc) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + # __pbt_begin__ + scheduler = PopulationBasedTraining( + time_attr="training_iteration", + perturbation_interval=5, + hyperparam_mutations={ + # distribution for resampling + "lr": lambda: np.random.uniform(0.0001, 1), + # allow perturbations within this set of categorical values + "momentum": [0.8, 0.9, 0.99], + }, + ) + + # __pbt_end__ + + # __tune_begin__ + class CustomStopper(tune.Stopper): + def __init__(self): + self.should_stop = False + + def __call__(self, trial_id, result): + max_iter = 5 if args.smoke_test else 100 + if not self.should_stop and result["mean_accuracy"] > 0.96: + self.should_stop = True + return self.should_stop or result["training_iteration"] >= max_iter + + def stop_all(self): + return self.should_stop + + stopper = CustomStopper() + + tuner = tune.Tuner( + train_convnet, + run_config=train.RunConfig( + name="pbt_test", + stop=stopper, + verbose=1, + checkpoint_config=train.CheckpointConfig( + checkpoint_score_attribute="mean_accuracy", + num_to_keep=4, + ), + ), + tune_config=tune.TuneConfig( + scheduler=scheduler, + metric="mean_accuracy", + mode="max", + num_samples=4, + reuse_actors=True, + ), + param_space={ + "lr": tune.uniform(0.001, 1), + "momentum": tune.uniform(0.001, 1), + }, + ) + results = tuner.fit() + # __tune_end__ + + eval_best_model(results) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_example.py new file mode 100644 index 0000000000000000000000000000000000000000..73a157b1c76c58d2b11f7681a6e88cfa547c1f96 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_example.py @@ -0,0 +1,146 @@ 
+#!/usr/bin/env python + +import argparse +import random + +import numpy as np + +import ray +from ray import train, tune +from ray.tune.schedulers import PopulationBasedTraining + + +class PBTBenchmarkExample(tune.Trainable): + """Toy PBT problem for benchmarking adaptive learning rate. + + The goal is to optimize this trainable's accuracy. The accuracy increases + fastest at the optimal lr, which is a function of the current accuracy. + + The optimal lr schedule for this problem is the triangle wave as follows. + Note that many lr schedules for real models also follow this shape: + + best lr + ^ + | /\ + | / \ + | / \ + | / \ + ------------> accuracy + + In this problem, using PBT with a population of 2-4 is sufficient to + roughly approximate this lr schedule. Higher population sizes will yield + faster convergence. Training will not converge without PBT. + """ + + def setup(self, config): + self.lr = config["lr"] + self.accuracy = 0.0 # end = 1000 + + def step(self): + midpoint = 100 # lr starts decreasing after acc > midpoint + q_tolerance = 3 # penalize exceeding lr by more than this multiple + noise_level = 2 # add gaussian noise to the acc increase + # triangle wave: + # - start at 0.001 @ t=0, + # - peak at 0.01 @ t=midpoint, + # - end at 0.001 @ t=midpoint * 2, + if self.accuracy < midpoint: + optimal_lr = 0.01 * self.accuracy / midpoint + else: + optimal_lr = 0.01 - 0.01 * (self.accuracy - midpoint) / midpoint + optimal_lr = min(0.01, max(0.001, optimal_lr)) + + # compute accuracy increase + q_err = max(self.lr, optimal_lr) / min(self.lr, optimal_lr) + if q_err < q_tolerance: + self.accuracy += (1.0 / q_err) * random.random() + elif self.lr > optimal_lr: + self.accuracy -= (q_err - q_tolerance) * random.random() + self.accuracy += noise_level * np.random.normal() + self.accuracy = max(0, self.accuracy) + + return { + "mean_accuracy": self.accuracy, + "cur_lr": self.lr, + "optimal_lr": optimal_lr, # for debugging + "q_err": q_err, # for debugging + 
"done": self.accuracy > midpoint * 2, + } + + def save_checkpoint(self, checkpoint_dir): + return { + "accuracy": self.accuracy, + "lr": self.lr, + } + + def load_checkpoint(self, checkpoint): + self.accuracy = checkpoint["accuracy"] + + def reset_config(self, new_config): + self.lr = new_config["lr"] + self.config = new_config + return True + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + if args.smoke_test: + ray.init(num_cpus=2) # force pausing to happen for test + + perturbation_interval = 5 + pbt = PopulationBasedTraining( + time_attr="training_iteration", + perturbation_interval=perturbation_interval, + hyperparam_mutations={ + # distribution for resampling + "lr": lambda: random.uniform(0.0001, 0.02), + # allow perturbations within this set of categorical values + "some_other_factor": [1, 2], + }, + ) + + tuner = tune.Tuner( + PBTBenchmarkExample, + run_config=train.RunConfig( + name="pbt_class_api_example", + # Stop when done = True or at some # of train steps (whichever comes first) + stop={ + "done": True, + "training_iteration": 10 if args.smoke_test else 1000, + }, + verbose=0, + # We recommend matching `perturbation_interval` and `checkpoint_interval` + # (e.g. checkpoint every 4 steps, and perturb on those same steps) + # or making `perturbation_interval` a multiple of `checkpoint_interval` + # (e.g. checkpoint every 2 steps, and perturb every 4 steps). + # This is to ensure that the lastest checkpoints are being used by PBT + # when trials decide to exploit. If checkpointing and perturbing are not + # aligned, then PBT may use a stale checkpoint to resume from. 
+ checkpoint_config=train.CheckpointConfig( + checkpoint_frequency=perturbation_interval, + checkpoint_score_attribute="mean_accuracy", + num_to_keep=4, + ), + ), + tune_config=tune.TuneConfig( + scheduler=pbt, + metric="mean_accuracy", + mode="max", + reuse_actors=True, + num_samples=8, + ), + param_space={ + "lr": 0.0001, + # note: this parameter is perturbed but has no effect on + # the model training in this example + "some_other_factor": 1, + }, + ) + results = tuner.fit() + + print("Best hyperparameters found were: ", results.get_best_result().config) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_function.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_function.py new file mode 100644 index 0000000000000000000000000000000000000000..b49740028fba4903d8107b24c8f613c9f78db5b6 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_function.py @@ -0,0 +1,181 @@ +#!/usr/bin/env python + +import argparse +import json +import os +import random +import tempfile + +import numpy as np + +import ray +from ray import train, tune +from ray.train import Checkpoint +from ray.tune.schedulers import PopulationBasedTraining + + +def pbt_function(config): + """Toy PBT problem for benchmarking adaptive learning rate. + + The goal is to optimize this trainable's accuracy. The accuracy increases + fastest at the optimal lr, which is a function of the current accuracy. + + The optimal lr schedule for this problem is the triangle wave as follows. + Note that many lr schedules for real models also follow this shape: + + best lr + ^ + | /\ + | / \ + | / \ + | / \ + ------------> accuracy + + In this problem, using PBT with a population of 2-4 is sufficient to + roughly approximate this lr schedule. Higher population sizes will yield + faster convergence. Training will not converge without PBT. 
+ """ + lr = config["lr"] + checkpoint_interval = config.get("checkpoint_interval", 1) + + accuracy = 0.0 # end = 1000 + + # NOTE: See below why step is initialized to 1 + step = 1 + checkpoint = train.get_checkpoint() + if checkpoint: + with checkpoint.as_directory() as checkpoint_dir: + with open(os.path.join(checkpoint_dir, "checkpoint.json"), "r") as f: + checkpoint_dict = json.load(f) + + accuracy = checkpoint_dict["acc"] + last_step = checkpoint_dict["step"] + # Current step should be 1 more than the last checkpoint step + step = last_step + 1 + + # triangle wave: + # - start at 0.001 @ t=0, + # - peak at 0.01 @ t=midpoint, + # - end at 0.001 @ t=midpoint * 2, + midpoint = 100 # lr starts decreasing after acc > midpoint + q_tolerance = 3 # penalize exceeding lr by more than this multiple + noise_level = 2 # add gaussian noise to the acc increase + + # Let `stop={"done": True}` in the configs below handle trial stopping + while True: + if accuracy < midpoint: + optimal_lr = 0.01 * accuracy / midpoint + else: + optimal_lr = 0.01 - 0.01 * (accuracy - midpoint) / midpoint + optimal_lr = min(0.01, max(0.001, optimal_lr)) + + # compute accuracy increase + q_err = max(lr, optimal_lr) / min(lr, optimal_lr) + if q_err < q_tolerance: + accuracy += (1.0 / q_err) * random.random() + elif lr > optimal_lr: + accuracy -= (q_err - q_tolerance) * random.random() + accuracy += noise_level * np.random.normal() + accuracy = max(0, accuracy) + + metrics = { + "mean_accuracy": accuracy, + "cur_lr": lr, + "optimal_lr": optimal_lr, # for debugging + "q_err": q_err, # for debugging + "done": accuracy > midpoint * 2, # this stops the training process + } + + if step % checkpoint_interval == 0: + # Checkpoint every `checkpoint_interval` steps + # NOTE: if we initialized `step=0` above, our checkpointing and perturbing + # would be out of sync by 1 step. 
+ # Ex: if `checkpoint_interval` = `perturbation_interval` = 3 + # step: 0 (checkpoint) 1 2 3 (checkpoint) + # training_iteration: 1 2 3 (perturb) 4 + with tempfile.TemporaryDirectory() as tempdir: + with open(os.path.join(tempdir, "checkpoint.json"), "w") as f: + checkpoint_dict = {"acc": accuracy, "step": step} + json.dump(checkpoint_dict, f) + train.report(metrics, checkpoint=Checkpoint.from_directory(tempdir)) + else: + train.report(metrics) + step += 1 + + +def run_tune_pbt(smoke_test=False): + perturbation_interval = 5 + pbt = PopulationBasedTraining( + time_attr="training_iteration", + perturbation_interval=perturbation_interval, + hyperparam_mutations={ + # distribution for resampling + "lr": tune.uniform(0.0001, 0.02), + # allow perturbations within this set of categorical values + "some_other_factor": [1, 2], + }, + ) + + tuner = tune.Tuner( + pbt_function, + run_config=train.RunConfig( + name="pbt_function_api_example", + verbose=False, + stop={ + # Stop when done = True or at some # of train steps + # (whichever comes first) + "done": True, + "training_iteration": 10 if smoke_test else 1000, + }, + failure_config=train.FailureConfig( + fail_fast=True, + ), + checkpoint_config=train.CheckpointConfig( + checkpoint_score_attribute="mean_accuracy", + num_to_keep=2, + ), + ), + tune_config=tune.TuneConfig( + scheduler=pbt, + metric="mean_accuracy", + mode="max", + num_samples=8, + reuse_actors=True, + ), + param_space={ + "lr": 0.0001, + # Note: `some_other_factor` is perturbed because it is specified under + # the PBT scheduler's `hyperparam_mutations` argument, but has no effect on + # the model training in this example + "some_other_factor": 1, + # Note: `checkpoint_interval` will not be perturbed (since it's not + # included above), and it will be used to determine how many steps to take + # between each checkpoint. + # We recommend matching `perturbation_interval` and `checkpoint_interval` + # (e.g. 
checkpoint every 4 steps, and perturb on those same steps) + # or making `perturbation_interval` a multiple of `checkpoint_interval` + # (e.g. checkpoint every 2 steps, and perturb every 4 steps). + # This is to ensure that the lastest checkpoints are being used by PBT + # when trials decide to exploit. If checkpointing and perturbing are not + # aligned, then PBT may use a stale checkpoint to resume from. + "checkpoint_interval": perturbation_interval, + }, + ) + results = tuner.fit() + + print("Best hyperparameters found were: ", results.get_best_result().config) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", + action="store_true", + default=False, + help="Finish quickly for testing", + ) + args, _ = parser.parse_known_args() + if args.smoke_test: + ray.init(num_cpus=2) # force pausing to happen for test + + run_tune_pbt(smoke_test=args.smoke_test) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_memnn_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_memnn_example.py new file mode 100644 index 0000000000000000000000000000000000000000..266a705dd9a7c58666dd344db0588dc36cd43be0 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_memnn_example.py @@ -0,0 +1,325 @@ +"""Example training a memory neural net on the bAbI dataset. + +References Keras and is based off of https://keras.io/examples/babi_memnn/. +""" + +from __future__ import print_function + +import argparse +import os +import re +import sys +import tarfile + +import numpy as np +from filelock import FileLock + +from ray import train, tune + +if sys.version_info >= (3, 12): + # Skip this test in Python 3.12+ because TensorFlow is not supported. 
+ sys.exit(0) +else: + from tensorflow.keras.layers import ( + LSTM, + Activation, + Dense, + Dropout, + Embedding, + Input, + Permute, + add, + concatenate, + dot, + ) + from tensorflow.keras.models import Model, Sequential, load_model + from tensorflow.keras.optimizers import RMSprop + from tensorflow.keras.preprocessing.sequence import pad_sequences + from tensorflow.keras.utils import get_file + + +def tokenize(sent): + """Return the tokens of a sentence including punctuation. + + >>> tokenize("Bob dropped the apple. Where is the apple?") + ["Bob", "dropped", "the", "apple", ".", "Where", "is", "the", "apple", "?"] + """ + return [x.strip() for x in re.split(r"(\W+)?", sent) if x and x.strip()] + + +def parse_stories(lines, only_supporting=False): + """Parse stories provided in the bAbi tasks format + + If only_supporting is true, only the sentences + that support the answer are kept. + """ + data = [] + story = [] + for line in lines: + line = line.decode("utf-8").strip() + nid, line = line.split(" ", 1) + nid = int(nid) + if nid == 1: + story = [] + if "\t" in line: + q, a, supporting = line.split("\t") + q = tokenize(q) + if only_supporting: + # Only select the related substory + supporting = map(int, supporting.split()) + substory = [story[i - 1] for i in supporting] + else: + # Provide all the substories + substory = [x for x in story if x] + data.append((substory, q, a)) + story.append("") + else: + sent = tokenize(line) + story.append(sent) + return data + + +def get_stories(f, only_supporting=False, max_length=None): + """Given a file name, read the file, + retrieve the stories, + and then convert the sentences into a single story. + + If max_length is supplied, + any stories longer than max_length tokens will be discarded. 
+ """ + + def flatten(data): + return sum(data, []) + + data = parse_stories(f.readlines(), only_supporting=only_supporting) + data = [ + (flatten(story), q, answer) + for story, q, answer in data + if not max_length or len(flatten(story)) < max_length + ] + return data + + +def vectorize_stories(word_idx, story_maxlen, query_maxlen, data): + inputs, queries, answers = [], [], [] + for story, query, answer in data: + inputs.append([word_idx[w] for w in story]) + queries.append([word_idx[w] for w in query]) + answers.append(word_idx[answer]) + return ( + pad_sequences(inputs, maxlen=story_maxlen), + pad_sequences(queries, maxlen=query_maxlen), + np.array(answers), + ) + + +def read_data(finish_fast=False): + # Get the file + try: + path = get_file( + "babi-tasks-v1-2.tar.gz", + origin="https://s3.amazonaws.com/text-datasets/" + "babi_tasks_1-20_v1-2.tar.gz", + ) + except Exception: + print( + "Error downloading dataset, please download it manually:\n" + "$ wget http://www.thespermwhale.com/jaseweston/babi/tasks_1-20_v1-2" # noqa: E501 + ".tar.gz\n" + "$ mv tasks_1-20_v1-2.tar.gz ~/.keras/datasets/babi-tasks-v1-2.tar.gz" # noqa: E501 + ) + raise + + # Choose challenge + challenges = { + # QA1 with 10,000 samples + "single_supporting_fact_10k": "tasks_1-20_v1-2/en-10k/qa1_" + "single-supporting-fact_{}.txt", + # QA2 with 10,000 samples + "two_supporting_facts_10k": "tasks_1-20_v1-2/en-10k/qa2_" + "two-supporting-facts_{}.txt", + } + challenge_type = "single_supporting_fact_10k" + challenge = challenges[challenge_type] + + with tarfile.open(path) as tar: + train_stories = get_stories(tar.extractfile(challenge.format("train"))) + test_stories = get_stories(tar.extractfile(challenge.format("test"))) + if finish_fast: + train_stories = train_stories[:64] + test_stories = test_stories[:64] + return train_stories, test_stories + + +class MemNNModel(tune.Trainable): + def build_model(self): + """Helper method for creating the model""" + vocab = set() + for story, q, answer 
in self.train_stories + self.test_stories: + vocab |= set(story + q + [answer]) + vocab = sorted(vocab) + + # Reserve 0 for masking via pad_sequences + vocab_size = len(vocab) + 1 + story_maxlen = max(len(x) for x, _, _ in self.train_stories + self.test_stories) + query_maxlen = max(len(x) for _, x, _ in self.train_stories + self.test_stories) + + word_idx = {c: i + 1 for i, c in enumerate(vocab)} + self.inputs_train, self.queries_train, self.answers_train = vectorize_stories( + word_idx, story_maxlen, query_maxlen, self.train_stories + ) + self.inputs_test, self.queries_test, self.answers_test = vectorize_stories( + word_idx, story_maxlen, query_maxlen, self.test_stories + ) + + # placeholders + input_sequence = Input((story_maxlen,)) + question = Input((query_maxlen,)) + + # encoders + # embed the input sequence into a sequence of vectors + input_encoder_m = Sequential() + input_encoder_m.add(Embedding(input_dim=vocab_size, output_dim=64)) + input_encoder_m.add(Dropout(self.config.get("dropout", 0.3))) + # output: (samples, story_maxlen, embedding_dim) + + # embed the input into a sequence of vectors of size query_maxlen + input_encoder_c = Sequential() + input_encoder_c.add(Embedding(input_dim=vocab_size, output_dim=query_maxlen)) + input_encoder_c.add(Dropout(self.config.get("dropout", 0.3))) + # output: (samples, story_maxlen, query_maxlen) + + # embed the question into a sequence of vectors + question_encoder = Sequential() + question_encoder.add( + Embedding(input_dim=vocab_size, output_dim=64, input_length=query_maxlen) + ) + question_encoder.add(Dropout(self.config.get("dropout", 0.3))) + # output: (samples, query_maxlen, embedding_dim) + + # encode input sequence and questions (which are indices) + # to sequences of dense vectors + input_encoded_m = input_encoder_m(input_sequence) + input_encoded_c = input_encoder_c(input_sequence) + question_encoded = question_encoder(question) + + # compute a "match" between the first input vector sequence + # and the 
question vector sequence + # shape: `(samples, story_maxlen, query_maxlen)` + match = dot([input_encoded_m, question_encoded], axes=(2, 2)) + match = Activation("softmax")(match) + + # add the match matrix with the second input vector sequence + response = add( + [match, input_encoded_c] + ) # (samples, story_maxlen, query_maxlen) + response = Permute((2, 1))(response) # (samples, query_maxlen, story_maxlen) + + # concatenate the match matrix with the question vector sequence + answer = concatenate([response, question_encoded]) + + # the original paper uses a matrix multiplication. + # we choose to use a RNN instead. + answer = LSTM(32)(answer) # (samples, 32) + + # one regularization layer -- more would probably be needed. + answer = Dropout(self.config.get("dropout", 0.3))(answer) + answer = Dense(vocab_size)(answer) # (samples, vocab_size) + # we output a probability distribution over the vocabulary + answer = Activation("softmax")(answer) + + # build the final model + model = Model([input_sequence, question], answer) + return model + + def setup(self, config): + with FileLock(os.path.expanduser("~/.tune.lock")): + self.train_stories, self.test_stories = read_data(config["finish_fast"]) + model = self.build_model() + rmsprop = RMSprop( + lr=self.config.get("lr", 1e-3), rho=self.config.get("rho", 0.9) + ) + model.compile( + optimizer=rmsprop, + loss="sparse_categorical_crossentropy", + metrics=["accuracy"], + ) + self.model = model + + def step(self): + # train + self.model.fit( + [self.inputs_train, self.queries_train], + self.answers_train, + batch_size=self.config.get("batch_size", 32), + epochs=self.config.get("epochs", 1), + validation_data=([self.inputs_test, self.queries_test], self.answers_test), + verbose=0, + ) + _, accuracy = self.model.evaluate( + [self.inputs_train, self.queries_train], self.answers_train, verbose=0 + ) + return {"mean_accuracy": accuracy} + + def save_checkpoint(self, checkpoint_dir): + file_path = checkpoint_dir + "/model" + 
self.model.save(file_path) + + def load_checkpoint(self, checkpoint_dir): + # See https://stackoverflow.com/a/42763323 + del self.model + file_path = checkpoint_dir + "/model" + self.model = load_model(file_path) + + +if __name__ == "__main__": + import ray + from ray.tune.schedulers import PopulationBasedTraining + + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + if args.smoke_test: + ray.init(num_cpus=2) + + perturbation_interval = 2 + pbt = PopulationBasedTraining( + perturbation_interval=perturbation_interval, + hyperparam_mutations={ + "dropout": lambda: np.random.uniform(0, 1), + "lr": lambda: 10 ** np.random.randint(-10, 0), + "rho": lambda: np.random.uniform(0, 1), + }, + ) + + tuner = tune.Tuner( + MemNNModel, + run_config=train.RunConfig( + name="pbt_babi_memnn", + stop={"training_iteration": 4 if args.smoke_test else 100}, + checkpoint_config=train.CheckpointConfig( + checkpoint_frequency=perturbation_interval, + checkpoint_score_attribute="mean_accuracy", + num_to_keep=2, + ), + ), + tune_config=tune.TuneConfig( + scheduler=pbt, + metric="mean_accuracy", + mode="max", + num_samples=2, + reuse_actors=True, + ), + param_space={ + "finish_fast": args.smoke_test, + "batch_size": 32, + "epochs": 1, + "dropout": 0.3, + "lr": 0.01, + "rho": 0.9, + }, + ) + tuner.fit() diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_ppo_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_ppo_example.py new file mode 100644 index 0000000000000000000000000000000000000000..bcdfdff6072ffa9f4c5d336390264b4ddcd2f7b9 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_ppo_example.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +"""Example of using PBT with RLlib. 
+ +Note that this requires a cluster with at least 8 GPUs in order for all trials +to run concurrently, otherwise PBT will round-robin train the trials which +is less efficient (or you can set {"gpu": 0} to use CPUs for SGD instead). + +Note that Tune in general does not need 8 GPUs, and this is just a more +computationally demanding example. +""" + +import random + +from ray import train, tune +from ray.rllib.algorithms.ppo import PPO +from ray.tune.schedulers import PopulationBasedTraining + +if __name__ == "__main__": + # Postprocess the perturbed config to ensure it's still valid + def explore(config): + # ensure we collect enough timesteps to do sgd + if config["train_batch_size"] < config["sgd_minibatch_size"] * 2: + config["train_batch_size"] = config["sgd_minibatch_size"] * 2 + # ensure we run at least one sgd iter + if config["num_sgd_iter"] < 1: + config["num_sgd_iter"] = 1 + return config + + pbt = PopulationBasedTraining( + time_attr="time_total_s", + perturbation_interval=120, + resample_probability=0.25, + # Specifies the mutations of these hyperparams + hyperparam_mutations={ + "lambda": lambda: random.uniform(0.9, 1.0), + "clip_param": lambda: random.uniform(0.01, 0.5), + "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5], + "num_sgd_iter": lambda: random.randint(1, 30), + "sgd_minibatch_size": lambda: random.randint(128, 16384), + "train_batch_size": lambda: random.randint(2000, 160000), + }, + custom_explore_fn=explore, + ) + + tuner = tune.Tuner( + PPO, + run_config=train.RunConfig( + name="pbt_humanoid_test", + ), + tune_config=tune.TuneConfig( + scheduler=pbt, + num_samples=8, + metric="episode_reward_mean", + mode="max", + reuse_actors=True, + ), + param_space={ + "env": "Humanoid-v1", + "kl_coeff": 1.0, + "num_workers": 8, + "num_gpus": 1, + "model": {"free_log_std": True}, + # These params are tuned from a fixed starting value. + "lambda": 0.95, + "clip_param": 0.2, + "lr": 1e-4, + # These params start off randomly drawn from a set. 
+ "num_sgd_iter": tune.choice([10, 20, 30]), + "sgd_minibatch_size": tune.choice([128, 512, 2048]), + "train_batch_size": tune.choice([10000, 20000, 40000]), + }, + ) + results = tuner.fit() + + print("best hyperparameters: ", results.get_best_result().config) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_transformers/__init__.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_transformers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_transformers/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_transformers/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..36fe4155513a30aeefc5d1930e8706245ed95d94 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_transformers/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_transformers/__pycache__/pbt_transformers.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_transformers/__pycache__/pbt_transformers.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8d84711110ce32f0dd8498e461023a7cba84af1f Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_transformers/__pycache__/pbt_transformers.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_transformers/__pycache__/utils.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_transformers/__pycache__/utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dcce9edbf08afb83e454128148cbfb7d2c45f358 Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_transformers/__pycache__/utils.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_transformers/pbt_transformers.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_transformers/pbt_transformers.py new file mode 100644 index 0000000000000000000000000000000000000000..eb8a9d9a1b79ac47b6acdc0ff4b6ac3a9b755ca5 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_transformers/pbt_transformers.py @@ -0,0 +1,166 @@ +""" +This example is uses the official +huggingface transformers `hyperparameter_search` API. +""" + +import os + +from transformers import ( + AutoConfig, + AutoModelForSequenceClassification, + AutoTokenizer, + GlueDataset, + GlueDataTrainingArguments, + Trainer, + TrainingArguments, + glue_tasks_num_labels, +) + +from ray import tune +from ray.train import CheckpointConfig +from ray.tune import CLIReporter +from ray.tune.examples.pbt_transformers.utils import ( + build_compute_metrics_fn, + download_data, +) +from ray.tune.schedulers import PopulationBasedTraining + + +def tune_transformer(num_samples=8, gpus_per_trial=0, smoke_test=False): + data_dir_name = "./data" if not smoke_test else "./test_data" + data_dir = os.path.abspath(os.path.join(os.getcwd(), data_dir_name)) + if not os.path.exists(data_dir): + os.mkdir(data_dir, 0o755) + + # Change these as needed. 
+ model_name = ( + "bert-base-uncased" if not smoke_test else "sshleifer/tiny-distilroberta-base" + ) + task_name = "rte" + + task_data_dir = os.path.join(data_dir, task_name.upper()) + + num_labels = glue_tasks_num_labels[task_name] + + config = AutoConfig.from_pretrained( + model_name, num_labels=num_labels, finetuning_task=task_name + ) + + # Download and cache tokenizer, model, and features + print("Downloading and caching Tokenizer") + tokenizer = AutoTokenizer.from_pretrained(model_name) + + # Triggers tokenizer download to cache + print("Downloading and caching pre-trained model") + AutoModelForSequenceClassification.from_pretrained( + model_name, + config=config, + ) + + def get_model(): + return AutoModelForSequenceClassification.from_pretrained( + model_name, + config=config, + ) + + # Download data. + download_data(task_name, data_dir) + + data_args = GlueDataTrainingArguments(task_name=task_name, data_dir=task_data_dir) + + train_dataset = GlueDataset( + data_args, tokenizer=tokenizer, mode="train", cache_dir=task_data_dir + ) + eval_dataset = GlueDataset( + data_args, tokenizer=tokenizer, mode="dev", cache_dir=task_data_dir + ) + + training_args = TrainingArguments( + output_dir=".", + learning_rate=1e-5, # config + do_train=True, + do_eval=True, + no_cuda=gpus_per_trial <= 0, + evaluation_strategy="epoch", + save_strategy="epoch", + load_best_model_at_end=True, + num_train_epochs=2, # config + max_steps=-1, + per_device_train_batch_size=16, # config + per_device_eval_batch_size=16, # config + warmup_steps=0, + weight_decay=0.1, # config + logging_dir="./logs", + skip_memory_metrics=True, + report_to="none", + ) + + trainer = Trainer( + model_init=get_model, + args=training_args, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + compute_metrics=build_compute_metrics_fn(task_name), + ) + + tune_config = { + "per_device_train_batch_size": 32, + "per_device_eval_batch_size": 32, + "num_train_epochs": tune.choice([2, 3, 4, 5]), + "max_steps": 1 
if smoke_test else -1, # Used for smoke test. + } + + scheduler = PopulationBasedTraining( + time_attr="training_iteration", + metric="eval_acc", + mode="max", + perturbation_interval=1, + hyperparam_mutations={ + "weight_decay": tune.uniform(0.0, 0.3), + "learning_rate": tune.uniform(1e-5, 5e-5), + "per_device_train_batch_size": [16, 32, 64], + }, + ) + + reporter = CLIReporter( + parameter_columns={ + "weight_decay": "w_decay", + "learning_rate": "lr", + "per_device_train_batch_size": "train_bs/gpu", + "num_train_epochs": "num_epochs", + }, + metric_columns=["eval_acc", "eval_loss", "epoch", "training_iteration"], + ) + + trainer.hyperparameter_search( + hp_space=lambda _: tune_config, + backend="ray", + n_trials=num_samples, + resources_per_trial={"cpu": 1, "gpu": gpus_per_trial}, + scheduler=scheduler, + checkpoint_config=CheckpointConfig( + num_to_keep=1, + checkpoint_score_attribute="training_iteration", + ), + stop={"training_iteration": 1} if smoke_test else None, + progress_reporter=reporter, + local_dir="~/ray_results/", + name="tune_transformer_pbt", + log_to_file=True, + ) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + if args.smoke_test: + tune_transformer(num_samples=1, gpus_per_trial=0, smoke_test=True) + else: + # You can change the number of GPUs here: + tune_transformer(num_samples=8, gpus_per_trial=1) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_transformers/utils.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_transformers/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..ff304664b98b2fe7835d18a5f7dc33a4a6563fd8 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_transformers/utils.py @@ -0,0 +1,46 @@ +"""Utilities to load and cache data.""" + +import os +from typing import 
Callable, Dict + +import numpy as np +from transformers import EvalPrediction, glue_compute_metrics, glue_output_modes + + +def build_compute_metrics_fn(task_name: str) -> Callable[[EvalPrediction], Dict]: + """Function from transformers/examples/text-classification/run_glue.py""" + output_mode = glue_output_modes[task_name] + + def compute_metrics_fn(p: EvalPrediction): + if output_mode == "classification": + preds = np.argmax(p.predictions, axis=1) + elif output_mode == "regression": + preds = np.squeeze(p.predictions) + metrics = glue_compute_metrics(task_name, preds, p.label_ids) + return metrics + + return compute_metrics_fn + + +def download_data(task_name, data_dir="./data"): + # Download RTE training data + print("Downloading dataset.") + import urllib + import zipfile + + if task_name == "rte": + url = "https://dl.fbaipublicfiles.com/glue/data/RTE.zip" + else: + raise ValueError("Unknown task: {}".format(task_name)) + data_file = os.path.join(data_dir, "{}.zip".format(task_name)) + if not os.path.exists(data_file): + urllib.request.urlretrieve(url, data_file) + with zipfile.ZipFile(data_file) as zip_ref: + zip_ref.extractall(data_dir) + print("Downloaded data for task {} to {}".format(task_name, data_dir)) + else: + print( + "Data already exists. Using downloaded data for task {} from {}".format( + task_name, data_dir + ) + ) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_tune_cifar10_with_keras.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_tune_cifar10_with_keras.py new file mode 100644 index 0000000000000000000000000000000000000000..4b89d8fc563d4f866bf4655fd3097df22c7342aa --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/pbt_tune_cifar10_with_keras.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""Train keras CNN on the CIFAR10 small images dataset. 
+ +The model comes from: https://zhuanlan.zhihu.com/p/29214791, +and it gets to about 87% validation accuracy in 100 epochs. + +Note that the script requires a machine with 4 GPUs. You +can set {"gpu": 0} to use CPUs for training, although +it is less efficient. +""" + +from __future__ import print_function + +import argparse + +import numpy as np +import tensorflow as tf +from tensorflow.keras.datasets import cifar10 +from tensorflow.keras.layers import ( + Convolution2D, + Dense, + Dropout, + Flatten, + Input, + MaxPooling2D, +) +from tensorflow.keras.models import Model, load_model +from tensorflow.keras.preprocessing.image import ImageDataGenerator + +from ray import train, tune +from ray.tune import Trainable +from ray.tune.schedulers import PopulationBasedTraining + +num_classes = 10 +NUM_SAMPLES = 128 + + +class Cifar10Model(Trainable): + def _read_data(self): + # The data, split between train and test sets: + (x_train, y_train), (x_test, y_test) = cifar10.load_data() + + # Convert class vectors to binary class matrices. 
+ y_train = tf.keras.utils.to_categorical(y_train, num_classes) + y_test = tf.keras.utils.to_categorical(y_test, num_classes) + + x_train = x_train.astype("float32") + x_train /= 255 + x_test = x_test.astype("float32") + x_test /= 255 + + return (x_train, y_train), (x_test, y_test) + + def _build_model(self, input_shape): + x = Input(shape=(32, 32, 3)) + y = x + y = Convolution2D( + filters=64, + kernel_size=3, + strides=1, + padding="same", + activation="relu", + kernel_initializer="he_normal", + )(y) + y = Convolution2D( + filters=64, + kernel_size=3, + strides=1, + padding="same", + activation="relu", + kernel_initializer="he_normal", + )(y) + y = MaxPooling2D(pool_size=2, strides=2, padding="same")(y) + + y = Convolution2D( + filters=128, + kernel_size=3, + strides=1, + padding="same", + activation="relu", + kernel_initializer="he_normal", + )(y) + y = Convolution2D( + filters=128, + kernel_size=3, + strides=1, + padding="same", + activation="relu", + kernel_initializer="he_normal", + )(y) + y = MaxPooling2D(pool_size=2, strides=2, padding="same")(y) + + y = Convolution2D( + filters=256, + kernel_size=3, + strides=1, + padding="same", + activation="relu", + kernel_initializer="he_normal", + )(y) + y = Convolution2D( + filters=256, + kernel_size=3, + strides=1, + padding="same", + activation="relu", + kernel_initializer="he_normal", + )(y) + y = MaxPooling2D(pool_size=2, strides=2, padding="same")(y) + + y = Flatten()(y) + y = Dropout(self.config.get("dropout", 0.5))(y) + y = Dense(units=10, activation="softmax", kernel_initializer="he_normal")(y) + + model = Model(inputs=x, outputs=y, name="model1") + return model + + def setup(self, config): + self.train_data, self.test_data = self._read_data() + x_train = self.train_data[0] + model = self._build_model(x_train.shape[1:]) + + opt = tf.keras.optimizers.Adadelta( + lr=self.config.get("lr", 1e-4), weight_decay=self.config.get("decay", 1e-4) + ) + model.compile( + loss="categorical_crossentropy", optimizer=opt, 
metrics=["accuracy"] + ) + self.model = model + + def step(self): + x_train, y_train = self.train_data + x_train, y_train = x_train[:NUM_SAMPLES], y_train[:NUM_SAMPLES] + x_test, y_test = self.test_data + x_test, y_test = x_test[:NUM_SAMPLES], y_test[:NUM_SAMPLES] + + aug_gen = ImageDataGenerator( + # set input mean to 0 over the dataset + featurewise_center=False, + # set each sample mean to 0 + samplewise_center=False, + # divide inputs by dataset std + featurewise_std_normalization=False, + # divide each input by its std + samplewise_std_normalization=False, + # apply ZCA whitening + zca_whitening=False, + # randomly rotate images in the range (degrees, 0 to 180) + rotation_range=0, + # randomly shift images horizontally (fraction of total width) + width_shift_range=0.1, + # randomly shift images vertically (fraction of total height) + height_shift_range=0.1, + # randomly flip images + horizontal_flip=True, + # randomly flip images + vertical_flip=False, + ) + + aug_gen.fit(x_train) + batch_size = self.config.get("batch_size", 64) + gen = aug_gen.flow(x_train, y_train, batch_size=batch_size) + self.model.fit_generator( + generator=gen, epochs=self.config.get("epochs", 1), validation_data=None + ) + + # loss, accuracy + _, accuracy = self.model.evaluate(x_test, y_test, verbose=0) + return {"mean_accuracy": accuracy} + + def save_checkpoint(self, checkpoint_dir): + file_path = checkpoint_dir + "/model" + self.model.save(file_path) + + def load_checkpoint(self, checkpoint_dir): + # See https://stackoverflow.com/a/42763323 + del self.model + file_path = checkpoint_dir + "/model" + self.model = load_model(file_path) + + def cleanup(self): + # If need, save your model when exit. 
+ # saved_path = self.model.save(self.logdir) + # print("save model at: ", saved_path) + pass + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + space = { + "epochs": 1, + "batch_size": 64, + "lr": tune.grid_search([10**-4, 10**-5]), + "decay": tune.sample_from(lambda spec: spec.config.lr / 100.0), + "dropout": tune.grid_search([0.25, 0.5]), + } + if args.smoke_test: + space["lr"] = 10**-4 + space["dropout"] = 0.5 + + perturbation_interval = 10 + pbt = PopulationBasedTraining( + time_attr="training_iteration", + perturbation_interval=perturbation_interval, + hyperparam_mutations={ + "dropout": lambda _: np.random.uniform(0, 1), + }, + ) + + tuner = tune.Tuner( + tune.with_resources( + Cifar10Model, + resources={"cpu": 1, "gpu": 1}, + ), + run_config=train.RunConfig( + name="pbt_cifar10", + stop={ + "mean_accuracy": 0.80, + "training_iteration": 30, + }, + checkpoint_config=train.CheckpointConfig( + checkpoint_frequency=perturbation_interval, + checkpoint_score_attribute="mean_accuracy", + num_to_keep=2, + ), + ), + tune_config=tune.TuneConfig( + scheduler=pbt, + num_samples=4, + metric="mean_accuracy", + mode="max", + reuse_actors=True, + ), + param_space=space, + ) + results = tuner.fit() + print("Best hyperparameters found were: ", results.get_best_result().config) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/tf_mnist_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/tf_mnist_example.py new file mode 100644 index 0000000000000000000000000000000000000000..0fcda6a458f77285d2b523628acb6693e2ecb8ae --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/tf_mnist_example.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python +# coding: utf-8 +# +# This example showcases how to use TF2.0 APIs with Tune. 
+# Original code: https://www.tensorflow.org/tutorials/quickstart/advanced +# +# As of 10/12/2019: One caveat of using TF2.0 is that TF AutoGraph +# functionality does not interact nicely with Ray actors. One way to get around +# this is to `import tensorflow` inside the Tune Trainable. +# + +import argparse +import os +import sys + +from filelock import FileLock + +from ray import train, tune + +MAX_TRAIN_BATCH = 10 + +if sys.version_info >= (3, 12): + # Tensorflow is not installed for Python 3.12 because of keras compatibility. + sys.exit(0) +else: + from tensorflow.keras import Model + from tensorflow.keras.datasets.mnist import load_data + from tensorflow.keras.layers import Conv2D, Dense, Flatten + + +class MyModel(Model): + def __init__(self, hiddens=128): + super(MyModel, self).__init__() + self.conv1 = Conv2D(32, 3, activation="relu") + self.flatten = Flatten() + self.d1 = Dense(hiddens, activation="relu") + self.d2 = Dense(10, activation="softmax") + + def call(self, x): + x = self.conv1(x) + x = self.flatten(x) + x = self.d1(x) + return self.d2(x) + + +class MNISTTrainable(tune.Trainable): + def setup(self, config): + # IMPORTANT: See the above note. + import tensorflow as tf + + # Use FileLock to avoid race conditions. 
+ with FileLock(os.path.expanduser("~/.tune.lock")): + (x_train, y_train), (x_test, y_test) = load_data() + x_train, x_test = x_train / 255.0, x_test / 255.0 + + # Add a channels dimension + x_train = x_train[..., tf.newaxis] + x_test = x_test[..., tf.newaxis] + self.train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)) + self.train_ds = self.train_ds.shuffle(10000).batch(config.get("batch", 32)) + + self.test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32) + + self.model = MyModel(hiddens=config.get("hiddens", 128)) + self.loss_object = tf.keras.losses.SparseCategoricalCrossentropy() + self.optimizer = tf.keras.optimizers.Adam() + self.train_loss = tf.keras.metrics.Mean(name="train_loss") + self.train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy( + name="train_accuracy" + ) + + self.test_loss = tf.keras.metrics.Mean(name="test_loss") + self.test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy( + name="test_accuracy" + ) + + @tf.function + def train_step(images, labels): + with tf.GradientTape() as tape: + predictions = self.model(images) + loss = self.loss_object(labels, predictions) + gradients = tape.gradient(loss, self.model.trainable_variables) + self.optimizer.apply_gradients( + zip(gradients, self.model.trainable_variables) + ) + + self.train_loss(loss) + self.train_accuracy(labels, predictions) + + @tf.function + def test_step(images, labels): + predictions = self.model(images) + t_loss = self.loss_object(labels, predictions) + + self.test_loss(t_loss) + self.test_accuracy(labels, predictions) + + self.tf_train_step = train_step + self.tf_test_step = test_step + + def save_checkpoint(self, checkpoint_dir: str): + return None + + def load_checkpoint(self, checkpoint): + return None + + def step(self): + self.train_loss.reset_states() + self.train_accuracy.reset_states() + self.test_loss.reset_states() + self.test_accuracy.reset_states() + + for idx, (images, labels) in enumerate(self.train_ds): + if idx > 
MAX_TRAIN_BATCH: # This is optional and can be removed. + break + self.tf_train_step(images, labels) + + for test_images, test_labels in self.test_ds: + self.tf_test_step(test_images, test_labels) + + # It is important to return tf.Tensors as numpy objects. + return { + "epoch": self.iteration, + "loss": self.train_loss.result().numpy(), + "accuracy": self.train_accuracy.result().numpy() * 100, + "test_loss": self.test_loss.result().numpy(), + "mean_accuracy": self.test_accuracy.result().numpy() * 100, + } + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + tuner = tune.Tuner( + MNISTTrainable, + tune_config=tune.TuneConfig( + metric="test_loss", + mode="min", + ), + run_config=train.RunConfig( + stop={"training_iteration": 5 if args.smoke_test else 50}, + verbose=1, + ), + param_space={"hiddens": tune.grid_search([32, 64, 128])}, + ) + results = tuner.fit() + + print("Best hyperparameters found were: ", results.get_best_result().config) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/tune_basic_example.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/tune_basic_example.py new file mode 100644 index 0000000000000000000000000000000000000000..f6c3205b943608ebaf07a275859839b90a31ce79 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/tune_basic_example.py @@ -0,0 +1,57 @@ +"""This example demonstrates basic Ray Tune random search and grid search.""" +import time + +import ray +from ray import train, tune + + +def evaluation_fn(step, width, height): + time.sleep(0.1) + return (0.1 + width * step / 100) ** (-1) + height * 0.1 + + +def easy_objective(config): + # Hyperparameters + width, height = config["width"], config["height"] + + for step in range(config["steps"]): + # Iterative training function - can be any arbitrary training procedure + intermediate_score = 
evaluation_fn(step, width, height) + # Feed the score back back to Tune. + train.report({"iterations": step, "mean_loss": intermediate_score}) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--smoke-test", action="store_true", help="Finish quickly for testing" + ) + args, _ = parser.parse_known_args() + + ray.init(configure_logging=False) + + # This will do a grid search over the `activation` parameter. This means + # that each of the two values (`relu` and `tanh`) will be sampled once + # for each sample (`num_samples`). We end up with 2 * 50 = 100 samples. + # The `width` and `height` parameters are sampled randomly. + # `steps` is a constant parameter. + + tuner = tune.Tuner( + easy_objective, + tune_config=tune.TuneConfig( + metric="mean_loss", + mode="min", + num_samples=5 if args.smoke_test else 50, + ), + param_space={ + "steps": 5 if args.smoke_test else 100, + "width": tune.uniform(0, 20), + "height": tune.uniform(-100, 100), + "activation": tune.grid_search(["relu", "tanh"]), + }, + ) + results = tuner.fit() + + print("Best hyperparameters found were: ", results.get_best_result().config) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/examples/tune_mnist_keras.py b/.venv/lib/python3.11/site-packages/ray/tune/examples/tune_mnist_keras.py new file mode 100644 index 0000000000000000000000000000000000000000..88b72e6463e1f8751380ec1b8eec7ff15ae4ad2d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/ray/tune/examples/tune_mnist_keras.py @@ -0,0 +1,99 @@ +import argparse +import os +import sys + +from filelock import FileLock + +import ray +from ray import train, tune +from ray.tune.schedulers import AsyncHyperBandScheduler + +if sys.version_info >= (3, 12): + # Tensorflow is not installed for Python 3.12 because of keras compatibility. 
def train_mnist(config):
    """Train a small dense MNIST classifier and report accuracy to Tune.

    Args:
        config: Trial hyperparameters; reads "hidden" (dense layer width),
            "lr" (SGD learning rate), and "momentum".
    """
    # Import inside the trial process:
    # https://github.com/tensorflow/tensorflow/issues/32159
    import tensorflow as tf

    batch_size = 128
    num_classes = 10
    epochs = 12

    # Serialize the dataset download so concurrent trials on the same node
    # don't corrupt the shared cache.
    with FileLock(os.path.expanduser("~/.data.lock")):
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Scale pixel values into [0, 1].
    x_train, x_test = x_train / 255.0, x_test / 255.0
    model = tf.keras.models.Sequential(
        [
            tf.keras.layers.Flatten(input_shape=(28, 28)),
            tf.keras.layers.Dense(config["hidden"], activation="relu"),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(num_classes, activation="softmax"),
        ]
    )

    model.compile(
        loss="sparse_categorical_crossentropy",
        # Fix: `lr` is a deprecated alias that newer Keras releases reject;
        # `learning_rate` is the canonical keyword and works on all TF 2.x.
        optimizer=tf.keras.optimizers.SGD(
            learning_rate=config["lr"], momentum=config["momentum"]
        ),
        metrics=["accuracy"],
    )

    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        verbose=0,
        validation_data=(x_test, y_test),
        callbacks=[
            # Stream per-epoch metrics back to Tune; no checkpoints needed.
            ReportCheckpointCallback(
                checkpoint_on=[], metrics={"mean_accuracy": "accuracy"}
            )
        ],
    )
def get_iris_data(test_size=0.2):
    """Load the iris dataset and return a one-hot-encoded train/test split.

    Args:
        test_size: Fraction of the rows reserved for the test split.

    Returns:
        Tuple of ``(train_x, train_y, test_x, test_y)``.
    """
    iris_data = load_iris()
    x = iris_data.data
    y = iris_data.target.reshape(-1, 1)
    # NOTE(review): `sparse=False` was renamed `sparse_output` in
    # scikit-learn >= 1.2; kept as-is for compatibility with the versions
    # this example targets — confirm against the pinned sklearn.
    encoder = OneHotEncoder(sparse=False)
    y = encoder.fit_transform(y)
    # Bug fix: `test_size` was previously ignored — train_test_split was
    # called without it and silently fell back to its default of 0.25.
    train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=test_size)
    return train_x, train_y, test_x, test_y
def get_best_model_checkpoint(best_result: "ray.train.Result"):
    """Restore the best trial's booster from its checkpoint and print a summary.

    Args:
        best_result: The best trial's result, as returned by
            ``results.get_best_result()``.

    Returns:
        The trained ``xgboost.Booster`` loaded from the best checkpoint.
    """
    best_bst = TuneReportCheckpointCallback.get_model(
        best_result.checkpoint, filename=CHECKPOINT_FILENAME
    )

    # Fix: accuracy is 1 minus the classification *error rate*. The previous
    # code computed ``1.0 - eval-logloss``, but logloss is not an error rate,
    # so that value is not an accuracy. ``eval-error`` is available because
    # the search space sets eval_metric=["logloss", "error"]; this also
    # matches the sibling xgboost_example.py, which uses ``1 - test-error``.
    accuracy = 1.0 - best_result.metrics["eval-error"]
    print(f"Best model parameters: {best_result.config}")
    print(f"Best model total accuracy: {accuracy:.4f}")
    return best_bst
def tune_xgboost():
    """Grid-search XGBoost hyperparameters with dynamic CPU reallocation.

    Combines ASHA early stopping with a ``ResourceChangingScheduler`` that
    rebalances CPUs across live trials as trials finish.

    Returns:
        The best ``ray.train.Result`` by ``eval-logloss``.
    """
    search_space = {
        # You can mix constants with search space objects.
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "max_depth": 9,
        "learning_rate": 1,
        "min_child_weight": tune.grid_search([2, 3]),
        "subsample": tune.grid_search([0.8, 0.9]),
        "colsample_bynode": tune.grid_search([0.8, 0.9]),
        "random_state": 1,
        "num_parallel_tree": 2000,
    }
    # This will enable aggressive early stopping of bad trials.
    base_scheduler = ASHAScheduler(
        max_t=16, grace_period=1, reduction_factor=2  # 16 training iterations
    )

    def example_resources_allocation_function(
        tune_controller: "TuneController",
        trial: Trial,
        result: Dict[str, Any],
        scheduler: "ResourceChangingScheduler",
    ) -> Optional[PlacementGroupFactory]:
        """This is a basic example of a resource allocating function.

        The function naively balances available CPUs over live trials.

        This function returns a new ``PlacementGroupFactory`` with updated
        resource requirements, or None. If the returned
        ``PlacementGroupFactory`` is equal by value to the one the
        trial has currently, the scheduler will skip the update process
        internally (same with None).

        See :class:`DistributeResources` for a more complex,
        robust approach.

        Args:
            tune_controller: Trial runner for this Tune run.
                Can be used to obtain information about other trials.
            trial: The trial to allocate new resources to.
            result: The latest results of trial.
            scheduler: The scheduler calling the function.
        """

        # Get base trial resources as defined in
        # ``tune.with_resources``
        base_trial_resource = scheduler._base_trial_resources

        # Don't bother if this is just the first iteration
        if result["training_iteration"] < 1:
            return None

        # default values if resources_per_trial is unspecified
        if base_trial_resource is None:
            base_trial_resource = PlacementGroupFactory([{"CPU": 1, "GPU": 0}])

        # Assume that the number of CPUs cannot go below what was
        # specified in ``Tuner.fit()``.
        min_cpu = base_trial_resource.required_resources.get("CPU", 0)

        # Get the number of CPUs available in total (not just free)
        total_available_cpus = tune_controller._resource_updater.get_num_cpus()

        # Divide the free CPUs among all live trials
        cpu_to_use = max(
            min_cpu, total_available_cpus // len(tune_controller.get_live_trials())
        )

        # Assign new CPUs to the trial in a PlacementGroupFactory
        return PlacementGroupFactory([{"CPU": cpu_to_use, "GPU": 0}])

    # You can either define your own resources_allocation_function, or
    # use the default one - DistributeResources

    # from ray.tune.schedulers.resource_changing_scheduler import \
    #     DistributeResources

    scheduler = ResourceChangingScheduler(
        base_scheduler=base_scheduler,
        resources_allocation_function=example_resources_allocation_function,
        # resources_allocation_function=DistributeResources()  # default
    )

    tuner = tune.Tuner(
        tune.with_resources(
            train_breast_cancer, resources=PlacementGroupFactory([{"CPU": 1, "GPU": 0}])
        ),
        tune_config=tune.TuneConfig(
            metric="eval-logloss",
            mode="min",
            num_samples=1,
            scheduler=scheduler,
        ),
        param_space=search_space,
    )
    results = tuner.fit()

    return results.get_best_result()
def train_breast_cancer(config: dict):
    """Simple Tune trainable: fit an XGBoost classifier on breast-cancer data.

    Metrics (and a checkpoint every boosting round) are reported back to Tune
    through ``TuneReportCheckpointCallback``.
    """
    # Load the full dataset as plain arrays.
    features, targets = sklearn.datasets.load_breast_cancer(return_X_y=True)

    # Hold out a quarter of the rows for evaluation.
    x_tr, x_te, y_tr, y_te = train_test_split(features, targets, test_size=0.25)

    # XGBoost consumes DMatrix containers, not raw arrays.
    dtrain = xgb.DMatrix(x_tr, label=y_tr)
    dtest = xgb.DMatrix(x_te, label=y_te)

    # The callback streams each round's eval metrics to Tune and writes a
    # checkpoint every round (frequency=1).
    report_cb = TuneReportCheckpointCallback(frequency=1, filename=CHECKPOINT_FILENAME)
    xgb.train(
        config,
        dtrain,
        evals=[(dtest, "test")],
        verbose_eval=False,
        callbacks=[report_cb],
    )
def tune_xgboost(use_cv: bool = False):
    """Tune XGBoost hyperparameters on breast-cancer data with ASHA.

    Args:
        use_cv: If True, trials run ``xgb.cv`` (cross validation, no
            checkpointing) instead of a single train/test split.

    Returns:
        The best ``ray.train.Result`` by ``test-logloss``.
    """
    search_space = {
        # You can mix constants with search space objects.
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "max_depth": tune.randint(1, 9),
        "min_child_weight": tune.choice([1, 2, 3]),
        "subsample": tune.uniform(0.5, 1.0),
        "eta": tune.loguniform(1e-4, 1e-1),
    }
    # This will enable aggressive early stopping of bad trials.
    scheduler = ASHAScheduler(
        max_t=10, grace_period=1, reduction_factor=2  # 10 training iterations
    )

    tuner = tune.Tuner(
        tune.with_resources(
            train_breast_cancer if not use_cv else train_breast_cancer_cv,
            # You can add "gpu": 0.1 to allocate GPUs
            resources={"cpu": 1},
        ),
        tune_config=tune.TuneConfig(
            # NOTE(review): assumes the CV path also reports a metric named
            # "test-logloss" after fold averaging — verify against the
            # callback's results_postprocessing_fn output.
            metric="test-logloss",
            mode="min",
            num_samples=10,
            scheduler=scheduler,
        ),
        param_space=search_space,
    )
    results = tuner.fit()

    return results.get_best_result()
+ # Checkpointing is not supported when using `xgb.cv` + if not args.use_cv: + best_bst = get_best_model_checkpoint(best_result) + + # You could now do further predictions with + # best_bst.predict(...) diff --git a/.venv/lib/python3.11/site-packages/ray/tune/integration/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/ray/tune/integration/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af314acd81b92d439a5fec98c26bcd27bbfab3e8 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/ray/tune/integration/__pycache__/__init__.cpython-311.pyc differ