tsilva
/

CartPole-v1_ppo

Reinforcement Learning

Eval Results (legacy)

Model card | Files and versions

CartPole-v1_ppo / artifacts / configs / config.json

tsilva's picture

Upload folder using huggingface_hub

1fc4315 verified 8 months ago

history blame contribute delete

1.22 kB

	{
	"env_id": "CartPole-v1",
	"algo_id": "ppo",
	"n_steps": 32,
	"batch_size": 256,
	"n_epochs": 20,
	"max_epochs": null,
	"n_timesteps": 100000.0,
	"seed": 42,
	"n_envs": 8,
	"subproc": null,
	"env_wrappers": [],
	"env_kwargs": {},
	"normalize_obs": false,
	"normalize_reward": false,
	"frame_stack": 1,
	"obs_type": "rgb",
	"hidden_dims": [
	64,
	64
	],
	"activation": "tanh",
	"policy": "MlpPolicy",
	"policy_kwargs": {},
	"policy_lr": 0.001,
	"learning_rate": 0.001,
	"learning_rate_schedule": "linear",
	"max_grad_norm": 0.5,
	"gamma": 0.98,
	"gae_lambda": 0.8,
	"ent_coef": 0.0,
	"vf_coef": 0.5,
	"clip_range": 0.2,
	"clip_range_schedule": "linear",
	"normalize_advantages": "batch",
	"use_baseline": false,
	"eval_freq_epochs": 100,
	"eval_warmup_epochs": 0,
	"eval_episodes": 10,
	"eval_recording_freq_epochs": 100,
	"eval_async": false,
	"eval_deterministic": true,
	"reward_threshold": null,
	"early_stop_on_eval_threshold": true,
	"early_stop_on_train_threshold": false,
	"log_per_env_eval_metrics": false,
	"project_id": "CartPole-v1",
	"checkpoint_dir": "checkpoints",
	"resume": false,
	"accelerator": "cpu",
	"devices": null,
	"normalize": null
	}