{ "env_cls": "qiskit_gym.envs.synthesis.PermutationEnv", "env": { "num_qubits": 33, "difficulty": 1, "gateset": [ [ "SWAP", [ 0, 1 ] ], [ "SWAP", [ 1, 2 ] ], [ "SWAP", [ 2, 3 ] ], [ "SWAP", [ 3, 5 ] ], [ "SWAP", [ 3, 30 ] ], [ "SWAP", [ 1, 4 ] ], [ "SWAP", [ 4, 7 ] ], [ "SWAP", [ 6, 7 ] ], [ "SWAP", [ 7, 10 ] ], [ "SWAP", [ 5, 8 ] ], [ "SWAP", [ 8, 9 ] ], [ "SWAP", [ 8, 11 ] ], [ "SWAP", [ 11, 14 ] ], [ "SWAP", [ 10, 12 ] ], [ "SWAP", [ 12, 13 ] ], [ "SWAP", [ 12, 15 ] ], [ "SWAP", [ 13, 14 ] ], [ "SWAP", [ 14, 16 ] ], [ "SWAP", [ 16, 19 ] ], [ "SWAP", [ 15, 18 ] ], [ "SWAP", [ 17, 18 ] ], [ "SWAP", [ 18, 21 ] ], [ "SWAP", [ 19, 20 ] ], [ "SWAP", [ 19, 22 ] ], [ "SWAP", [ 21, 23 ] ], [ "SWAP", [ 23, 24 ] ], [ "SWAP", [ 24, 25 ] ], [ "SWAP", [ 22, 25 ] ], [ "SWAP", [ 25, 26 ] ], [ "SWAP", [ 23, 27 ] ], [ "SWAP", [ 27, 28 ] ], [ "SWAP", [ 28, 29 ] ], [ "SWAP", [ 30, 31 ] ], [ "SWAP", [ 31, 32 ] ] ], "depth_slope": 2, "max_depth": 512, "metrics_weights": { "n_cnots": 0.001, "n_layers_cnots": 0.001, "n_layers": 0.001, "n_gates": 0.001 }, "add_inverts": true, "add_perms": true }, "policy_cls": "twisterl.nn.BasicPolicy", "policy": { "embedding_size": 512, "common_layers": [ 256 ], "policy_layers": [], "value_layers": [] }, "algorithm_cls": "twisterl.rl.PPO", "algorithm": { "collecting": { "num_cores": 32, "num_episodes": 1024, "lambda": 0.995, "gamma": 0.995 }, "training": { "num_epochs": 10, "vf_coef": 0.8, "ent_coef": 0.01, "clip_ratio": 0.1, "normalize_advantage": false }, "learning": { "diff_threshold": 0.85, "diff_max": 1024, "diff_metric": "ppo_deterministic" }, "optimizer": { "lr": 0.0003 }, "evals": { "ppo_deterministic": { "num_episodes": 100, "deterministic": true, "num_searches": 1, "num_mcts_searches": 0, "num_cores": 32, "C": 1.41 }, "ppo_10": { "num_episodes": 100, "deterministic": false, "num_searches": 10, "num_mcts_searches": 0, "num_cores": 32, "C": 1.41 } }, "logging": { "log_freq": 1, "checkpoint_freq": 10 } } }