{ "env_cls": "qiskit_gym.envs.synthesis.PermutationEnv", "env": { "num_qubits": 65, "difficulty": 1, "gateset": [ [ "SWAP", [ 0, 1 ] ], [ "SWAP", [ 0, 10 ] ], [ "SWAP", [ 1, 2 ] ], [ "SWAP", [ 2, 3 ] ], [ "SWAP", [ 3, 4 ] ], [ "SWAP", [ 4, 5 ] ], [ "SWAP", [ 5, 6 ] ], [ "SWAP", [ 6, 7 ] ], [ "SWAP", [ 7, 8 ] ], [ "SWAP", [ 8, 9 ] ], [ "SWAP", [ 10, 13 ] ], [ "SWAP", [ 4, 11 ] ], [ "SWAP", [ 8, 12 ] ], [ "SWAP", [ 13, 14 ] ], [ "SWAP", [ 14, 15 ] ], [ "SWAP", [ 15, 16 ] ], [ "SWAP", [ 15, 24 ] ], [ "SWAP", [ 11, 17 ] ], [ "SWAP", [ 16, 17 ] ], [ "SWAP", [ 17, 18 ] ], [ "SWAP", [ 18, 19 ] ], [ "SWAP", [ 19, 20 ] ], [ "SWAP", [ 12, 21 ] ], [ "SWAP", [ 20, 21 ] ], [ "SWAP", [ 21, 22 ] ], [ "SWAP", [ 22, 23 ] ], [ "SWAP", [ 23, 26 ] ], [ "SWAP", [ 19, 25 ] ], [ "SWAP", [ 27, 38 ] ], [ "SWAP", [ 27, 28 ] ], [ "SWAP", [ 28, 29 ] ], [ "SWAP", [ 24, 29 ] ], [ "SWAP", [ 29, 30 ] ], [ "SWAP", [ 30, 31 ] ], [ "SWAP", [ 31, 39 ] ], [ "SWAP", [ 31, 32 ] ], [ "SWAP", [ 25, 33 ] ], [ "SWAP", [ 32, 33 ] ], [ "SWAP", [ 33, 34 ] ], [ "SWAP", [ 34, 35 ] ], [ "SWAP", [ 35, 36 ] ], [ "SWAP", [ 36, 37 ] ], [ "SWAP", [ 26, 37 ] ], [ "SWAP", [ 38, 41 ] ], [ "SWAP", [ 35, 40 ] ], [ "SWAP", [ 41, 42 ] ], [ "SWAP", [ 42, 43 ] ], [ "SWAP", [ 43, 44 ] ], [ "SWAP", [ 43, 52 ] ], [ "SWAP", [ 44, 45 ] ], [ "SWAP", [ 39, 45 ] ], [ "SWAP", [ 45, 46 ] ], [ "SWAP", [ 46, 47 ] ], [ "SWAP", [ 47, 48 ] ], [ "SWAP", [ 47, 53 ] ], [ "SWAP", [ 40, 49 ] ], [ "SWAP", [ 48, 49 ] ], [ "SWAP", [ 49, 50 ] ], [ "SWAP", [ 50, 51 ] ], [ "SWAP", [ 51, 54 ] ], [ "SWAP", [ 52, 56 ] ], [ "SWAP", [ 53, 60 ] ], [ "SWAP", [ 54, 64 ] ], [ "SWAP", [ 55, 56 ] ], [ "SWAP", [ 56, 57 ] ], [ "SWAP", [ 57, 58 ] ], [ "SWAP", [ 58, 59 ] ], [ "SWAP", [ 59, 60 ] ], [ "SWAP", [ 60, 61 ] ], [ "SWAP", [ 61, 62 ] ], [ "SWAP", [ 62, 63 ] ], [ "SWAP", [ 63, 64 ] ] ], "depth_slope": 2, "max_depth": 512, "metrics_weights": { "n_cnots": 0.01, "n_layers_cnots": 0.01, "n_layers": 0.01, "n_gates": 0.01 }, "add_inverts": true, "add_perms": true }, "policy_cls": "twisterl.nn.BasicPolicy", "policy": { "embedding_size": 512, "common_layers": [ 256 ], "policy_layers": [], "value_layers": [] }, "algorithm_cls": "twisterl.rl.PPO", "algorithm": { "collecting": { "num_cores": 32, "num_episodes": 1024, "lambda": 0.995, "gamma": 0.995 }, "training": { "num_epochs": 10, "vf_coef": 0.8, "ent_coef": 0.01, "clip_ratio": 0.1, "normalize_advantage": false }, "learning": { "diff_threshold": 0.85, "diff_max": 2048, "diff_metric": "ppo_deterministic" }, "optimizer": { "lr": 0.0003 }, "evals": { "ppo_deterministic": { "num_episodes": 100, "deterministic": true, "num_searches": 1, "num_mcts_searches": 0, "num_cores": 32, "C": 1.41 }, "ppo_10": { "num_episodes": 100, "deterministic": false, "num_searches": 10, "num_mcts_searches": 0, "num_cores": 32, "C": 1.41 } }, "logging": { "log_freq": 1, "checkpoint_freq": 10 } } }