{
  "env_cls": "qiskit_gym.envs.synthesis.PermutationEnv",
  "env": {
    "num_qubits": 33,
    "difficulty": 1,
    "gateset": [
      [
        "SWAP",
        [
          0,
          1
        ]
      ],
      [
        "SWAP",
        [
          1,
          2
        ]
      ],
      [
        "SWAP",
        [
          2,
          3
        ]
      ],
      [
        "SWAP",
        [
          3,
          5
        ]
      ],
      [
        "SWAP",
        [
          3,
          30
        ]
      ],
      [
        "SWAP",
        [
          1,
          4
        ]
      ],
      [
        "SWAP",
        [
          4,
          7
        ]
      ],
      [
        "SWAP",
        [
          6,
          7
        ]
      ],
      [
        "SWAP",
        [
          7,
          10
        ]
      ],
      [
        "SWAP",
        [
          5,
          8
        ]
      ],
      [
        "SWAP",
        [
          8,
          9
        ]
      ],
      [
        "SWAP",
        [
          8,
          11
        ]
      ],
      [
        "SWAP",
        [
          11,
          14
        ]
      ],
      [
        "SWAP",
        [
          10,
          12
        ]
      ],
      [
        "SWAP",
        [
          12,
          13
        ]
      ],
      [
        "SWAP",
        [
          12,
          15
        ]
      ],
      [
        "SWAP",
        [
          13,
          14
        ]
      ],
      [
        "SWAP",
        [
          14,
          16
        ]
      ],
      [
        "SWAP",
        [
          16,
          19
        ]
      ],
      [
        "SWAP",
        [
          15,
          18
        ]
      ],
      [
        "SWAP",
        [
          17,
          18
        ]
      ],
      [
        "SWAP",
        [
          18,
          21
        ]
      ],
      [
        "SWAP",
        [
          19,
          20
        ]
      ],
      [
        "SWAP",
        [
          19,
          22
        ]
      ],
      [
        "SWAP",
        [
          21,
          23
        ]
      ],
      [
        "SWAP",
        [
          23,
          24
        ]
      ],
      [
        "SWAP",
        [
          24,
          25
        ]
      ],
      [
        "SWAP",
        [
          22,
          25
        ]
      ],
      [
        "SWAP",
        [
          25,
          26
        ]
      ],
      [
        "SWAP",
        [
          23,
          27
        ]
      ],
      [
        "SWAP",
        [
          27,
          28
        ]
      ],
      [
        "SWAP",
        [
          28,
          29
        ]
      ],
      [
        "SWAP",
        [
          30,
          31
        ]
      ],
      [
        "SWAP",
        [
          31,
          32
        ]
      ]
    ],
    "depth_slope": 2,
    "max_depth": 512,
    "metrics_weights": {
      "n_cnots": 0.001,
      "n_layers_cnots": 0.001,
      "n_layers": 0.001,
      "n_gates": 0.001
    },
    "add_inverts": true,
    "add_perms": true
  },
  "policy_cls": "twisterl.nn.BasicPolicy",
  "policy": {
    "embedding_size": 512,
    "common_layers": [
      256
    ],
    "policy_layers": [],
    "value_layers": []
  },
  "algorithm_cls": "twisterl.rl.PPO",
  "algorithm": {
    "collecting": {
      "num_cores": 32,
      "num_episodes": 1024,
      "lambda": 0.995,
      "gamma": 0.995
    },
    "training": {
      "num_epochs": 10,
      "vf_coef": 0.8,
      "ent_coef": 0.01,
      "clip_ratio": 0.1,
      "normalize_advantage": false
    },
    "learning": {
      "diff_threshold": 0.85,
      "diff_max": 1024,
      "diff_metric": "ppo_deterministic"
    },
    "optimizer": {
      "lr": 0.0003
    },
    "evals": {
      "ppo_deterministic": {
        "num_episodes": 100,
        "deterministic": true,
        "num_searches": 1,
        "num_mcts_searches": 0,
        "num_cores": 32,
        "C": 1.41
      },
      "ppo_10": {
        "num_episodes": 100,
        "deterministic": false,
        "num_searches": 10,
        "num_mcts_searches": 0,
        "num_cores": 32,
        "C": 1.41
      }
    },
    "logging": {
      "log_freq": 1,
      "checkpoint_freq": 10
    }
  }
}