{
  "name": "boxing_delta_iris_world_model",
  "env": "BoxingNoFrameskip-v4",
  "model_type": "delta_iris",
  "metadata": {
    "latent_dim": [1, 4, 1024],
    "two_hot_rews": false,
    "tokens_per_block": 6,
    "num_tokens": 4,
    "tokens_grid_res": 2,
    "token_res": 4
  },
  "util_folders": {
    "data": "../../src/data",
    "models": "../../src/models"
  },
  "requirements": {
    "-r": "requirements.txt"
  },
  "models": [
    {
      "name": "world_model",
      "framework": null,
      "format": "state_dict",
      "source": {
        "weights_path": "world_model.pt",
        "class_path": "../../src/world_model.py",
        "class_name": "WorldModel",
        "class_args": [
        {
          "latent_vocab_size": 1024,
          "num_actions": 18,
          "image_channels": 3,
          "image_size": 64,
          "two_hot_rews": false,
          "transformer_config": {
            "tokens_per_block": 6,
            "max_blocks": 26,
            "num_layers": 3,
            "num_heads": 4,
            "embed_dim": 256,
            "attention": "causal",
            "embed_pdrop": 0.0,
            "resid_pdrop": 0.0,
            "attn_pdrop": 0.0
          },
          "frame_cnn_config": {
            "image_channels": 3,
            "latent_dim": 4,
            "num_channels": 32,
            "mult": [1, 1, 2, 2, 4],
            "down": [1, 0, 1, 1, 0]
          }
        }]
      },
      "signature": {
        "inputs": ["wm_input_sequence", "use_kv_cache"],
        "call_mode": "positional"
      },
      "sub_models":
      [
        {
          "name": "act_emb",
          "sub_model_name": "act_emb",
          "signature": 
          {
            "inputs": ["act"],
            "call_mode": "positional"
          }
        },
        {
          "name": "latents_emb",
          "sub_model_name": "latents_emb",
          "signature": 
          {
            "inputs": ["latent_tokens"],
            "call_mode": "positional"
          }
        },
        {
          "name": "transformer",
          "sub_model_name": "transformer",
          "signature": 
          {
            "call_mode": "auto"
          }
        },
        {
          "name": "frame_cnn",
          "sub_model_name": "frame_cnn",
          "signature": 
          {
            "inputs": ["obs"],
            "call_mode": "auto"
          }
        }
      ],
      "methods":
      [
        {
          "name": "blocks_left_in_kv_cache",
          "method_name": "blocks_left_in_kv_cache"
        },
        {
          "name": "reset_kv_cache",
          "method_name": "reset_kv_cache"
        }
      ]
    },
    {
      "name": "tokenizer",
      "framework": null,
      "format": "state_dict",
      "source": {
        "weights_path": "tokenizer.pt",
        "class_path": "../../src/tokenizer.py",
        "class_name": "Tokenizer",
        "class_args": [{
          "image_channels": 3,
          "image_size": 64,
          "num_actions": 18,
          "num_tokens": 4,
          "decoder_act_channels": 4,
          "codebook_size": 1024,
          "codebook_dim": 64,
          "max_codebook_updates_with_revival": 0,
          "encoder_config": {
            "image_channels": 7,
            "latent_dim": 64,
            "num_channels": 64,
            "mult": [1, 1, 2, 2, 4],
            "down": [1, 0, 1, 1, 0]
          },
          "decoder_config": { 
            "image_channels": 3,
            "latent_dim": 84,
            "num_channels": 64,
            "mult": [1, 1, 2, 2, 4],
            "down": [1, 0, 1, 1, 0]
          },
          "frame_cnn_config": {
            "image_channels": 3,
            "latent_dim": 16,
            "num_channels": 32,
            "mult": [1, 1, 2, 2, 4],
            "down": [1, 0, 1, 1, 0]
          }
        }]
      },
      "signature": {
        "inputs": ["o1", "a", "o2"],
        "call_mode": "positional"
      },
      "sub_models":
      [
        {
          "name": "decode",
          "sub_model_name": "decode",
          "signature": 
          {
            "inputs": ["obs", "act", "q", "should_clamp"],
            "call_mode": "positional"
          }
        }
      ],
      "methods":
      [
        {
          "name": "embed_tokens",
          "method_name": "embed_tokens"
        }
      ]
    }
  ]
}