UniMus
/

OpenJMLA

@@ -111,13 +111,13 @@ class MAELMConfig(PretrainedConfig):
         per_device_train_batch_size=12,
         learning_rate=0.00005,
         lm_lr_ratio=0.1,
-        tokenizer_name='Llama-2-7b-hf',
         resume_from_checkpoint=None,
         resume_from_pth='epoch_4-step_8639-allstep_60000.pth',
         backbone={'name': 'MAEViT', 'arch': 'b', 'patch_size': 16, 'mask_ratio': 0.0, 'img_size': [80, 2992], \
                 'ckpt': 'epoch_20.pth'},
         neck={'name': 'LMDecoder', 'patch_size': 16, 'img_size': [80, 2992], 'in_chans': 3, 'embed_dim': 768, \
-                'decoder_embed_dim': 4544, 'freeze_decoder': True, 'decoder_type': 'Llama-2-7b-hf'},
         wandb={'proj': 'ATRena_cap', 'expname': 'cap_lynx_apmPT_mccaigc1wFT'},
         **kwargs,
     ):

         per_device_train_batch_size=12,
         learning_rate=0.00005,
         lm_lr_ratio=0.1,
+        tokenizer_name='meta-llama/Llama-2-7b-hf',
         resume_from_checkpoint=None,
         resume_from_pth='epoch_4-step_8639-allstep_60000.pth',
         backbone={'name': 'MAEViT', 'arch': 'b', 'patch_size': 16, 'mask_ratio': 0.0, 'img_size': [80, 2992], \
                 'ckpt': 'epoch_20.pth'},
         neck={'name': 'LMDecoder', 'patch_size': 16, 'img_size': [80, 2992], 'in_chans': 3, 'embed_dim': 768, \
+                'decoder_embed_dim': 4544, 'freeze_decoder': True, 'decoder_type': 'meta-llama/Llama-2-7b-hf'},
         wandb={'proj': 'ATRena_cap', 'expname': 'cap_lynx_apmPT_mccaigc1wFT'},
         **kwargs,
     ):