dataset_path: /global/cfs/cdirs/m4717/azton/galaxy-foundations/object_foundation/utils/supermock_dataset_11.2-14.json
mask_token: 0
masked_generation: false
masking_prob:
- 0.2
- 0.2
- 0.2
- 0.2
- 0.5
- 0.5
- 0.5
modalities:
- SFH
- SED
- mag_{band}_lsst
- mag_{band}_spherex
- redshift
- halo_mass
- stellar_mass
model_config:
  attention_probs_dropout_prob: 0.1
  classifier_dropout: 0.0
  contrastive_temperature: 0.05
  hidden_dropout_prob: 0.1
  hidden_size: 768
  intermediate_size: 3072
  loss_weights:
    contrastive:
      rounds: 0
      w0T:
      - 0
      - 0
    masked:
      rounds: 0
      w0T:
      - 0.8
      - 3
    smooth:
      rounds: 0
      w0T:
      - 0
      - 0.3
    unmasked:
      rounds: 0
      w0T:
      - 0.2
      - 0.3
  max_position_embeddings: 1149
  num_attention_heads: 12
  num_hidden_layers: 18
  pad_token_id: -1
  transform_numeric: false
  use_contrastive_loss: false
  use_mlm_loss: true
  use_regression_loss: false
  use_xval_loss: false
  vocab_size: 2048
model_name_or_path: galaxybert
tokenizer_name_or_path: Salesforce/SFR-Embedding-Mistral
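# Note on the training_args block below (added comment; a hedged reading, not
# part of the original dump): these fields mirror HuggingFace transformers
# TrainingArguments. With per_device_train_batch_size: 100 and
# gradient_accumulation_steps: 5, the effective batch size per optimizer step
# is 100 x 5 x (number of participating devices).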
training_args:
  _n_gpu: 1
  accelerator_config:
    dispatch_batches: null
    even_batches: true
    gradient_accumulation_kwargs: null
    non_blocking: false
    split_batches: false
    use_configured_state: false
    use_seedable_sampler: true
  adafactor: false
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  auto_find_batch_size: false
  average_tokens_across_devices: false
  batch_eval_metrics: false
  bf16: true
  bf16_full_eval: false
  data_seed: null
  dataloader_drop_last: false
  dataloader_num_workers: 16
  dataloader_persistent_workers: false
  dataloader_pin_memory: true
  dataloader_prefetch_factor: 8
  ddp_backend: null
  ddp_broadcast_buffers: null
  ddp_bucket_cap_mb: null
  ddp_find_unused_parameters: null
  ddp_timeout: 1800
  debug: []
  deepspeed: null
  disable_tqdm: false
  dispatch_batches: null
  do_eval: true
  do_predict: false
  do_train: false
  eval_accumulation_steps: 5
  eval_delay: 0
  eval_do_concat_batches: true
  eval_on_start: false
  eval_steps: 20
  eval_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
  - steps
  eval_use_gather_object: false
  evaluation_strategy: null
  fp16: false
  fp16_backend: auto
  fp16_full_eval: false
  fp16_opt_level: O1
  fsdp: []
  fsdp_config:
    min_num_params: 0
    xla: false
    xla_fsdp_grad_ckpt: false
    xla_fsdp_v2: false
  fsdp_min_num_params: 0
  fsdp_transformer_layer_cls_to_wrap: null
  full_determinism: false
  gradient_accumulation_steps: 5
  gradient_checkpointing: false
  gradient_checkpointing_kwargs: null
  greater_is_better: null
  group_by_length: false
  half_precision_backend: auto
  hub_always_push: false
  hub_model_id: null
  hub_private_repo: false
  hub_strategy: !!python/object/apply:transformers.trainer_utils.HubStrategy
  - every_save
  hub_token: null
  ignore_data_skip: false
  include_for_metrics: []
  include_inputs_for_metrics: false
  include_num_input_tokens_seen: false
  include_tokens_per_second: false
  jit_mode_eval: false
  label_names: null
  label_smoothing_factor: 0.0
  learning_rate: 0.0001
  length_column_name: length
  load_best_model_at_end: false
  local_rank: 0
  log_level: passive
  log_level_replica: warning
  log_on_each_node: true
  logging_dir: sm_foundation_lg_gmm_nomasklab
  logging_first_step: true
  logging_nan_inf_filter: true
  logging_steps: 1
  logging_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
  - steps
  lr_scheduler_kwargs: {}
  lr_scheduler_type: !!python/object/apply:transformers.trainer_utils.SchedulerType
  - cosine
  max_grad_norm: 1.0
  max_steps: -1
  metric_for_best_model: null
  mp_parameters: ''
  neftune_noise_alpha: null
  no_cuda: false
  num_train_epochs: 60
  optim: !!python/object/apply:transformers.training_args.OptimizerNames
  - adamw_torch
  optim_args: null
  optim_target_modules: null
  output_dir: supermock_te60_
  overwrite_output_dir: true
  past_index: -1
  per_device_eval_batch_size: 100
  per_device_train_batch_size: 100
  per_gpu_eval_batch_size: null
  per_gpu_train_batch_size: null
  prediction_loss_only: false
  push_to_hub: false
  push_to_hub_model_id: null
  push_to_hub_organization: null
  push_to_hub_token: null
  ray_scope: last
  remove_unused_columns: false
  report_to:
  - wandb
  restore_callback_states_from_checkpoint: false
  resume_from_checkpoint: null
  run_name: NO_SHARD_b50
  save_on_each_node: false
  save_only_model: false
  save_safetensors: true
  save_steps: 30
  save_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
  - steps
  save_total_limit: 360
  seed: 42
  skip_memory_metrics: true
  split_batches: null
  tf32: null
  torch_compile: false
  torch_compile_backend: null
  torch_compile_mode: null
  torch_empty_cache_steps: null
  torchdynamo: null
  tpu_metrics_debug: false
  tpu_num_cores: null
  use_cpu: false
  use_ipex: false
  use_legacy_prediction_loop: false
  use_liger_kernel: false
  use_mps_device: false
  warmup_ratio: 0.0
  warmup_steps: 0
  weight_decay: 0.1
transform_numeric: false
wandb_project: supermock-foundation-perl
wandb_run_name: ''
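# --- Usage sketch (assumption: the file is read with PyYAML; not part of the
# original dump) ---
# The !!python/object/apply tags above serialize transformers enums, so
# yaml.safe_load rejects this file; an unsafe loader is required and should
# only be used on trusted configs, since it can invoke arbitrary constructors:
#
#   import yaml  # transformers must also be importable to resolve the tags
#   with open("supermock_config.yaml") as f:  # hypothetical filename
#       cfg = yaml.unsafe_load(f)
#   print(cfg["training_args"]["learning_rate"])      # 0.0001
#   print(cfg["training_args"]["lr_scheduler_type"])  # SchedulerType.COSINE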