diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..a18d7dd1b03c2721a56df640e3b304f6f4134819 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/run-9puzigbg.wandb filter=lfs diff=lfs merge=lfs -text +2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/run-yhjy9tz9.wandb filter=lfs diff=lfs merge=lfs -text +2026.01.21/13.23.20_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_132338-qrt50pak/run-qrt50pak.wandb filter=lfs diff=lfs merge=lfs -text +2026.01.21/13.27.30_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_132748-8pqnk39p/run-8pqnk39p.wandb filter=lfs diff=lfs merge=lfs -text diff --git a/2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml b/2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2c0703a726938135bfd72381f09a35f4c693378 --- /dev/null +++ b/2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml @@ -0,0 +1,163 @@ +name: train_llmbc_lowdim +_target_: llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace +obs_dim: ${task.obs_dim} +action_dim: ${task.action_dim} +task_name: ${task.name} +exp_name: default +model_name: ${llm.name} +horizon: 1 +n_obs_steps: 1 +n_action_steps: 1 +n_latency_steps: 0 +past_action_visible: false +llm_orig_expert_feedback: true +llm_do_sample: false +policy: + _target_: llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy + model: + _target_: llmbc.model.policy.policy_mlp.PolicyMLP + input_size: ${eval:'${n_obs_steps}*${obs_dim}'} + hidden_size: + - 256 + - 256 + output_size: ${eval:'${n_action_steps}*${action_dim}'} + activation: relu + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + llm_discriminator: + _target_: llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator + task_id: ${task_name} + llm_translator: + _target_: llmbc.translator.llm_translator.LLMTranslator + cfg: ${llm} + obs_dim: ${task.obs_dim} + action_dim: ${task.action_dim} + horizon: ${horizon} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + loss_bc_weight: 1.0 + loss_llm_weight: 0.01 + horizon: ${horizon} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + normalize_llm_loss: true +dataloader: + batch_size: 16 + num_workers: 0 + shuffle: true + pin_memory: false + persistent_workers: false +val_dataloader: + batch_size: 16 + num_workers: 0 + shuffle: true + pin_memory: false + persistent_workers: false +optimizer: + _target_: torch.optim.AdamW + lr: 0.01 + betas: + - 0.95 + - 0.999 + eps: 1.0e-08 + weight_decay: 1.0e-06 +training: + device: cuda:0 + seed: 42 + debug: false + resume: false + lr_scheduler: cosine + lr_warmup_steps: 10 + num_epochs: 1001 + gradient_accumulate_every: 8 + grad_norm_clip: 0.5 + rollout_every: 5 + checkpoint_every: 5 + val_every: 1 + sample_every: 5 + sample_max_batch: 128 + max_train_steps: null + max_val_steps: null + tqdm_interval_sec: 1.0 +logging: + project: ${task.name}-training + resume: true + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: + - ${name} + - ${task_name} + - ${exp_name} + id: null + group: null +checkpoint: + topk: + monitor_key: test_success_rate + mode: max + k: 5 + format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt + save_last_ckpt: true + save_last_snapshot: false +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} +task: + name: box-close-v2 + obs_dim: 9 + action_dim: 4 + env_runner: + _target_: llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner + env_name: llf-metaworld-box-close-v2 + n_train: 10 + n_test: 50 + n_envs: 10 + max_steps: 30 + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + instruction_type: b + feedback_type: + - hp + - hn + - fp + visual: false + discount: 0.9 + dataset: + _target_: llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset + data_path: datasets/box-close-v2.pt + data_path2: datasets/box-close-v2.pt + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1'} + pad_after: ${eval:'${n_action_steps}-1'} + obs_eef_target: true + use_manual_normalizer: false + val_ratio: 0.1 + dummy_normalizer: true + instructor: + _target_: llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor +llm: + name: HuggingFaceTB/SmolLM2-135M-Instruct + model_name: SmolLM2-135M-Instruct + config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig + causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM + use_quantization: false + use_joint_mlp_projector: true + llm_mode: ete-finetuned + finetune_mode: orig + checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890 + max_length: 100 + lora_config: + r: 32 + lora_alpha: 64 + lora_dropout: 0.05 + bias: none + task_type: CAUSAL_LM + prompter: + _target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter + use_joint_mlp_projector: true + hydra: + job: + override_dirname: ${model_name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${model_name} diff --git a/2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml b/2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7d02650fca9ca9f98f667f01a5bdc74c01862af2 --- /dev/null +++ b/2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml @@ -0,0 +1,154 @@ +hydra: + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: train + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: llmbc_box-close-v2.yaml + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /work/u1131674/LLM-BC + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /work/u1131674/LLM-BC/config/main_table + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2 + choices: + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml b/2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/train.log b/2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/train.log new file mode 100644 index 0000000000000000000000000000000000000000..b646e1ad6014f5209fe2c576db33501ed3484263 --- /dev/null +++ b/2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/train.log @@ -0,0 +1,2 @@ +[2026-01-21 12:08:39,454][hydra.utils][ERROR] - Error initializing class at llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace: Error loading 'llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace': +ImportError("cannot import name 'Sentinel' from 'typing_extensions' (/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/typing_extensions.py)") diff --git a/2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml b/2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2c0703a726938135bfd72381f09a35f4c693378 --- /dev/null +++ b/2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml @@ -0,0 +1,163 @@ +name: train_llmbc_lowdim +_target_: llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace +obs_dim: ${task.obs_dim} +action_dim: ${task.action_dim} +task_name: ${task.name} +exp_name: default +model_name: ${llm.name} +horizon: 1 +n_obs_steps: 1 +n_action_steps: 1 +n_latency_steps: 0 +past_action_visible: false +llm_orig_expert_feedback: true +llm_do_sample: false +policy: + _target_: llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy + model: + _target_: llmbc.model.policy.policy_mlp.PolicyMLP + input_size: ${eval:'${n_obs_steps}*${obs_dim}'} + hidden_size: + - 256 + - 256 + output_size: ${eval:'${n_action_steps}*${action_dim}'} + activation: relu + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + llm_discriminator: + _target_: llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator + task_id: ${task_name} + llm_translator: + _target_: llmbc.translator.llm_translator.LLMTranslator + cfg: ${llm} + obs_dim: ${task.obs_dim} + action_dim: ${task.action_dim} + horizon: ${horizon} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + loss_bc_weight: 1.0 + loss_llm_weight: 0.01 + horizon: ${horizon} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + normalize_llm_loss: true +dataloader: + batch_size: 16 + num_workers: 0 + shuffle: true + pin_memory: false + persistent_workers: false +val_dataloader: + batch_size: 16 + num_workers: 0 + shuffle: true + pin_memory: false + persistent_workers: false +optimizer: + _target_: torch.optim.AdamW + lr: 0.01 + betas: + - 0.95 + - 0.999 + eps: 1.0e-08 + weight_decay: 1.0e-06 +training: + device: cuda:0 + seed: 42 + debug: false + resume: false + lr_scheduler: cosine + lr_warmup_steps: 10 + num_epochs: 1001 + gradient_accumulate_every: 8 + grad_norm_clip: 0.5 + rollout_every: 5 + checkpoint_every: 5 + val_every: 1 + sample_every: 5 + sample_max_batch: 128 + max_train_steps: null + max_val_steps: null + tqdm_interval_sec: 1.0 +logging: + project: ${task.name}-training + resume: true + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: + - ${name} + - ${task_name} + - ${exp_name} + id: null + group: null +checkpoint: + topk: + monitor_key: test_success_rate + mode: max + k: 5 + format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt + save_last_ckpt: true + save_last_snapshot: false +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} +task: + name: box-close-v2 + obs_dim: 9 + action_dim: 4 + env_runner: + _target_: llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner + env_name: llf-metaworld-box-close-v2 + n_train: 10 + n_test: 50 + n_envs: 10 + max_steps: 30 + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + instruction_type: b + feedback_type: + - hp + - hn + - fp + visual: false + discount: 0.9 + dataset: + _target_: llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset + data_path: datasets/box-close-v2.pt + data_path2: datasets/box-close-v2.pt + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1'} + pad_after: ${eval:'${n_action_steps}-1'} + obs_eef_target: true + use_manual_normalizer: false + val_ratio: 0.1 + dummy_normalizer: true + instructor: + _target_: llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor +llm: + name: HuggingFaceTB/SmolLM2-135M-Instruct + model_name: SmolLM2-135M-Instruct + config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig + causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM + use_quantization: false + use_joint_mlp_projector: true + llm_mode: ete-finetuned + finetune_mode: orig + checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890 + max_length: 100 + lora_config: + r: 32 + lora_alpha: 64 + lora_dropout: 0.05 + bias: none + task_type: CAUSAL_LM + prompter: + _target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter + use_joint_mlp_projector: true + hydra: + job: + override_dirname: ${model_name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${model_name} diff --git a/2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml b/2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..91bc606fb6730b765a14ff99bb8ab85928681018 --- /dev/null +++ b/2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml @@ -0,0 +1,154 @@ +hydra: + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: train + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: llmbc_box-close-v2.yaml + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /work/u1131674/LLM-BC + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /work/u1131674/LLM-BC/config/main_table + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2 + choices: + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml b/2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/train.log b/2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/train.log new file mode 100644 index 0000000000000000000000000000000000000000..cbd23c5f0a38965d4c06cfef28abe3889b9a8abe --- /dev/null +++ b/2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/train.log @@ -0,0 +1,9 @@ +[2026-01-21 12:10:36,293][numexpr.utils][INFO] - Note: detected 224 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. +[2026-01-21 12:10:36,293][numexpr.utils][INFO] - Note: NumExpr detected 224 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 16. +[2026-01-21 12:10:36,293][numexpr.utils][INFO] - NumExpr defaulting to 16 threads. +[2026-01-21 12:10:42,233][datasets][INFO] - PyTorch version 2.2.2 available. +[2026-01-21 12:10:42,234][datasets][INFO] - TensorFlow version 2.15.1 available. +[2026-01-21 12:10:42,235][datasets][INFO] - JAX version 0.4.30 available. +[2026-01-21 12:11:05,787][matplotlib.font_manager][INFO] - Failed to extract font properties from /usr/share/fonts/google-noto-emoji/NotoColorEmoji.ttf: In FT2Font: Can not load face (unknown file format; error code 0x2) +[2026-01-21 12:11:05,799][matplotlib.font_manager][INFO] - generated new fontManager +[2026-01-21 12:11:07,857][OpenGL.platform.ctypesloader][INFO] - Failed to load library ( 'libOSMesa.so.0' ): libOSMesa.so.0: cannot open shared object file: No such file or directory diff --git a/2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml b/2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2c0703a726938135bfd72381f09a35f4c693378 --- /dev/null +++ b/2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml @@ -0,0 +1,163 @@ +name: train_llmbc_lowdim +_target_: llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace +obs_dim: ${task.obs_dim} +action_dim: ${task.action_dim} +task_name: ${task.name} +exp_name: default +model_name: ${llm.name} +horizon: 1 +n_obs_steps: 1 +n_action_steps: 1 +n_latency_steps: 0 +past_action_visible: false +llm_orig_expert_feedback: true +llm_do_sample: false +policy: + _target_: llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy + model: + _target_: llmbc.model.policy.policy_mlp.PolicyMLP + input_size: ${eval:'${n_obs_steps}*${obs_dim}'} + hidden_size: + - 256 + - 256 + output_size: ${eval:'${n_action_steps}*${action_dim}'} + activation: relu + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + llm_discriminator: + _target_: llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator + task_id: ${task_name} + llm_translator: + _target_: llmbc.translator.llm_translator.LLMTranslator + cfg: ${llm} + obs_dim: ${task.obs_dim} + action_dim: ${task.action_dim} + horizon: ${horizon} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + loss_bc_weight: 1.0 + loss_llm_weight: 0.01 + horizon: ${horizon} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + normalize_llm_loss: true +dataloader: + batch_size: 16 + num_workers: 0 + shuffle: true + pin_memory: false + persistent_workers: false +val_dataloader: + batch_size: 16 + num_workers: 0 + shuffle: true + pin_memory: false + persistent_workers: false +optimizer: + _target_: torch.optim.AdamW + lr: 0.01 + betas: + - 0.95 + - 0.999 + eps: 1.0e-08 + weight_decay: 1.0e-06 +training: + device: cuda:0 + seed: 42 + debug: false + resume: false + lr_scheduler: cosine + lr_warmup_steps: 10 + num_epochs: 1001 + gradient_accumulate_every: 8 + grad_norm_clip: 0.5 + rollout_every: 5 + checkpoint_every: 5 + val_every: 1 + sample_every: 5 + sample_max_batch: 128 + max_train_steps: null + max_val_steps: null + tqdm_interval_sec: 1.0 +logging: + project: ${task.name}-training + resume: true + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: + - ${name} + - ${task_name} + - ${exp_name} + id: null + group: null +checkpoint: + topk: + monitor_key: test_success_rate + mode: max + k: 5 + format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt + save_last_ckpt: true + save_last_snapshot: false +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} +task: + name: box-close-v2 + obs_dim: 9 + action_dim: 4 + env_runner: + _target_: llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner + env_name: llf-metaworld-box-close-v2 + n_train: 10 + n_test: 50 + n_envs: 10 + max_steps: 30 + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + instruction_type: b + feedback_type: + - hp + - hn + - fp + visual: false + discount: 0.9 + dataset: + _target_: llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset + data_path: datasets/box-close-v2.pt + data_path2: datasets/box-close-v2.pt + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1'} + pad_after: ${eval:'${n_action_steps}-1'} + obs_eef_target: true + use_manual_normalizer: false + val_ratio: 0.1 + dummy_normalizer: true + instructor: + _target_: llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor +llm: + name: HuggingFaceTB/SmolLM2-135M-Instruct + model_name: SmolLM2-135M-Instruct + config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig + causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM + use_quantization: false + use_joint_mlp_projector: true + llm_mode: ete-finetuned + finetune_mode: orig + checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890 + max_length: 100 + lora_config: + r: 32 + lora_alpha: 64 + lora_dropout: 0.05 + bias: none + task_type: CAUSAL_LM + prompter: + _target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter + use_joint_mlp_projector: true + hydra: + job: + override_dirname: ${model_name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${model_name} diff --git a/2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml b/2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac271dbca9ec313d412e8242332c5ec097ef22e5 --- /dev/null +++ b/2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml @@ -0,0 +1,154 @@ +hydra: + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: train + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: llmbc_box-close-v2.yaml + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /work/u1131674/LLM-BC + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /work/u1131674/LLM-BC/config/main_table + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2 + choices: + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml b/2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/train.log b/2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/train.log new file mode 100644 index 0000000000000000000000000000000000000000..95a4626981ad6d3294759f0bcc7e2dd315d17e53 --- /dev/null +++ b/2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/train.log @@ -0,0 +1,12 @@ +[2026-01-21 12:13:11,502][numexpr.utils][INFO] - Note: detected 224 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. +[2026-01-21 12:13:11,502][numexpr.utils][INFO] - Note: NumExpr detected 224 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 16. +[2026-01-21 12:13:11,502][numexpr.utils][INFO] - NumExpr defaulting to 16 threads. +[2026-01-21 12:13:16,444][datasets][INFO] - PyTorch version 2.2.2 available. +[2026-01-21 12:13:16,445][datasets][INFO] - TensorFlow version 2.15.1 available. +[2026-01-21 12:13:16,446][datasets][INFO] - JAX version 0.4.30 available. +[2026-01-21 12:13:41,170][root][INFO] - running build_ext +[2026-01-21 12:13:41,174][root][INFO] - building 'mujoco_py.cymj' extension +[2026-01-21 12:13:41,174][root][INFO] - creating /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py +[2026-01-21 12:13:41,196][root][INFO] - creating /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/gl +[2026-01-21 12:13:41,197][root][INFO] - gcc -pthread -B /home/u1131674/.conda/envs/llm-bc/compiler_compat -Wno-unused-result -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -I/home/u1131674/.conda/envs/llm-bc/include -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -fPIC -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py -I/home/u1131674/.mujoco/mujoco210/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/numpy/core/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/vendor/egl -I/home/u1131674/.conda/envs/llm-bc/include/python3.9 -c /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/cymj.c -o /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/cymj.o -fopenmp -w +[2026-01-21 12:14:08,619][root][INFO] - gcc -pthread -B /home/u1131674/.conda/envs/llm-bc/compiler_compat -Wno-unused-result -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -I/home/u1131674/.conda/envs/llm-bc/include -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -fPIC -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py -I/home/u1131674/.mujoco/mujoco210/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/numpy/core/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/vendor/egl -I/home/u1131674/.conda/envs/llm-bc/include/python3.9 -c /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/gl/eglshim.c -o /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/gl/eglshim.o -fopenmp -w diff --git a/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2c0703a726938135bfd72381f09a35f4c693378 --- /dev/null +++ b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml @@ -0,0 +1,163 @@ +name: train_llmbc_lowdim +_target_: llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace +obs_dim: ${task.obs_dim} +action_dim: ${task.action_dim} +task_name: ${task.name} +exp_name: default +model_name: ${llm.name} +horizon: 1 +n_obs_steps: 1 +n_action_steps: 1 +n_latency_steps: 0 +past_action_visible: false +llm_orig_expert_feedback: true +llm_do_sample: false +policy: + _target_: llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy + model: + _target_: llmbc.model.policy.policy_mlp.PolicyMLP + input_size: ${eval:'${n_obs_steps}*${obs_dim}'} + hidden_size: + - 256 + - 256 + output_size: ${eval:'${n_action_steps}*${action_dim}'} + activation: relu + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + llm_discriminator: + _target_: llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator + task_id: ${task_name} + llm_translator: + _target_: llmbc.translator.llm_translator.LLMTranslator + cfg: ${llm} + obs_dim: ${task.obs_dim} + action_dim: ${task.action_dim} + horizon: ${horizon} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + loss_bc_weight: 1.0 + loss_llm_weight: 0.01 + horizon: ${horizon} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + normalize_llm_loss: true +dataloader: + batch_size: 16 + num_workers: 0 + shuffle: true + pin_memory: false + persistent_workers: false +val_dataloader: + batch_size: 16 + num_workers: 0 + shuffle: true + pin_memory: false + persistent_workers: false +optimizer: + _target_: torch.optim.AdamW + lr: 0.01 + betas: + - 0.95 + - 0.999 + eps: 1.0e-08 + weight_decay: 1.0e-06 +training: + device: cuda:0 + seed: 42 + debug: false + resume: false + lr_scheduler: cosine + lr_warmup_steps: 10 + num_epochs: 1001 + gradient_accumulate_every: 8 + grad_norm_clip: 0.5 + rollout_every: 5 + checkpoint_every: 5 + val_every: 1 + sample_every: 5 + sample_max_batch: 128 + max_train_steps: null + max_val_steps: null + tqdm_interval_sec: 1.0 +logging: + project: ${task.name}-training + resume: true + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: + - ${name} + - ${task_name} + - ${exp_name} + id: null + group: null +checkpoint: + topk: + monitor_key: test_success_rate + mode: max + k: 5 + format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt + save_last_ckpt: true + save_last_snapshot: false +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} +task: + name: box-close-v2 + obs_dim: 9 + action_dim: 4 + env_runner: + _target_: llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner + env_name: llf-metaworld-box-close-v2 + n_train: 10 + n_test: 50 + n_envs: 10 + max_steps: 30 + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + instruction_type: b + feedback_type: + - hp + - hn + - fp + visual: false + discount: 0.9 + dataset: + _target_: llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset + data_path: datasets/box-close-v2.pt + data_path2: datasets/box-close-v2.pt + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1'} + pad_after: ${eval:'${n_action_steps}-1'} + obs_eef_target: true + use_manual_normalizer: false + val_ratio: 0.1 + dummy_normalizer: true + instructor: + _target_: llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor +llm: + name: HuggingFaceTB/SmolLM2-135M-Instruct + model_name: SmolLM2-135M-Instruct + config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig + causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM + use_quantization: false + use_joint_mlp_projector: true + llm_mode: ete-finetuned + finetune_mode: orig + checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890 + max_length: 100 + lora_config: + r: 32 + lora_alpha: 64 + lora_dropout: 0.05 + bias: none + task_type: CAUSAL_LM + prompter: + _target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter + use_joint_mlp_projector: true + hydra: + job: + override_dirname: ${model_name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${model_name} diff --git a/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..23f2f8c95d2d266554eae756877cc9f93bbef386 --- /dev/null +++ b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml @@ -0,0 +1,154 @@ +hydra: + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: train + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: llmbc_box-close-v2.yaml + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /work/u1131674/LLM-BC + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /work/u1131674/LLM-BC/config/main_table + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2 + choices: + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/logs.json.txt b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/logs.json.txt new file mode 100644 index 0000000000000000000000000000000000000000..037560c6b38137ae778adc9969a23130f16517cd --- /dev/null +++ b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/logs.json.txt @@ -0,0 +1,237 @@ +{"train_loss": 0.2577439248561859, "train_loss_bc": 0.25195014476776123, "train_loss_llm": 0.5793781280517578, "grad_norm": 0.12829534709453583, "global_step": 0, "epoch": 0, "lr": 0.001} +{"train_loss": 0.278277724981308, "train_loss_bc": 0.27264082431793213, "train_loss_llm": 0.5636913180351257, "grad_norm": 0.13488440215587616, "global_step": 1, "epoch": 0, "lr": 0.001} +{"train_loss": 0.29180172085762024, "train_loss_bc": 0.28621771931648254, "train_loss_llm": 0.5584006309509277, "grad_norm": 0.27443262934684753, "global_step": 2, "epoch": 0, "lr": 0.001} +{"train_loss": 0.2927302420139313, "train_loss_bc": 0.2865779399871826, "train_loss_llm": 0.6152303218841553, "grad_norm": 0.4118553102016449, "global_step": 3, "epoch": 0, "lr": 0.001} +{"train_loss": 0.28513821959495544, "train_loss_bc": 0.2797144651412964, "train_loss_llm": 0.5423756241798401, "grad_norm": 0.5492109656333923, "global_step": 4, "epoch": 0, "lr": 0.001} +{"train_loss": 0.31990620493888855, "train_loss_bc": 0.31439733505249023, "train_loss_llm": 0.5508874654769897, "grad_norm": 0.6955047845840454, "global_step": 5, "epoch": 0, "lr": 0.001} +{"train_loss": 0.27779361605644226, "train_loss_bc": 0.27196407318115234, "train_loss_llm": 0.5829552412033081, "grad_norm": 0.8313235640525818, "global_step": 6, "epoch": 0, "lr": 0.001} +{"train_loss": 0.23134832084178925, "train_loss_bc": 0.22543349862098694, "train_loss_llm": 0.5914825201034546, "grad_norm": 0.9541349411010742, "global_step": 7, "epoch": 0, "lr": 0.001} +{"train_loss": 0.2081925868988037, "train_loss_bc": 0.2028963267803192, "train_loss_llm": 0.5296263694763184, "grad_norm": 1.0696462392807007, "global_step": 8, "epoch": 0, "lr": 0.002} +{"train_loss": 0.2047700732946396, "train_loss_bc": 0.19888944923877716, "train_loss_llm": 0.5880619287490845, "grad_norm": 0.11593382805585861, "global_step": 9, "epoch": 0, "lr": 0.002} +{"train_loss": 0.2171408236026764, "train_loss_bc": 0.2111976146697998, "train_loss_llm": 0.5943207740783691, "grad_norm": 0.23421066999435425, "global_step": 10, "epoch": 0, "lr": 0.002} +{"train_loss": 0.211279034614563, "train_loss_bc": 0.2063535749912262, "train_loss_llm": 0.4925457537174225, "grad_norm": 0.3522197902202606, "global_step": 11, "epoch": 0, "lr": 0.002} +{"train_loss": 0.2630419433116913, "train_loss_bc": 0.2565857172012329, "train_loss_llm": 0.6456230282783508, "grad_norm": 0.48301446437835693, "global_step": 12, "epoch": 0, "lr": 0.002} +{"train_loss": 0.2441762089729309, "train_loss_bc": 0.23813079297542572, "train_loss_llm": 0.604541540145874, "grad_norm": 0.609789252281189, "global_step": 13, "epoch": 0, "lr": 0.002} +{"train_loss": 0.2768160402774811, "train_loss_bc": 0.27063897252082825, "train_loss_llm": 0.6177071332931519, "grad_norm": 0.7476180195808411, "global_step": 14, "epoch": 0, "lr": 0.002} +{"train_loss": 0.2484627217054367, "train_loss_bc": 0.24271151423454285, "train_loss_llm": 0.5751214623451233, "grad_norm": 0.8759933710098267, "global_step": 15, "epoch": 0, "lr": 0.002} +{"train_loss": 0.20768630504608154, "train_loss_bc": 0.20276379585266113, "train_loss_llm": 0.49225085973739624, "grad_norm": 0.9921573996543884, "global_step": 16, "epoch": 0, "lr": 0.003} +{"train_loss": 0.16910794377326965, "train_loss_bc": 0.16317197680473328, "train_loss_llm": 0.5935962796211243, "grad_norm": 0.11413145065307617, "global_step": 17, "epoch": 0, "lr": 0.003} +{"train_loss": 0.1472136378288269, "train_loss_bc": 0.14170503616333008, "train_loss_llm": 0.5508600473403931, "grad_norm": 0.2181655466556549, "global_step": 18, "epoch": 0, "lr": 0.003} +{"train_loss": 0.09437470138072968, "train_loss_bc": 0.08951498568058014, "train_loss_llm": 0.4859713315963745, "grad_norm": 0.2965621054172516, "global_step": 19, "epoch": 0, "lr": 0.003} +{"train_loss": 0.14634960889816284, "train_loss_bc": 0.14015674591064453, "train_loss_llm": 0.6192870140075684, "grad_norm": 0.399164617061615, "global_step": 20, "epoch": 0, "lr": 0.003} +{"train_loss": 0.13075421750545502, "train_loss_bc": 0.12502902746200562, "train_loss_llm": 0.5725185871124268, "grad_norm": 0.49499645829200745, "global_step": 21, "epoch": 0, "lr": 0.003} +{"train_loss": 0.1632406860589981, "train_loss_bc": 0.15791185200214386, "train_loss_llm": 0.5328830480575562, "grad_norm": 0.6078411936759949, "global_step": 22, "epoch": 0, "lr": 0.003} +{"train_loss": 0.16032128036022186, "train_loss_bc": 0.1541915237903595, "train_loss_llm": 0.6129759550094604, "grad_norm": 0.7181513905525208, "global_step": 23, "epoch": 0, "lr": 0.003} +{"train_loss": 0.10194464772939682, "train_loss_bc": 0.09674602746963501, "train_loss_llm": 0.5198622345924377, "grad_norm": 0.8007318377494812, "global_step": 24, "epoch": 0, "lr": 0.004} +{"train_loss": 0.04645621404051781, "train_loss_bc": 0.04119991511106491, "train_loss_llm": 0.5256298780441284, "grad_norm": 0.04878818616271019, "global_step": 25, "epoch": 0, "lr": 0.004} +{"train_loss": 0.05316489189863205, "train_loss_bc": 0.04852021113038063, "train_loss_llm": 0.46446824073791504, "grad_norm": 0.10567886382341385, "global_step": 26, "epoch": 0, "lr": 0.004} +{"train_loss": 0.034993816167116165, "train_loss_bc": 0.03099265694618225, "train_loss_llm": 0.40011608600616455, "grad_norm": 0.12614615261554718, "global_step": 27, "epoch": 0, "lr": 0.004} +{"train_loss": 0.05056390166282654, "train_loss_bc": 0.04542642831802368, "train_loss_llm": 0.5137471556663513, "grad_norm": 0.17804424464702606, "global_step": 28, "epoch": 0, "lr": 0.004} +{"train_loss": 0.040129225701093674, "train_loss_bc": 0.03615850210189819, "train_loss_llm": 0.3970724642276764, "grad_norm": 0.21957509219646454, "global_step": 29, "epoch": 0, "lr": 0.004} +{"train_loss": 0.06979431211948395, "train_loss_bc": 0.06506022810935974, "train_loss_llm": 0.47340837121009827, "grad_norm": 0.30375877022743225, "global_step": 30, "epoch": 0, "lr": 0.004} +{"train_loss": 0.05452805757522583, "train_loss_bc": 0.050960805267095566, "train_loss_llm": 0.3567253649234772, "grad_norm": 0.3601897656917572, "global_step": 31, "epoch": 0, "lr": 0.004} +{"train_loss": 0.05965254083275795, "train_loss_bc": 0.055447064340114594, "train_loss_llm": 0.4205475151538849, "grad_norm": 0.4274352192878723, "global_step": 32, "epoch": 0, "lr": 0.005} +{"train_loss": 0.06257618218660355, "train_loss_bc": 0.05625780671834946, "train_loss_llm": 0.6318378448486328, "grad_norm": 0.09260464459657669, "global_step": 33, "epoch": 0, "lr": 0.005} +{"train_loss": 0.05445178598165512, "train_loss_bc": 0.04902859777212143, "train_loss_llm": 0.5423187017440796, "grad_norm": 0.16763924062252045, "global_step": 34, "epoch": 0, "lr": 0.005} +{"train_loss": 0.06851150095462799, "train_loss_bc": 0.06271672248840332, "train_loss_llm": 0.5794777870178223, "grad_norm": 0.2678099274635315, "global_step": 35, "epoch": 0, "lr": 0.005} +{"train_loss": 0.0630837082862854, "train_loss_bc": 0.0575467087328434, "train_loss_llm": 0.553699791431427, "grad_norm": 0.3552546501159668, "global_step": 36, "epoch": 0, "lr": 0.005} +{"train_loss": 0.040140487253665924, "train_loss_bc": 0.03421059995889664, "train_loss_llm": 0.5929888486862183, "grad_norm": 0.41354402899742126, "global_step": 37, "epoch": 0, "lr": 0.005} +{"train_loss": 0.06981470435857773, "train_loss_bc": 0.063104547560215, "train_loss_llm": 0.6710153818130493, "grad_norm": 0.5134375095367432, "global_step": 38, "epoch": 0, "lr": 0.005} +{"train_loss": 0.051894403994083405, "train_loss_bc": 0.045866355299949646, "train_loss_llm": 0.6028048992156982, "grad_norm": 0.5825293660163879, "global_step": 39, "epoch": 0, "lr": 0.005} +{"train_loss": 0.04342593997716904, "train_loss_bc": 0.03640042245388031, "train_loss_llm": 0.7025519013404846, "grad_norm": 0.6445399522781372, "global_step": 40, "epoch": 0, "lr": 0.006} +{"train_loss": 0.1558080017566681, "train_loss_bc": 0.15039610862731934, "train_loss_llm": 0.5411889553070068, "grad_norm": 0.20307017862796783, "global_step": 41, "epoch": 0, "lr": 0.006} +{"train_loss": 0.12238138169050217, "train_loss_bc": 0.11733964830636978, "train_loss_llm": 0.5041730403900146, "grad_norm": 0.3785540461540222, "global_step": 42, "epoch": 0, "lr": 0.006} +{"train_loss": 0.11476962268352509, "train_loss_bc": 0.1102944016456604, "train_loss_llm": 0.44752180576324463, "grad_norm": 0.5496576428413391, "global_step": 43, "epoch": 0, "lr": 0.006} +{"train_loss": 0.1318601667881012, "train_loss_bc": 0.12625660002231598, "train_loss_llm": 0.5603567957878113, "grad_norm": 0.7342697381973267, "global_step": 44, "epoch": 0, "lr": 0.006} +{"train_loss": 0.15008734166622162, "train_loss_bc": 0.14480489492416382, "train_loss_llm": 0.5282450914382935, "grad_norm": 0.9383558630943298, "global_step": 45, "epoch": 0, "lr": 0.006} +{"train_loss": 0.11853287369012833, "train_loss_bc": 0.11271888017654419, "train_loss_llm": 0.5813996195793152, "grad_norm": 1.1123522520065308, "global_step": 46, "epoch": 0, "lr": 0.006} +{"train_loss": 0.14414051175117493, "train_loss_bc": 0.13898390531539917, "train_loss_llm": 0.5156602263450623, "grad_norm": 1.3082720041275024, "global_step": 47, "epoch": 0, "lr": 0.006} +{"train_loss": 0.1536247432231903, "train_loss_bc": 0.14848382771015167, "train_loss_llm": 0.5140920877456665, "grad_norm": 1.5149050951004028, "global_step": 48, "epoch": 0, "lr": 0.006999999999999999} +{"train_loss": 0.25954943895339966, "train_loss_bc": 0.25265026092529297, "train_loss_llm": 0.6899186372756958, "grad_norm": 0.3054397702217102, "global_step": 49, "epoch": 0, "lr": 0.006999999999999999} +{"train_loss": 0.1506877839565277, "train_loss_bc": 0.1453518569469452, "train_loss_llm": 0.5335921049118042, "grad_norm": 0.5257424116134644, "global_step": 50, "epoch": 0, "lr": 0.006999999999999999} +{"train_loss": 0.17754197120666504, "train_loss_bc": 0.17331534624099731, "train_loss_llm": 0.4226621985435486, "grad_norm": 0.769081711769104, "global_step": 51, "epoch": 0, "lr": 0.006999999999999999} +{"train_loss": 0.27337175607681274, "train_loss_bc": 0.26682397723197937, "train_loss_llm": 0.6547775268554688, "grad_norm": 1.0860713720321655, "global_step": 52, "epoch": 0, "lr": 0.006999999999999999} +{"train_loss": 0.21706189215183258, "train_loss_bc": 0.21164150536060333, "train_loss_llm": 0.5420382022857666, "grad_norm": 1.3574727773666382, "global_step": 53, "epoch": 0, "lr": 0.006999999999999999} +{"train_loss": 0.16595229506492615, "train_loss_bc": 0.16188554465770721, "train_loss_llm": 0.4066758155822754, "grad_norm": 1.5899840593338013, "global_step": 54, "epoch": 0, "lr": 0.006999999999999999} +{"train_loss": 0.23229315876960754, "train_loss_bc": 0.22762833535671234, "train_loss_llm": 0.4664822220802307, "grad_norm": 1.8754494190216064, "global_step": 55, "epoch": 0, "lr": 0.006999999999999999} +{"train_loss": 0.21556805074214935, "train_loss_bc": 0.2103624939918518, "train_loss_llm": 0.5205552577972412, "grad_norm": 2.147486448287964, "global_step": 56, "epoch": 0, "lr": 0.008} +{"train_loss": 0.22826582193374634, "train_loss_bc": 0.22174005210399628, "train_loss_llm": 0.6525774002075195, "grad_norm": 0.286575049161911, "global_step": 57, "epoch": 0, "lr": 0.008} +{"train_loss": 0.20953819155693054, "train_loss_bc": 0.203176349401474, "train_loss_llm": 0.6361845135688782, "grad_norm": 0.5594003200531006, "global_step": 58, "epoch": 0, "lr": 0.008} +{"train_loss": 0.191473588347435, "train_loss_bc": 0.18566101789474487, "train_loss_llm": 0.581256628036499, "grad_norm": 0.8172082304954529, "global_step": 59, "epoch": 0, "lr": 0.008} +{"train_loss": 0.17888422310352325, "train_loss_bc": 0.17266017198562622, "train_loss_llm": 0.6224054098129272, "grad_norm": 1.0602154731750488, "global_step": 60, "epoch": 0, "lr": 0.008} +{"train_loss": 0.21835987269878387, "train_loss_bc": 0.21199063956737518, "train_loss_llm": 0.6369228959083557, "grad_norm": 1.3346713781356812, "global_step": 61, "epoch": 0, "lr": 0.008} +{"train_loss": 0.17873793840408325, "train_loss_bc": 0.17244993150234222, "train_loss_llm": 0.6288003921508789, "grad_norm": 1.583105206489563, "global_step": 62, "epoch": 0, "lr": 0.008} +{"train_loss": 0.14904041588306427, "train_loss_bc": 0.14287304878234863, "train_loss_llm": 0.616736888885498, "grad_norm": 1.8050798177719116, "global_step": 63, "epoch": 0, "lr": 0.008} +{"train_loss": 0.22122563421726227, "train_loss_bc": 0.215244859457016, "train_loss_llm": 0.5980769395828247, "grad_norm": 2.082054615020752, "global_step": 64, "epoch": 0, "lr": 0.009000000000000001} +{"train_loss": 0.11144096404314041, "train_loss_bc": 0.10432037711143494, "train_loss_llm": 0.712058424949646, "grad_norm": 0.1753779500722885, "global_step": 65, "epoch": 0, "lr": 0.009000000000000001} +{"train_loss": 0.11379032582044601, "train_loss_bc": 0.107419952750206, "train_loss_llm": 0.6370369791984558, "grad_norm": 0.3535049855709076, "global_step": 66, "epoch": 0, "lr": 0.009000000000000001} +{"train_loss": 0.10985075682401657, "train_loss_bc": 0.1022319421172142, "train_loss_llm": 0.7618812322616577, "grad_norm": 0.5256584286689758, "global_step": 67, "epoch": 0, "lr": 0.009000000000000001} +{"train_loss": 0.18938198685646057, "train_loss_bc": 0.18246878683567047, "train_loss_llm": 0.691320538520813, "grad_norm": 0.7720930576324463, "global_step": 68, "epoch": 0, "lr": 0.009000000000000001} +{"train_loss": 0.10004343092441559, "train_loss_bc": 0.09400247782468796, "train_loss_llm": 0.6040955781936646, "grad_norm": 0.939155638217926, "global_step": 69, "epoch": 0, "lr": 0.009000000000000001} +{"train_loss": 0.11703117191791534, "train_loss_bc": 0.11094395071268082, "train_loss_llm": 0.6087222099304199, "grad_norm": 1.1172370910644531, "global_step": 70, "epoch": 0, "lr": 0.009000000000000001} +{"train_loss": 0.13404561579227448, "train_loss_bc": 0.12686075270175934, "train_loss_llm": 0.7184867262840271, "grad_norm": 1.312468409538269, "global_step": 71, "epoch": 0, "lr": 0.009000000000000001} +{"train_loss": 0.13330930471420288, "train_loss_bc": 0.12657678127288818, "train_loss_llm": 0.6732516288757324, "grad_norm": 1.5088775157928467, "global_step": 72, "epoch": 0, "lr": 0.01} +{"train_loss": 0.05257038772106171, "train_loss_bc": 0.04637575149536133, "train_loss_llm": 0.6194634437561035, "grad_norm": 0.09083625674247742, "global_step": 73, "epoch": 0, "lr": 0.01} +{"train_loss": 0.06475914269685745, "train_loss_bc": 0.057880476117134094, "train_loss_llm": 0.6878665089607239, "grad_norm": 0.1966981440782547, "global_step": 74, "epoch": 0, "lr": 0.01} +{"train_loss": 0.04975426197052002, "train_loss_bc": 0.043193425983190536, "train_loss_llm": 0.6560835242271423, "grad_norm": 0.28462910652160645, "global_step": 75, "epoch": 0, "lr": 0.01} +{"train_loss": 0.04952416196465492, "train_loss_bc": 0.04294995218515396, "train_loss_llm": 0.6574209332466125, "grad_norm": 0.368166983127594, "global_step": 76, "epoch": 0, "lr": 0.01} +{"train_loss": 0.07074079662561417, "train_loss_bc": 0.06346137821674347, "train_loss_llm": 0.7279415130615234, "grad_norm": 0.484068363904953, "global_step": 77, "epoch": 0, "lr": 0.01} +{"train_loss": 0.04157562926411629, "train_loss_bc": 0.034751974046230316, "train_loss_llm": 0.6823655962944031, "grad_norm": 0.5569941997528076, "global_step": 78, "epoch": 0, "lr": 0.01} +{"train_loss": 0.06484629958868027, "train_loss_bc": 0.05785399675369263, "train_loss_llm": 0.6992301940917969, "grad_norm": 0.6628190279006958, "global_step": 79, "epoch": 0, "lr": 0.01} +{"train_loss": 0.038789354264736176, "train_loss_bc": 0.03276902064681053, "train_loss_llm": 0.6020334959030151, "grad_norm": 0.7350778579711914, "global_step": 80, "epoch": 0, "lr": 0.009999999972157305} +{"train_loss": 0.03389202430844307, "train_loss_bc": 0.02817351743578911, "train_loss_llm": 0.571850597858429, "grad_norm": 0.04861941188573837, "global_step": 81, "epoch": 0, "lr": 0.009999999972157305} +{"train_loss": 0.03024495765566826, "train_loss_bc": 0.02487185411155224, "train_loss_llm": 0.5373104214668274, "grad_norm": 0.08896133303642273, "global_step": 82, "epoch": 0, "lr": 0.009999999972157305} +{"train_loss": 0.029436565935611725, "train_loss_bc": 0.024766096845269203, "train_loss_llm": 0.46704691648483276, "grad_norm": 0.13158170878887177, "global_step": 83, "epoch": 0, "lr": 0.009999999972157305} +{"train_loss": 0.03704115003347397, "train_loss_bc": 0.03144294396042824, "train_loss_llm": 0.5598207712173462, "grad_norm": 0.18903131783008575, "global_step": 84, "epoch": 0, "lr": 0.009999999972157305} +{"train_loss": 0.031894855201244354, "train_loss_bc": 0.026735499501228333, "train_loss_llm": 0.5159357786178589, "grad_norm": 0.22145399451255798, "global_step": 85, "epoch": 0, "lr": 0.009999999972157305} +{"train_loss": 0.03053618222475052, "train_loss_bc": 0.025796514004468918, "train_loss_llm": 0.47396671772003174, "grad_norm": 0.2594376802444458, "global_step": 86, "epoch": 0, "lr": 0.009999999972157305} +{"train_loss": 0.025953643023967743, "train_loss_bc": 0.021002870053052902, "train_loss_llm": 0.49507731199264526, "grad_norm": 0.28883251547813416, "global_step": 87, "epoch": 0, "lr": 0.009999999972157305} +{"train_loss": 0.03711831569671631, "train_loss_bc": 0.03182109445333481, "train_loss_llm": 0.5297219753265381, "grad_norm": 0.33612799644470215, "global_step": 88, "epoch": 0, "lr": 0.009999999888629223} +{"train_loss": 0.07443847507238388, "train_loss_bc": 0.06750228255987167, "train_loss_llm": 0.6936193704605103, "grad_norm": 0.1032935231924057, "global_step": 89, "epoch": 0, "lr": 0.009999999888629223} +{"train_loss": 0.06578436493873596, "train_loss_bc": 0.059477001428604126, "train_loss_llm": 0.6307359933853149, "grad_norm": 0.19785623252391815, "global_step": 90, "epoch": 0, "lr": 0.009999999888629223} +{"train_loss": 0.05691196769475937, "train_loss_bc": 0.05067047104239464, "train_loss_llm": 0.6241495013237, "grad_norm": 0.28224730491638184, "global_step": 91, "epoch": 0, "lr": 0.009999999888629223} +{"train_loss": 0.07031725347042084, "train_loss_bc": 0.06331950426101685, "train_loss_llm": 0.699774980545044, "grad_norm": 0.38025128841400146, "global_step": 92, "epoch": 0, "lr": 0.009999999888629223} +{"train_loss": 0.06619272381067276, "train_loss_bc": 0.059530019760131836, "train_loss_llm": 0.6662706136703491, "grad_norm": 0.47631222009658813, "global_step": 93, "epoch": 0, "lr": 0.009999999888629223} +{"train_loss": 0.050842322409152985, "train_loss_bc": 0.04463043063879013, "train_loss_llm": 0.6211893558502197, "grad_norm": 0.5518149137496948, "global_step": 94, "epoch": 0, "lr": 0.009999999888629223} +{"train_loss": 0.05087399110198021, "train_loss_bc": 0.044860679656267166, "train_loss_llm": 0.6013312339782715, "grad_norm": 0.6292504668235779, "global_step": 95, "epoch": 0, "lr": 0.009999999888629223} +{"train_loss": 0.06841012090444565, "train_loss_bc": 0.061625488102436066, "train_loss_llm": 0.6784631013870239, "grad_norm": 0.726737916469574, "global_step": 96, "epoch": 0, "lr": 0.00999999974941575} +{"train_loss": 0.08856374025344849, "train_loss_bc": 0.08115855604410172, "train_loss_llm": 0.7405182123184204, "grad_norm": 0.11317727714776993, "global_step": 97, "epoch": 0, "lr": 0.00999999974941575} +{"train_loss": 0.08638611435890198, "train_loss_bc": 0.07939188182353973, "train_loss_llm": 0.6994235515594482, "grad_norm": 0.22164146602153778, "global_step": 98, "epoch": 0, "lr": 0.00999999974941575} +{"train_loss": 0.08941305428743362, "train_loss_bc": 0.0817980170249939, "train_loss_llm": 0.7615037560462952, "grad_norm": 0.32890111207962036, "global_step": 99, "epoch": 0, "lr": 0.00999999974941575} +{"train_loss": 0.07866586744785309, "train_loss_bc": 0.07075173407793045, "train_loss_llm": 0.7914135456085205, "grad_norm": 0.4279417097568512, "global_step": 100, "epoch": 0, "lr": 0.00999999974941575} +{"train_loss": 0.09740469604730606, "train_loss_bc": 0.0890614315867424, "train_loss_llm": 0.8343262672424316, "grad_norm": 0.5465472340583801, "global_step": 101, "epoch": 0, "lr": 0.00999999974941575} +{"train_loss": 0.07890348881483078, "train_loss_bc": 0.07148407399654388, "train_loss_llm": 0.7419418096542358, "grad_norm": 0.6493978500366211, "global_step": 102, "epoch": 0, "lr": 0.00999999974941575} +{"train_loss": 0.06637918949127197, "train_loss_bc": 0.05943997576832771, "train_loss_llm": 0.6939213275909424, "grad_norm": 0.736656665802002, "global_step": 103, "epoch": 0, "lr": 0.00999999974941575} +{"train_loss": 0.052137844264507294, "train_loss_bc": 0.04496845602989197, "train_loss_llm": 0.716938853263855, "grad_norm": 0.8118408918380737, "global_step": 104, "epoch": 0, "lr": 0.009999999554516895} +{"train_loss": 0.06986512988805771, "train_loss_bc": 0.06309865415096283, "train_loss_llm": 0.6766473650932312, "grad_norm": 0.08536022901535034, "global_step": 105, "epoch": 0, "lr": 0.009999999554516895} +{"train_loss": 0.0901651680469513, "train_loss_bc": 0.08455254882574081, "train_loss_llm": 0.5612622499465942, "grad_norm": 0.19402463734149933, "global_step": 106, "epoch": 0, "lr": 0.009999999554516895} +{"train_loss": 0.08825574815273285, "train_loss_bc": 0.08174335211515427, "train_loss_llm": 0.6512394547462463, "grad_norm": 0.29752182960510254, "global_step": 107, "epoch": 0, "lr": 0.009999999554516895} +{"train_loss": 0.07733944058418274, "train_loss_bc": 0.07105374336242676, "train_loss_llm": 0.628569483757019, "grad_norm": 0.39171040058135986, "global_step": 108, "epoch": 0, "lr": 0.009999999554516895} +{"train_loss": 0.06732399016618729, "train_loss_bc": 0.06240474805235863, "train_loss_llm": 0.49192410707473755, "grad_norm": 0.4783252775669098, "global_step": 109, "epoch": 0, "lr": 0.009999999554516895} +{"train_loss": 0.06321073323488235, "train_loss_bc": 0.05660167708992958, "train_loss_llm": 0.6609058380126953, "grad_norm": 0.558458149433136, "global_step": 110, "epoch": 0, "lr": 0.009999999554516895} +{"train_loss": 0.06905204057693481, "train_loss_bc": 0.06339387595653534, "train_loss_llm": 0.5658166408538818, "grad_norm": 0.6481609344482422, "global_step": 111, "epoch": 0, "lr": 0.009999999554516895} +{"train_loss": 0.07884093374013901, "train_loss_bc": 0.07224734127521515, "train_loss_llm": 0.6593592166900635, "grad_norm": 0.7421054840087891, "global_step": 112, "epoch": 0, "lr": 0.009999999303932654} +{"train_loss": 0.07946255803108215, "train_loss_bc": 0.07475702464580536, "train_loss_llm": 0.4705533981323242, "grad_norm": 0.10471871495246887, "global_step": 113, "epoch": 0, "lr": 0.009999999303932654} +{"train_loss": 0.07216629385948181, "train_loss_bc": 0.06603223085403442, "train_loss_llm": 0.6134059429168701, "grad_norm": 0.19753926992416382, "global_step": 114, "epoch": 0, "lr": 0.009999999303932654} +{"train_loss": 0.06510302424430847, "train_loss_bc": 0.057828500866889954, "train_loss_llm": 0.7274521589279175, "grad_norm": 0.28359219431877136, "global_step": 115, "epoch": 0, "lr": 0.009999999303932654} +{"train_loss": 0.06222425401210785, "train_loss_bc": 0.055748678743839264, "train_loss_llm": 0.6475574970245361, "grad_norm": 0.36533209681510925, "global_step": 116, "epoch": 0, "lr": 0.009999999303932654} +{"train_loss": 0.0845273807644844, "train_loss_bc": 0.07734745740890503, "train_loss_llm": 0.7179924249649048, "grad_norm": 0.4720841646194458, "global_step": 117, "epoch": 0, "lr": 0.009999999303932654} +{"train_loss": 0.06714431196451187, "train_loss_bc": 0.06066868081688881, "train_loss_llm": 0.6475629210472107, "grad_norm": 0.5596445798873901, "global_step": 118, "epoch": 0, "lr": 0.009999999303932654} +{"train_loss": 0.07048200070858002, "train_loss_bc": 0.06500747799873352, "train_loss_llm": 0.5474520921707153, "grad_norm": 0.6513513326644897, "global_step": 119, "epoch": 0, "lr": 0.009999999303932654} +{"train_loss": 0.04110037535429001, "train_loss_bc": 0.03525649011135101, "train_loss_llm": 0.5843884944915771, "grad_norm": 0.7102450132369995, "global_step": 120, "epoch": 0, "lr": 0.009999998997663032} +{"train_loss": 0.04190563037991524, "train_loss_bc": 0.03765689581632614, "train_loss_llm": 0.4248734712600708, "grad_norm": 0.06533454358577728, "global_step": 121, "epoch": 0, "lr": 0.009999998997663032} +{"train_loss": 0.04612841457128525, "train_loss_bc": 0.04169066250324249, "train_loss_llm": 0.4437751770019531, "grad_norm": 0.13389350473880768, "global_step": 122, "epoch": 0, "lr": 0.009999998997663032} +{"train_loss": 0.06232012063264847, "train_loss_bc": 0.057958535850048065, "train_loss_llm": 0.436158686876297, "grad_norm": 0.22540993988513947, "global_step": 123, "epoch": 0, "lr": 0.009999998997663032} +{"train_loss": 0.05091412365436554, "train_loss_bc": 0.04628019779920578, "train_loss_llm": 0.4633924067020416, "grad_norm": 0.29657596349716187, "global_step": 124, "epoch": 0, "lr": 0.009999998997663032} +{"train_loss": 0.04201853275299072, "train_loss_bc": 0.03731508180499077, "train_loss_llm": 0.47034499049186707, "grad_norm": 0.3558724820613861, "global_step": 125, "epoch": 0, "lr": 0.009999998997663032} +{"train_loss": 0.06030768156051636, "train_loss_bc": 0.0569755993783474, "train_loss_llm": 0.3332084119319916, "grad_norm": 0.4466772675514221, "global_step": 126, "epoch": 0, "lr": 0.009999998997663032} +{"train_loss": 0.049573902040719986, "train_loss_bc": 0.044726163148880005, "train_loss_llm": 0.48477375507354736, "grad_norm": 0.518007755279541, "global_step": 127, "epoch": 0, "lr": 0.009999998997663032} +{"train_loss": 0.05068175494670868, "train_loss_bc": 0.04603324085474014, "train_loss_llm": 0.46485158801078796, "grad_norm": 0.584708034992218, "global_step": 128, "epoch": 0, "lr": 0.009999998635708033} +{"train_loss": 0.03280109167098999, "train_loss_bc": 0.026645543053746223, "train_loss_llm": 0.6155548095703125, "grad_norm": 0.04615609720349312, "global_step": 129, "epoch": 0, "lr": 0.009999998635708033} +{"train_loss": 0.02815183810889721, "train_loss_bc": 0.022307250648736954, "train_loss_llm": 0.5844587087631226, "grad_norm": 0.08306025713682175, "global_step": 130, "epoch": 0, "lr": 0.009999998635708033} +{"train_loss": 0.04145112261176109, "train_loss_bc": 0.03547768294811249, "train_loss_llm": 0.597343921661377, "grad_norm": 0.1467656046152115, "global_step": 131, "epoch": 0, "lr": 0.009999998635708033} +{"train_loss": 0.03268067538738251, "train_loss_bc": 0.026548977941274643, "train_loss_llm": 0.6131698489189148, "grad_norm": 0.1975640058517456, "global_step": 132, "epoch": 0, "lr": 0.009999998635708033} +{"train_loss": 0.02972070872783661, "train_loss_bc": 0.024114008992910385, "train_loss_llm": 0.5606698989868164, "grad_norm": 0.2313450276851654, "global_step": 133, "epoch": 0, "lr": 0.009999998635708033} +{"train_loss": 0.034362196922302246, "train_loss_bc": 0.028685620054602623, "train_loss_llm": 0.5676577091217041, "grad_norm": 0.2870166003704071, "global_step": 134, "epoch": 0, "lr": 0.009999998635708033} +{"train_loss": 0.026356279850006104, "train_loss_bc": 0.021087775006890297, "train_loss_llm": 0.5268504023551941, "grad_norm": 0.32789376378059387, "global_step": 135, "epoch": 0, "lr": 0.009999998635708033} +{"train_loss": 0.02352694608271122, "train_loss_bc": 0.017692333087325096, "train_loss_llm": 0.5834612846374512, "grad_norm": 0.3600025475025177, "global_step": 136, "epoch": 0, "lr": 0.009999998218067659} +{"train_loss": 0.021489372476935387, "train_loss_bc": 0.015556419268250465, "train_loss_llm": 0.5932953953742981, "grad_norm": 0.029839487746357918, "global_step": 137, "epoch": 0, "lr": 0.009999998218067659} +{"train_loss": 0.022915281355381012, "train_loss_bc": 0.016903359442949295, "train_loss_llm": 0.6011921167373657, "grad_norm": 0.0649353489279747, "global_step": 138, "epoch": 0, "lr": 0.009999998218067659} +{"train_loss": 0.028618421405553818, "train_loss_bc": 0.021326089277863503, "train_loss_llm": 0.7292331457138062, "grad_norm": 0.09133722633123398, "global_step": 139, "epoch": 0, "lr": 0.009999998218067659} +{"train_loss": 0.022449012845754623, "train_loss_bc": 0.016372717916965485, "train_loss_llm": 0.6076295375823975, "grad_norm": 0.11012815684080124, "global_step": 140, "epoch": 0, "lr": 0.009999998218067659} +{"train_loss": 0.029746074229478836, "train_loss_bc": 0.023105649277567863, "train_loss_llm": 0.6640425324440002, "grad_norm": 0.13848648965358734, "global_step": 141, "epoch": 0, "lr": 0.009999998218067659} +{"train_loss": 0.024118199944496155, "train_loss_bc": 0.018623564392328262, "train_loss_llm": 0.5494635105133057, "grad_norm": 0.1677691638469696, "global_step": 142, "epoch": 0, "lr": 0.009999998218067659} +{"train_loss": 0.02615225501358509, "train_loss_bc": 0.020367056131362915, "train_loss_llm": 0.5785199403762817, "grad_norm": 0.2057863473892212, "global_step": 143, "epoch": 0, "lr": 0.009999998218067659} +{"train_loss": 0.02474672719836235, "train_loss_bc": 0.01899828016757965, "train_loss_llm": 0.5748447179794312, "grad_norm": 0.2312604933977127, "global_step": 144, "epoch": 0, "lr": 0.009999997744741916} +{"train_loss": 0.027259020134806633, "train_loss_bc": 0.022779621183872223, "train_loss_llm": 0.44793984293937683, "grad_norm": 0.03558708727359772, "global_step": 145, "epoch": 0, "lr": 0.009999997744741916} +{"train_loss": 0.026615606620907784, "train_loss_bc": 0.022006575018167496, "train_loss_llm": 0.4609031677246094, "grad_norm": 0.07820506393909454, "global_step": 146, "epoch": 0, "lr": 0.009999997744741916} +{"train_loss": 0.025012901052832603, "train_loss_bc": 0.020857524126768112, "train_loss_llm": 0.4155377149581909, "grad_norm": 0.11317337304353714, "global_step": 147, "epoch": 0, "lr": 0.009999997744741916} +{"train_loss": 0.04018259048461914, "train_loss_bc": 0.034745171666145325, "train_loss_llm": 0.5437417030334473, "grad_norm": 0.1679946333169937, "global_step": 148, "epoch": 0, "lr": 0.009999997744741916} +{"train_loss": 0.02269160747528076, "train_loss_bc": 0.018508322536945343, "train_loss_llm": 0.4183286130428314, "grad_norm": 0.1906110793352127, "global_step": 149, "epoch": 0, "lr": 0.009999997744741916} +{"train_loss": 0.02399151585996151, "train_loss_bc": 0.01900067925453186, "train_loss_llm": 0.49908363819122314, "grad_norm": 0.2185346633195877, "global_step": 150, "epoch": 0, "lr": 0.009999997744741916} +{"train_loss": 0.025642897933721542, "train_loss_bc": 0.01971365511417389, "train_loss_llm": 0.5929243564605713, "grad_norm": 0.256346195936203, "global_step": 151, "epoch": 0, "lr": 0.009999997744741916} +{"train_loss": 0.0291135311126709, "train_loss_bc": 0.025170352309942245, "train_loss_llm": 0.3943178951740265, "grad_norm": 0.29621225595474243, "global_step": 152, "epoch": 0, "lr": 0.00999999721573081} +{"train_loss": 0.031623922288417816, "train_loss_bc": 0.026910781860351562, "train_loss_llm": 0.4713141918182373, "grad_norm": 0.050291482359170914, "global_step": 153, "epoch": 0, "lr": 0.00999999721573081} +{"train_loss": 0.04012516513466835, "train_loss_bc": 0.03538067638874054, "train_loss_llm": 0.47444888949394226, "grad_norm": 0.11879635602235794, "global_step": 154, "epoch": 0, "lr": 0.00999999721573081} +{"train_loss": 0.02348470687866211, "train_loss_bc": 0.018497150391340256, "train_loss_llm": 0.49875572323799133, "grad_norm": 0.1580817550420761, "global_step": 155, "epoch": 0, "lr": 0.00999999721573081} +{"train_loss": 0.02868938073515892, "train_loss_bc": 0.024003252387046814, "train_loss_llm": 0.4686127305030823, "grad_norm": 0.20671528577804565, "global_step": 156, "epoch": 0, "lr": 0.00999999721573081} +{"train_loss": 0.03526413440704346, "train_loss_bc": 0.0299256332218647, "train_loss_llm": 0.5338499546051025, "grad_norm": 0.26737433671951294, "global_step": 157, "epoch": 0, "lr": 0.00999999721573081} +{"train_loss": 0.04240602254867554, "train_loss_bc": 0.03778018057346344, "train_loss_llm": 0.46258440613746643, "grad_norm": 0.3377738893032074, "global_step": 158, "epoch": 0, "lr": 0.00999999721573081} +{"train_loss": 0.04258374869823456, "train_loss_bc": 0.037083160132169724, "train_loss_llm": 0.5500588417053223, "grad_norm": 0.4062163233757019, "global_step": 159, "epoch": 0, "lr": 0.00999999721573081} +{"train_loss": 0.03730035200715065, "train_loss_bc": 0.0325319766998291, "train_loss_llm": 0.4768376052379608, "grad_norm": 0.46591275930404663, "global_step": 160, "epoch": 0, "lr": 0.009999996631034345} +{"train_loss": 0.03809020668268204, "train_loss_bc": 0.03346116095781326, "train_loss_llm": 0.46290475130081177, "grad_norm": 0.06493347138166428, "global_step": 161, "epoch": 0, "lr": 0.009999996631034345} +{"train_loss": 0.027684010565280914, "train_loss_bc": 0.023107346147298813, "train_loss_llm": 0.4576663374900818, "grad_norm": 0.11537288874387741, "global_step": 162, "epoch": 0, "lr": 0.009999996631034345} +{"train_loss": 0.03135323151946068, "train_loss_bc": 0.027640309184789658, "train_loss_llm": 0.3712920844554901, "grad_norm": 0.17289584875106812, "global_step": 163, "epoch": 0, "lr": 0.009999996631034345} +{"train_loss": 0.0167723186314106, "train_loss_bc": 0.012659368105232716, "train_loss_llm": 0.41129496693611145, "grad_norm": 0.20604096353054047, "global_step": 164, "epoch": 0, "lr": 0.009999996631034345} +{"train_loss": 0.03136400133371353, "train_loss_bc": 0.02577000856399536, "train_loss_llm": 0.5593993663787842, "grad_norm": 0.26496362686157227, "global_step": 165, "epoch": 0, "lr": 0.009999996631034345} +{"train_loss": 0.04508890211582184, "train_loss_bc": 0.039878830313682556, "train_loss_llm": 0.5210072994232178, "grad_norm": 0.3450776934623718, "global_step": 166, "epoch": 0, "lr": 0.009999996631034345} +{"train_loss": 0.02305273897945881, "train_loss_bc": 0.018481142818927765, "train_loss_llm": 0.4571595788002014, "grad_norm": 0.3923070430755615, "global_step": 167, "epoch": 0, "lr": 0.009999996631034345} +{"train_loss": 0.028364604339003563, "train_loss_bc": 0.023760396987199783, "train_loss_llm": 0.46042078733444214, "grad_norm": 0.44835156202316284, "global_step": 168, "epoch": 0, "lr": 0.00999999599065253} +{"train_loss": 0.03444590047001839, "train_loss_bc": 0.029000703245401382, "train_loss_llm": 0.5445197224617004, "grad_norm": 0.058123886585235596, "global_step": 169, "epoch": 0, "lr": 0.00999999599065253} +{"train_loss": 0.031413737684488297, "train_loss_bc": 0.026274994015693665, "train_loss_llm": 0.5138742327690125, "grad_norm": 0.11113490164279938, "global_step": 170, "epoch": 0, "lr": 0.00999999599065253} +{"train_loss": 0.026483573019504547, "train_loss_bc": 0.021075624972581863, "train_loss_llm": 0.540794849395752, "grad_norm": 0.15856337547302246, "global_step": 171, "epoch": 0, "lr": 0.00999999599065253} +{"train_loss": 0.026955293491482735, "train_loss_bc": 0.02244516834616661, "train_loss_llm": 0.4510125517845154, "grad_norm": 0.20139986276626587, "global_step": 172, "epoch": 0, "lr": 0.00999999599065253} +{"train_loss": 0.03016134910285473, "train_loss_bc": 0.02493642270565033, "train_loss_llm": 0.5224926471710205, "grad_norm": 0.25067630410194397, "global_step": 173, "epoch": 0, "lr": 0.00999999599065253} +{"train_loss": 0.029931407421827316, "train_loss_bc": 0.024894531816244125, "train_loss_llm": 0.5036876797676086, "grad_norm": 0.2985679507255554, "global_step": 174, "epoch": 0, "lr": 0.00999999599065253} +{"train_loss": 0.040666207671165466, "train_loss_bc": 0.035972896963357925, "train_loss_llm": 0.46933093667030334, "grad_norm": 0.36520418524742126, "global_step": 175, "epoch": 0, "lr": 0.00999999599065253} +{"train_loss": 0.02875429019331932, "train_loss_bc": 0.024598199874162674, "train_loss_llm": 0.4156089723110199, "grad_norm": 0.4146167039871216, "global_step": 176, "epoch": 0, "lr": 0.009999995294585371} +{"train_loss": 0.03293757140636444, "train_loss_bc": 0.02679475024342537, "train_loss_llm": 0.6142822504043579, "grad_norm": 0.05058354139328003, "global_step": 177, "epoch": 0, "lr": 0.009999995294585371} +{"train_loss": 0.025597713887691498, "train_loss_bc": 0.01985827460885048, "train_loss_llm": 0.5739438533782959, "grad_norm": 0.08788882941007614, "global_step": 178, "epoch": 0, "lr": 0.009999995294585371} +{"train_loss": 0.02832857519388199, "train_loss_bc": 0.023654501885175705, "train_loss_llm": 0.4674074053764343, "grad_norm": 0.1335342526435852, "global_step": 179, "epoch": 0, "lr": 0.009999995294585371} +{"train_loss": 0.023435339331626892, "train_loss_bc": 0.018604157492518425, "train_loss_llm": 0.48311811685562134, "grad_norm": 0.16893020272254944, "global_step": 180, "epoch": 0, "lr": 0.009999995294585371} +{"train_loss": 0.01497327908873558, "train_loss_bc": 0.010697474703192711, "train_loss_llm": 0.4275803864002228, "grad_norm": 0.1971648633480072, "global_step": 181, "epoch": 0, "lr": 0.009999995294585371} +{"train_loss": 0.03193127363920212, "train_loss_bc": 0.025609299540519714, "train_loss_llm": 0.6321975588798523, "grad_norm": 0.2400187849998474, "global_step": 182, "epoch": 0, "lr": 0.009999995294585371} +{"train_loss": 0.020016666501760483, "train_loss_bc": 0.01540004089474678, "train_loss_llm": 0.461662620306015, "grad_norm": 0.27775779366493225, "global_step": 183, "epoch": 0, "lr": 0.009999995294585371} +{"train_loss": 0.019674330949783325, "train_loss_bc": 0.014173893257975578, "train_loss_llm": 0.5500437021255493, "grad_norm": 0.3127053380012512, "global_step": 184, "epoch": 0, "lr": 0.009999994542832874} +{"train_loss": 0.020366767421364784, "train_loss_bc": 0.015372475609183311, "train_loss_llm": 0.49942925572395325, "grad_norm": 0.027160177007317543, "global_step": 185, "epoch": 0, "lr": 0.009999994542832874} +{"train_loss": 0.02773042395710945, "train_loss_bc": 0.022009629756212234, "train_loss_llm": 0.5720794796943665, "grad_norm": 0.06589915603399277, "global_step": 186, "epoch": 0, "lr": 0.009999994542832874} +{"train_loss": 0.027988407760858536, "train_loss_bc": 0.02180863544344902, "train_loss_llm": 0.6179772615432739, "grad_norm": 0.1006016656756401, "global_step": 187, "epoch": 0, "lr": 0.009999994542832874} +{"train_loss": 0.027591602876782417, "train_loss_bc": 0.022058088332414627, "train_loss_llm": 0.5533514022827148, "grad_norm": 0.13344469666481018, "global_step": 188, "epoch": 0, "lr": 0.009999994542832874} +{"train_loss": 0.017557095736265182, "train_loss_bc": 0.012610466219484806, "train_loss_llm": 0.494662880897522, "grad_norm": 0.1636464148759842, "global_step": 189, "epoch": 0, "lr": 0.009999994542832874} +{"train_loss": 0.028389083221554756, "train_loss_bc": 0.021804803982377052, "train_loss_llm": 0.6584279537200928, "grad_norm": 0.20365768671035767, "global_step": 190, "epoch": 0, "lr": 0.009999994542832874} +{"train_loss": 0.020810682326555252, "train_loss_bc": 0.014902697876095772, "train_loss_llm": 0.5907983779907227, "grad_norm": 0.23229533433914185, "global_step": 191, "epoch": 0, "lr": 0.009999994542832874} +{"train_loss": 0.021981600672006607, "train_loss_bc": 0.016103900969028473, "train_loss_llm": 0.5877700448036194, "grad_norm": 0.2603759467601776, "global_step": 192, "epoch": 0, "lr": 0.009999993735395049} +{"train_loss": 0.022998729720711708, "train_loss_bc": 0.01771724969148636, "train_loss_llm": 0.5281479954719543, "grad_norm": 0.034455616027116776, "global_step": 193, "epoch": 0, "lr": 0.009999993735395049} +{"train_loss": 0.019327782094478607, "train_loss_bc": 0.014658035710453987, "train_loss_llm": 0.4669746160507202, "grad_norm": 0.0668833777308464, "global_step": 194, "epoch": 0, "lr": 0.009999993735395049} +{"train_loss": 0.024879198521375656, "train_loss_bc": 0.0185236893594265, "train_loss_llm": 0.6355509757995605, "grad_norm": 0.08858254551887512, "global_step": 195, "epoch": 0, "lr": 0.009999993735395049} +{"train_loss": 0.019756946712732315, "train_loss_bc": 0.014940358698368073, "train_loss_llm": 0.4816588759422302, "grad_norm": 0.11397459357976913, "global_step": 196, "epoch": 0, "lr": 0.009999993735395049} +{"train_loss": 0.024903442710638046, "train_loss_bc": 0.018497081473469734, "train_loss_llm": 0.6406360864639282, "grad_norm": 0.14657457172870636, "global_step": 197, "epoch": 0, "lr": 0.009999993735395049} +{"train_loss": 0.019728384912014008, "train_loss_bc": 0.014158019796013832, "train_loss_llm": 0.5570365190505981, "grad_norm": 0.1720155030488968, "global_step": 198, "epoch": 0, "lr": 0.009999993735395049} +{"train_loss": 0.016792047768831253, "train_loss_bc": 0.011875113472342491, "train_loss_llm": 0.49169355630874634, "grad_norm": 0.20192894339561462, "global_step": 199, "epoch": 0, "lr": 0.009999993735395049} +{"train_loss": 0.023527009412646294, "train_loss_bc": 0.017446376383304596, "train_loss_llm": 0.6080633401870728, "grad_norm": 0.2486913502216339, "global_step": 200, "epoch": 0, "lr": 0.009999992872271905} +{"train_loss": 0.02416856773197651, "train_loss_bc": 0.018999043852090836, "train_loss_llm": 0.5169523358345032, "grad_norm": 0.029791679233312607, "global_step": 201, "epoch": 0, "lr": 0.009999992872271905} +{"train_loss": 0.022336507216095924, "train_loss_bc": 0.017620330676436424, "train_loss_llm": 0.4716176390647888, "grad_norm": 0.056961867958307266, "global_step": 202, "epoch": 0, "lr": 0.009999992872271905} +{"train_loss": 0.021891754120588303, "train_loss_bc": 0.01769360713660717, "train_loss_llm": 0.4198147654533386, "grad_norm": 0.07886364310979843, "global_step": 203, "epoch": 0, "lr": 0.009999992872271905} +{"train_loss": 0.02422039769589901, "train_loss_bc": 0.01944451406598091, "train_loss_llm": 0.47758832573890686, "grad_norm": 0.11191964149475098, "global_step": 204, "epoch": 0, "lr": 0.009999992872271905} +{"train_loss": 0.02202729508280754, "train_loss_bc": 0.016946561634540558, "train_loss_llm": 0.5080732703208923, "grad_norm": 0.12720732390880585, "global_step": 205, "epoch": 0, "lr": 0.009999992872271905} +{"train_loss": 0.02344614453613758, "train_loss_bc": 0.01831752434372902, "train_loss_llm": 0.512861967086792, "grad_norm": 0.15643325448036194, "global_step": 206, "epoch": 0, "lr": 0.009999992872271905} +{"train_loss": 0.021590720862150192, "train_loss_bc": 0.01635618507862091, "train_loss_llm": 0.5234535932540894, "grad_norm": 0.17777878046035767, "global_step": 207, "epoch": 0, "lr": 0.009999992872271905} +{"train_loss": 0.02145991660654545, "train_loss_bc": 0.01685245707631111, "train_loss_llm": 0.46074602007865906, "grad_norm": 0.20263022184371948, "global_step": 208, "epoch": 0, "lr": 0.009999991953463454} +{"train_loss": 0.023527000099420547, "train_loss_bc": 0.019175242632627487, "train_loss_llm": 0.4351757764816284, "grad_norm": 0.02783939242362976, "global_step": 209, "epoch": 0, "lr": 0.009999991953463454} +{"train_loss": 0.02509186789393425, "train_loss_bc": 0.020386580377817154, "train_loss_llm": 0.4705287218093872, "grad_norm": 0.06272286921739578, "global_step": 210, "epoch": 0, "lr": 0.009999991953463454} +{"train_loss": 0.023855067789554596, "train_loss_bc": 0.018275782465934753, "train_loss_llm": 0.5579285025596619, "grad_norm": 0.07670474052429199, "global_step": 211, "epoch": 0, "lr": 0.009999991953463454} +{"train_loss": 0.024523191154003143, "train_loss_bc": 0.020061926916241646, "train_loss_llm": 0.4461265206336975, "grad_norm": 0.09239604324102402, "global_step": 212, "epoch": 0, "lr": 0.009999991953463454} +{"train_loss": 0.023320389911532402, "train_loss_bc": 0.019183896481990814, "train_loss_llm": 0.413649320602417, "grad_norm": 0.12721095979213715, "global_step": 213, "epoch": 0, "lr": 0.009999991953463454} +{"train_loss": 0.02343529649078846, "train_loss_bc": 0.018135903403162956, "train_loss_llm": 0.5299392938613892, "grad_norm": 0.14981500804424286, "global_step": 214, "epoch": 0, "lr": 0.009999991953463454} +{"train_loss": 0.024323690682649612, "train_loss_bc": 0.01939486525952816, "train_loss_llm": 0.49288249015808105, "grad_norm": 0.17683890461921692, "global_step": 215, "epoch": 0, "lr": 0.009999991953463454} +{"train_loss": 0.022757865488529205, "train_loss_bc": 0.018279647454619408, "train_loss_llm": 0.44782188534736633, "grad_norm": 0.19883114099502563, "global_step": 216, "epoch": 0, "lr": 0.0099999909789697} +{"train_loss": 0.023337459191679955, "train_loss_bc": 0.01846153847873211, "train_loss_llm": 0.48759210109710693, "grad_norm": 0.02261751890182495, "global_step": 217, "epoch": 0, "lr": 0.0099999909789697} +{"train_loss": 0.031769875437021255, "train_loss_bc": 0.026411594823002815, "train_loss_llm": 0.5358280539512634, "grad_norm": 0.06921491771936417, "global_step": 218, "epoch": 0, "lr": 0.0099999909789697} +{"train_loss": 0.023737115785479546, "train_loss_bc": 0.019920486956834793, "train_loss_llm": 0.38166290521621704, "grad_norm": 0.09409084916114807, "global_step": 219, "epoch": 0, "lr": 0.0099999909789697} +{"train_loss": 0.025979334488511086, "train_loss_bc": 0.021304704248905182, "train_loss_llm": 0.46746301651000977, "grad_norm": 0.11882251501083374, "global_step": 220, "epoch": 0, "lr": 0.0099999909789697} +{"train_loss": 0.025000201538205147, "train_loss_bc": 0.019991103559732437, "train_loss_llm": 0.5009097456932068, "grad_norm": 0.15056195855140686, "global_step": 221, "epoch": 0, "lr": 0.0099999909789697} +{"train_loss": 0.02242461033165455, "train_loss_bc": 0.018378354609012604, "train_loss_llm": 0.4046255350112915, "grad_norm": 0.1780581921339035, "global_step": 222, "epoch": 0, "lr": 0.0099999909789697} +{"train_loss": 0.027378899976611137, "train_loss_bc": 0.023318542167544365, "train_loss_llm": 0.406035840511322, "grad_norm": 0.2123226374387741, "global_step": 223, "epoch": 0, "lr": 0.0099999909789697} +{"train_loss": 0.02599770948290825, "train_loss_bc": 0.0203506201505661, "train_loss_llm": 0.564708948135376, "grad_norm": 0.24404466152191162, "global_step": 224, "epoch": 0, "lr": 0.00999998994879066} +{"train_loss": 0.02545972168445587, "train_loss_bc": 0.020537808537483215, "train_loss_llm": 0.49219125509262085, "grad_norm": 0.0350002683699131, "global_step": 225, "epoch": 0, "lr": 0.00999998994879066} +{"train_loss": 0.025176143273711205, "train_loss_bc": 0.020890595391392708, "train_loss_llm": 0.42855486273765564, "grad_norm": 0.06209180876612663, "global_step": 226, "epoch": 0, "lr": 0.00999998994879066} +{"train_loss": 0.023136619478464127, "train_loss_bc": 0.01860974170267582, "train_loss_llm": 0.4526877701282501, "grad_norm": 0.09024433046579361, "global_step": 227, "epoch": 0, "lr": 0.00999998994879066} +{"train_loss": 0.025945395231246948, "train_loss_bc": 0.021624740213155746, "train_loss_llm": 0.43206557631492615, "grad_norm": 0.12185320258140564, "global_step": 228, "epoch": 0, "lr": 0.00999998994879066} +{"train_loss": 0.028116080909967422, "train_loss_bc": 0.02361183986067772, "train_loss_llm": 0.45042404532432556, "grad_norm": 0.16623881459236145, "global_step": 229, "epoch": 0, "lr": 0.00999998994879066} +{"train_loss": 0.028553711250424385, "train_loss_bc": 0.02347782626748085, "train_loss_llm": 0.5075885057449341, "grad_norm": 0.19257326424121857, "global_step": 230, "epoch": 0, "lr": 0.00999998994879066} +{"train_loss": 0.027766291052103043, "train_loss_bc": 0.022831808775663376, "train_loss_llm": 0.49344828724861145, "grad_norm": 0.22710655629634857, "global_step": 231, "epoch": 0, "lr": 0.00999998994879066} +{"train_loss": 0.029369812458753586, "train_loss_bc": 0.024203170090913773, "train_loss_llm": 0.5166641473770142, "grad_norm": 0.2672453820705414, "global_step": 232, "epoch": 0, "lr": 0.009999988862926341} +{"train_loss": 0.019986841827630997, "train_loss_bc": 0.01582282781600952, "train_loss_llm": 0.41640135645866394, "grad_norm": 0.03304322436451912, "global_step": 233, "epoch": 0, "lr": 0.009999988862926341} +{"train_loss": 0.027561256662011147, "train_loss_bc": 0.0231167059391737, "train_loss_llm": 0.4444551467895508, "grad_norm": 0.06243205443024635, "global_step": 234, "epoch": 0, "lr": 0.009999988862926341} +{"train_loss": 0.02846396341919899, "train_loss_bc": 0.023669028654694557, "train_loss_llm": 0.47949355840682983, "grad_norm": 0.10598953068256378, "global_step": 235, "epoch": 0, "lr": 0.009999988862926341} +{"train_loss": 0.024963906034827232, "train_loss_bc": 0.020005209371447563, "train_loss_llm": 0.49586963653564453, "grad_norm": 0.1454334259033203, "global_step": 236, "epoch": 0, "lr": 0.009999988862926341} diff --git a/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/train.log b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/train.log new file mode 100644 index 0000000000000000000000000000000000000000..a2b6f38ae4151e117275b55aa774d3f547a22004 --- /dev/null +++ b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/train.log @@ -0,0 +1,14 @@ +[2026-01-21 12:18:20,592][numexpr.utils][INFO] - Note: detected 224 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. +[2026-01-21 12:18:20,592][numexpr.utils][INFO] - Note: NumExpr detected 224 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 16. +[2026-01-21 12:18:20,592][numexpr.utils][INFO] - NumExpr defaulting to 16 threads. +[2026-01-21 12:18:26,191][datasets][INFO] - PyTorch version 2.2.2 available. +[2026-01-21 12:18:26,192][datasets][INFO] - TensorFlow version 2.15.1 available. +[2026-01-21 12:18:26,193][datasets][INFO] - JAX version 0.4.30 available. +[2026-01-21 12:18:49,867][root][INFO] - running build_ext +[2026-01-21 12:18:49,870][root][INFO] - building 'mujoco_py.cymj' extension +[2026-01-21 12:18:49,872][root][INFO] - gcc -pthread -B /home/u1131674/.conda/envs/llm-bc/compiler_compat -Wno-unused-result -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -I/home/u1131674/.conda/envs/llm-bc/include -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -fPIC -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py -I/home/u1131674/.mujoco/mujoco210/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/numpy/core/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/vendor/egl -I/home/u1131674/.conda/envs/llm-bc/include/python3.9 -c /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/cymj.c -o /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/cymj.o -fopenmp -w +[2026-01-21 12:19:17,011][root][INFO] - gcc -pthread -B /home/u1131674/.conda/envs/llm-bc/compiler_compat -Wno-unused-result -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -I/home/u1131674/.conda/envs/llm-bc/include -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -fPIC -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py -I/home/u1131674/.mujoco/mujoco210/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/numpy/core/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/vendor/egl -I/home/u1131674/.conda/envs/llm-bc/include/python3.9 -c /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/gl/eglshim.c -o /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/gl/eglshim.o -fopenmp -w +[2026-01-21 12:19:17,219][root][INFO] - creating /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/lib.linux-x86_64-cpython-39/mujoco_py +[2026-01-21 12:19:17,222][root][INFO] - gcc -pthread -B /home/u1131674/.conda/envs/llm-bc/compiler_compat -shared -Wl,-rpath,/home/u1131674/.conda/envs/llm-bc/lib -Wl,-rpath-link,/home/u1131674/.conda/envs/llm-bc/lib -L/home/u1131674/.conda/envs/llm-bc/lib -L/home/u1131674/.conda/envs/llm-bc/lib -Wl,-rpath,/home/u1131674/.conda/envs/llm-bc/lib -Wl,-rpath-link,/home/u1131674/.conda/envs/llm-bc/lib -L/home/u1131674/.conda/envs/llm-bc/lib /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/cymj.o /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/gl/eglshim.o -L/home/u1131674/.mujoco/mujoco210/bin -Wl,--enable-new-dtags,-rpath,/home/u1131674/.mujoco/mujoco210/bin -lmujoco210 -lglewegl -o /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/lib.linux-x86_64-cpython-39/mujoco_py/cymj.cpython-39-x86_64-linux-gnu.so -fopenmp +[2026-01-21 12:19:18,581][absl][INFO] - MUJOCO_GL=osmesa, attempting to import specified OpenGL backend. +[2026-01-21 12:19:18,590][absl][INFO] - MuJoCo library version is: 2.3.7 diff --git a/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/debug-internal.log b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..be3b030f9802cf4921193e9e4af1f91d32004dbb --- /dev/null +++ b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-01-21T12:19:19.67691431+08:00","level":"INFO","msg":"using version","core version":"0.18.6"} +{"time":"2026-01-21T12:19:19.676924583+08:00","level":"INFO","msg":"created symlink","path":"/work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-core.log"} +{"time":"2026-01-21T12:19:19.791067511+08:00","level":"INFO","msg":"created new stream","id":"9puzigbg"} +{"time":"2026-01-21T12:19:19.791113731+08:00","level":"INFO","msg":"stream: started","id":"9puzigbg"} +{"time":"2026-01-21T12:19:19.791148479+08:00","level":"INFO","msg":"sender: started","stream_id":"9puzigbg"} +{"time":"2026-01-21T12:19:19.791138771+08:00","level":"INFO","msg":"handler: started","stream_id":{"value":"9puzigbg"}} +{"time":"2026-01-21T12:19:19.791131709+08:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"9puzigbg"}} +{"time":"2026-01-21T12:19:20.473667126+08:00","level":"INFO","msg":"Starting system monitor"} +{"time":"2026-01-21T12:20:31.202511022+08:00","level":"INFO","msg":"stream: closing","id":"9puzigbg"} +{"time":"2026-01-21T12:20:31.202606065+08:00","level":"INFO","msg":"Stopping system monitor"} +{"time":"2026-01-21T12:20:31.262777289+08:00","level":"INFO","msg":"Stopped system monitor"} diff --git a/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/debug.log b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..7a1b1b564682a68bebe83c612ed398ce349a0390 --- /dev/null +++ b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/debug.log @@ -0,0 +1,27 @@ +2026-01-21 12:19:19,672 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Current SDK version is 0.18.6 +2026-01-21 12:19:19,672 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Configure stats pid to 2070718 +2026-01-21 12:19:19,672 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Loading settings from /home/u1131674/.config/wandb/settings +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Loading settings from /work/u1131674/LLM-BC/wandb/settings +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Loading settings from environment variables: {} +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None} +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/work/u1131674/LLM-BC/train.py', 'program': '/work/u1131674/LLM-BC/./train.py'} +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Applying login settings: {} +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:_log_setup():533] Logging user logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug.log +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:_log_setup():534] Logging internal logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-internal.log +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():619] calling init triggers +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():626] wandb.init called with sweep_config: {} +config: {'name': 'train_llmbc_lowdim', '_target_': 'llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace', 'obs_dim': 9, 'action_dim': 4, 'task_name': 'box-close-v2', 'exp_name': 'default', 'model_name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'n_latency_steps': 0, 'past_action_visible': False, 'llm_orig_expert_feedback': True, 'llm_do_sample': False, 'policy': {'_target_': 'llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy', 'model': {'_target_': 'llmbc.model.policy.policy_mlp.PolicyMLP', 'input_size': 9, 'hidden_size': [256, 256], 'output_size': 4, 'activation': 'relu', 'n_obs_steps': 1, 'n_action_steps': 1}, 'obs_dim': 9, 'action_dim': 4, 'llm_discriminator': {'_target_': 'llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator', 'task_id': 'box-close-v2', 'llm_translator': {'_target_': 'llmbc.translator.llm_translator.LLMTranslator', 'cfg': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/12.18.18_HuggingFaceTB/SmolLM2-135M-Instruct'}}}, 'obs_dim': 9, 'action_dim': 4, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1}}, 'loss_bc_weight': 1.0, 'loss_llm_weight': 0.01, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'normalize_llm_loss': True}, 'dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'val_dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'optimizer': {'_target_': 'torch.optim.AdamW', 'lr': 0.01, 'betas': [0.95, 0.999], 'eps': 1e-08, 'weight_decay': 1e-06}, 'training': {'device': 'cuda:0', 'seed': 42, 'debug': False, 'resume': False, 'lr_scheduler': 'cosine', 'lr_warmup_steps': 10, 'num_epochs': 1001, 'gradient_accumulate_every': 8, 'grad_norm_clip': 0.5, 'rollout_every': 5, 'checkpoint_every': 5, 'val_every': 1, 'sample_every': 5, 'sample_max_batch': 128, 'max_train_steps': None, 'max_val_steps': None, 'tqdm_interval_sec': 1.0}, 'logging': {'project': 'box-close-v2-training', 'resume': True, 'mode': 'online', 'name': '2026.01.21-12.18.18_train_llmbc_lowdim_box-close-v2', 'tags': ['train_llmbc_lowdim', 'box-close-v2', 'default'], 'id': None, 'group': None}, 'checkpoint': {'topk': {'monitor_key': 'test_success_rate', 'mode': 'max', 'k': 5, 'format_str': 'epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt'}, 'save_last_ckpt': True, 'save_last_snapshot': False}, 'multi_run': {'run_dir': 'data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2', 'wandb_name_base': '2026.01.21-12.18.18_train_llmbc_lowdim_box-close-v2'}, 'task': {'name': 'box-close-v2', 'obs_dim': 9, 'action_dim': 4, 'env_runner': {'_target_': 'llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner', 'env_name': 'llf-metaworld-box-close-v2', 'n_train': 10, 'n_test': 50, 'n_envs': 10, 'max_steps': 30, 'n_obs_steps': 1, 'n_action_steps': 1, 'instruction_type': 'b', 'feedback_type': ['hp', 'hn', 'fp'], 'visual': False, 'discount': 0.9}, 'dataset': {'_target_': 'llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset', 'data_path': 'datasets/box-close-v2.pt', 'data_path2': 'datasets/box-close-v2.pt', 'horizon': 1, 'pad_before': 0, 'pad_after': 0, 'obs_eef_target': True, 'use_manual_normalizer': False, 'val_ratio': 0.1, 'dummy_normalizer': True}, 'instructor': {'_target_': 'llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor'}}, 'llm': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/12.18.18_HuggingFaceTB/SmolLM2-135M-Instruct'}}}} +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():669] starting backend +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():673] sending inform_init request +2026-01-21 12:19:19,674 INFO MainThread:2070718 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2026-01-21 12:19:19,675 INFO MainThread:2070718 [wandb_init.py:init():686] backend started and connected +2026-01-21 12:19:19,684 INFO MainThread:2070718 [wandb_init.py:init():781] updated telemetry +2026-01-21 12:19:19,759 INFO MainThread:2070718 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout +2026-01-21 12:19:20,469 INFO MainThread:2070718 [wandb_init.py:init():867] starting run threads in backend +2026-01-21 12:19:20,990 INFO MainThread:2070718 [wandb_run.py:_console_start():2451] atexit reg +2026-01-21 12:19:20,991 INFO MainThread:2070718 [wandb_run.py:_redirect():2299] redirect: wrap_raw +2026-01-21 12:19:20,991 INFO MainThread:2070718 [wandb_run.py:_redirect():2364] Wrapping output streams. +2026-01-21 12:19:20,991 INFO MainThread:2070718 [wandb_run.py:_redirect():2389] Redirects installed. +2026-01-21 12:19:20,994 INFO MainThread:2070718 [wandb_init.py:init():911] run started, returning control to user process +2026-01-21 12:19:20,994 INFO MainThread:2070718 [wandb_run.py:_config_callback():1389] config_cb None None {'output_dir': '/work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2'} +2026-01-21 12:20:31,202 WARNING MsgRouterThr:2070718 [router.py:message_loop():75] message_loop has been closed diff --git a/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/config.yaml b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ac4eaea062d227890bdd1a9cc74e88c43c59ebf --- /dev/null +++ b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/config.yaml @@ -0,0 +1,271 @@ +_target_: + value: llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace +_wandb: + value: + cli_version: 0.18.6 + m: [] + python_version: 3.9.25 + t: + "1": + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + - 41 + - 49 + - 50 + - 51 + - 53 + - 55 + - 71 + - 83 + - 95 + - 98 + - 100 + - 105 + "2": + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + - 41 + - 49 + - 50 + - 51 + - 53 + - 55 + - 71 + - 83 + - 95 + - 98 + - 100 + - 105 + "3": + - 13 + - 15 + - 16 + - 23 + - 55 + - 61 + "4": 3.9.25 + "5": 0.18.6 + "6": 4.47.1 + "8": + - 5 + "12": 0.18.6 + "13": linux-x86_64 +action_dim: + value: 4 +checkpoint: + value: + save_last_ckpt: true + save_last_snapshot: false + topk: + format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt + k: 5 + mode: max + monitor_key: test_success_rate +dataloader: + value: + batch_size: 16 + num_workers: 0 + persistent_workers: false + pin_memory: false + shuffle: true +exp_name: + value: default +horizon: + value: 1 +llm: + value: + causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM + checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890 + config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig + finetune_mode: orig + hydra: + job: + override_dirname: HuggingFaceTB/SmolLM2-135M-Instruct + run: + dir: data/outputs/2026.01.21/12.18.18_HuggingFaceTB/SmolLM2-135M-Instruct + llm_mode: ete-finetuned + lora_config: + bias: none + lora_alpha: 64 + lora_dropout: 0.05 + r: 32 + task_type: CAUSAL_LM + max_length: 100 + model_name: SmolLM2-135M-Instruct + name: HuggingFaceTB/SmolLM2-135M-Instruct + prompter: + _target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter + use_joint_mlp_projector: true + use_joint_mlp_projector: true + use_quantization: false +llm_do_sample: + value: false +llm_orig_expert_feedback: + value: true +logging: + value: + group: null + id: null + mode: online + name: 2026.01.21-12.18.18_train_llmbc_lowdim_box-close-v2 + project: box-close-v2-training + resume: true + tags: + - train_llmbc_lowdim + - box-close-v2 + - default +model_name: + value: HuggingFaceTB/SmolLM2-135M-Instruct +multi_run: + value: + run_dir: data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2 + wandb_name_base: 2026.01.21-12.18.18_train_llmbc_lowdim_box-close-v2 +n_action_steps: + value: 1 +n_latency_steps: + value: 0 +n_obs_steps: + value: 1 +name: + value: train_llmbc_lowdim +obs_dim: + value: 9 +optimizer: + value: + _target_: torch.optim.AdamW + betas: + - 0.95 + - 0.999 + eps: 1e-08 + lr: 0.01 + weight_decay: 1e-06 +output_dir: + value: /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2 +past_action_visible: + value: false +policy: + value: + _target_: llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy + action_dim: 4 + horizon: 1 + llm_discriminator: + _target_: llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator + llm_translator: + _target_: llmbc.translator.llm_translator.LLMTranslator + action_dim: 4 + cfg: + causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM + checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890 + config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig + finetune_mode: orig + hydra: + job: + override_dirname: HuggingFaceTB/SmolLM2-135M-Instruct + run: + dir: data/outputs/2026.01.21/12.18.18_HuggingFaceTB/SmolLM2-135M-Instruct + llm_mode: ete-finetuned + lora_config: + bias: none + lora_alpha: 64 + lora_dropout: 0.05 + r: 32 + task_type: CAUSAL_LM + max_length: 100 + model_name: SmolLM2-135M-Instruct + name: HuggingFaceTB/SmolLM2-135M-Instruct + prompter: + _target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter + use_joint_mlp_projector: true + use_joint_mlp_projector: true + use_quantization: false + horizon: 1 + n_action_steps: 1 + n_obs_steps: 1 + obs_dim: 9 + task_id: box-close-v2 + loss_bc_weight: 1 + loss_llm_weight: 0.01 + model: + _target_: llmbc.model.policy.policy_mlp.PolicyMLP + activation: relu + hidden_size: + - 256 + - 256 + input_size: 9 + n_action_steps: 1 + n_obs_steps: 1 + output_size: 4 + n_action_steps: 1 + n_obs_steps: 1 + normalize_llm_loss: true + obs_dim: 9 +task: + value: + action_dim: 4 + dataset: + _target_: llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset + data_path: datasets/box-close-v2.pt + data_path2: datasets/box-close-v2.pt + dummy_normalizer: true + horizon: 1 + obs_eef_target: true + pad_after: 0 + pad_before: 0 + use_manual_normalizer: false + val_ratio: 0.1 + env_runner: + _target_: llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner + discount: 0.9 + env_name: llf-metaworld-box-close-v2 + feedback_type: + - hp + - hn + - fp + instruction_type: b + max_steps: 30 + n_action_steps: 1 + n_envs: 10 + n_obs_steps: 1 + n_test: 50 + n_train: 10 + visual: false + instructor: + _target_: llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor + name: box-close-v2 + obs_dim: 9 +task_name: + value: box-close-v2 +training: + value: + checkpoint_every: 5 + debug: false + device: cuda:0 + grad_norm_clip: 0.5 + gradient_accumulate_every: 8 + lr_scheduler: cosine + lr_warmup_steps: 10 + max_train_steps: null + max_val_steps: null + num_epochs: 1001 + resume: false + rollout_every: 5 + sample_every: 5 + sample_max_batch: 128 + seed: 42 + tqdm_interval_sec: 1 + val_every: 1 +val_dataloader: + value: + batch_size: 16 + num_workers: 0 + persistent_workers: false + pin_memory: false + shuffle: true diff --git a/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/output.log b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..2bb12bb9f51aaf1292b97e7aa037a1aa65b3a456 --- /dev/null +++ b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/output.log @@ -0,0 +1,78 @@ +Eval MetaworldLowdimRunner 1/6: 0%| | 0/30 [00:00 + main() + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/main.py", line 90, in decorated_main + _run_hydra( + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 389, in _run_hydra + _run_app( + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 452, in _run_app + run_and_report( + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 213, in run_and_report + return func() + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 453, in + lambda: hydra.run( + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/hydra.py", line 119, in run + ret = run_job( + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/core/utils.py", line 186, in run_job + ret.return_value = task_function(task_cfg) + File "/work/u1131674/LLM-BC/./train.py", line 32, in main + workspace.run() + File "/work/u1131674/LLM-BC/llmbc/workspace/train_llmbc_lowdim_workspace.py", line 238, in run + runner_log = env_runner.run(policy) + File "/work/u1131674/LLM-BC/llmbc/env_runner/metaworld_lowdim_runner.py", line 153, in run + action_dict = policy.predict_action(obs_dict) + File "/work/u1131674/LLM-BC/llmbc/policy/llmbc_lowdim_policy.py", line 80, in predict_action + action_mean, action_log_std = self.model.a_mean_logstd(obs) + File "/work/u1131674/LLM-BC/llmbc/model/policy/policy_mlp.py", line 74, in a_mean_logstd + y = self.forward(obs) + File "/work/u1131674/LLM-BC/llmbc/model/policy/policy_mlp.py", line 68, in forward + y = super().forward(y) + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/container.py", line 217, in forward + input = module(input) + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl + return forward_call(*args, **kwargs) + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/linear.py", line 116, in forward + return F.linear(input, self.weight, self.bias) +KeyboardInterrupt +Traceback (most recent call last): + File "/work/u1131674/LLM-BC/./train.py", line 35, in + main() + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/main.py", line 90, in decorated_main + _run_hydra( + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 389, in _run_hydra + _run_app( + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 452, in _run_app + run_and_report( + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 213, in run_and_report + return func() + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 453, in + lambda: hydra.run( + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/hydra.py", line 119, in run + ret = run_job( + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/core/utils.py", line 186, in run_job + ret.return_value = task_function(task_cfg) + File "/work/u1131674/LLM-BC/./train.py", line 32, in main + workspace.run() + File "/work/u1131674/LLM-BC/llmbc/workspace/train_llmbc_lowdim_workspace.py", line 238, in run + runner_log = env_runner.run(policy) + File "/work/u1131674/LLM-BC/llmbc/env_runner/metaworld_lowdim_runner.py", line 153, in run + action_dict = policy.predict_action(obs_dict) + File "/work/u1131674/LLM-BC/llmbc/policy/llmbc_lowdim_policy.py", line 80, in predict_action + action_mean, action_log_std = self.model.a_mean_logstd(obs) + File "/work/u1131674/LLM-BC/llmbc/model/policy/policy_mlp.py", line 74, in a_mean_logstd + y = self.forward(obs) + File "/work/u1131674/LLM-BC/llmbc/model/policy/policy_mlp.py", line 68, in forward + y = super().forward(y) + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/container.py", line 217, in forward + input = module(input) + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl + return forward_call(*args, **kwargs) + File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/linear.py", line 116, in forward + return F.linear(input, self.weight, self.bias) +KeyboardInterrupt diff --git a/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/requirements.txt b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f07fb3b63f6171592bfb40896c50e7c4e8ebe927 --- /dev/null +++ b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/requirements.txt @@ -0,0 +1,857 @@ +rpds-py==0.27.1 +typeguard==4.4.4 +flatbuffers==25.12.19 +toppra==0.6.3 +sympy==1.14.0 +tiktoken==0.8.0 +nvidia-cuda-cupti-cu12==12.1.105 +arm_pytorch_utilities==0.4.3 +pynndescent==0.6.0 +multidict==6.7.0 +fonttools==4.60.2 +numexpr==2.10.1 +cmudict==1.0.13 +PyOpenGL-accelerate==3.1.10 +gmpy2==2.2.1 +peft==0.14.0 +metaworld==2.0.0 +nvidia-cufft-cu12==11.0.2.54 +python-dateutil==2.9.0.post0 +aiosignal==1.4.0 +pexpect==4.9.0 +protobuf==4.25.8 +typing_extensions==4.15.0 +mujoco==2.3.7 +tokenizers==0.21.0 +pytorch-kinematics==0.7.5 +sniffio==1.3.1 +aiofiles==25.1.0 +mplib==0.1.1 +wcwidth==0.2.14 +Pygments==2.19.1 +anyio==4.12.1 +tensorflow-estimator==2.15.0 +filelock==3.17.0 +numpy==1.23.5 +attrs==25.4.0 +Markdown==3.9 +fsspec==2024.3.1 +libclang==18.1.1 +umap-learn==0.5.9.post2 +dill==0.3.8 +narwhals==2.15.0 +tensorboard==2.15.2 +dacite==1.9.2 +termcolor==3.1.0 +llmbc==0.0.0 +python-multipart==0.0.20 +exceptiongroup==1.3.1 +sapien==3.0.0b1 +pygame==2.6.1 +nvidia-curand-cu12==10.3.2.106 +evaluate==0.4.3 +msgpack==1.1.1 +tensorflow-probability==0.23.0 +diffusers==0.31.0 +certifi==2025.10.5 +d4rl==1.1 +pydub==0.25.1 +annotated-doc==0.0.4 +gitdb==4.0.12 +gradio_client==0.2.9 +Shapely==1.8.4 +mani_skill==3.0.0b20 +tensorflow-io-gcs-filesystem==0.37.1 +fasteners==0.20 +hjson==3.1.0 +ninja==1.13.0 +stack-data==0.6.3 +pyarrow==21.0.0 +networkx==3.2.1 +nvidia-cusparse-cu12==12.1.0.106 +pyparsing==3.3.1 +timm==1.0.22 +typing-inspection==0.4.2 +openai==2.8.1 +pybullet==3.2.6 +hydra-core==1.2.0 +gradio==3.36.1 +tensorflow==2.15.1 +asttokens==3.0.1 +importlib-metadata==5.2.0 +astunparse==1.6.3 +tifffile==2024.8.30 +annotated-types==0.7.0 +Bottleneck==1.4.2 +accelerate==1.0.1 +pytz==2025.2 +urllib3==2.5.0 +frozenlist==1.8.0 +sentry-sdk==2.50.0 +jsonschema==4.25.1 +tyro==0.9.1 +Farama-Notifications==0.0.4 +ffmpy==1.0.0 +httpx==0.28.1 +pymunk==6.2.1 +shtab==1.7.2 +glfw==2.0.0 +hf-xet==1.1.8 +omegaconf==2.2.1 +blobfile==3.0.0 +decorator==5.2.1 +cffi==1.17.1 +matplotlib-inline==0.2.1 +eval_type_backport==0.2.2 +torchaudio==2.2.2 +colorama==0.4.6 +click==8.1.8 +Cython==0.29.37 +orjson==3.11.5 +gym_bandits==0.0.2 +traitlets==5.14.3 +docker-pycreds==0.4.0 +multiprocess==0.70.15 +zipp==3.21.0 +antlr4-python3-runtime==4.9.3 +uc-micro-py==1.0.3 +mpmath==1.3.0 +idna==3.11 +aiodns==3.5.0 +charset-normalizer==3.4.4 +nvidia-nvjitlink-cu12==12.9.86 +nvidia-cuda-nvrtc-cu12==12.1.105 +seaborn==0.13.2 +pyarrow-hotfix==0.7 +pillow==11.3.0 +pyautogen==0.1.0 +requests==2.32.0 +MarkupSafe==3.0.2 +websockets==15.0.1 +nvidia-nccl-cu12==2.19.3 +pure_eval==0.2.3 +parso==0.8.5 +huggingface-hub==0.26.2 +syllables==1.0.9 +tf-agents==0.19.0 +six==1.17.0 +referencing==0.36.2 +ptyprocess==0.7.0 +platformdirs==4.4.0 +fastapi==0.128.0 +stable-baselines3==2.2.1 +av==10.0.0 +diskcache==5.6.3 +pynvml==13.0.1 +pytorch-seed==0.2.0 +zarr==2.12.0 +mdurl==0.1.2 +docstring-parser==0.16 +packaging==25.0 +numcodecs==0.12.1 +opt_einsum==3.4.0 +markdown-it-py==2.2.0 +nvidia-cuda-runtime-cu12==12.1.105 +PyWavelets==1.6.0 +datasets==2.19.0 +contourpy==1.3.0 +aiohappyeyeballs==2.6.1 +jaxlib==0.4.30 +ImageIO==2.37.2 +wandb==0.18.6 +jiter==0.12.0 +gymnasium==0.29.1 +pycryptodomex==3.23.0 +google-pasta==0.2.0 +ipython==8.18.1 +threadpoolctl==3.6.0 +py-cpuinfo==9.0.0 +bitsandbytes==0.45.0 +xxhash==3.5.0 +google-auth-oauthlib==1.2.4 +rsa==4.9.1 +rouge_score==0.1.2 +dm-control==1.0.14 +oauthlib==3.3.1 +pandas==2.3.3 +tenacity==9.1.2 +asciitree==0.3.3 +scipy==1.13.1 +jedi==0.19.2 +gast==0.7.0 +google-auth==2.47.0 +transforms3d==0.4.2 +kiwisolver==1.4.7 +matplotlib==3.7.5 +aiohttp==3.12.15 +pip==23.3.2 +imageio-ffmpeg==0.6.0 +deepspeed==0.16.1 +yarl==1.18.0 +nvidia-nvtx-cu12==12.1.105 +llfbench==0.1.0 +wheel==0.45.1 +PySocks==1.7.1 +ml-dtypes==0.3.2 +PyYAML==6.0.2 +fast_kinematics==0.2.2 +gin-config==0.5.0 +setproctitle==1.3.7 +safetensors==0.5.3 +torchvision==0.17.2 +semantic-version==2.10.0 +PyOpenGL==3.1.10 +nltk==3.9.2 +lxml==6.0.2 +pydantic==2.12.5 +tqdm==4.67.1 +keras==2.15.0 +parse==1.19.1 +linkify-it-py==2.0.3 +dm-tree==0.1.8 +requests-oauthlib==2.0.0 +scikit-learn==1.6.1 +altair==6.0.0 +Werkzeug==3.1.5 +sentencepiece==0.2.0 +uvicorn==0.39.0 +cycler==0.12.1 +transformers==4.47.1 +uvloop==0.22.1 +mkl_random==1.2.8 +GitPython==3.1.46 +regex==2025.9.1 +jax==0.4.30 +llvmlite==0.39.1 +pyasn1_modules==0.4.2 +nvidia-cudnn-cu12==8.9.2.26 +pydantic_core==2.41.5 +google-genai==1.47.0 +propcache==0.3.1 +pycares==4.10.0 +pyperclip==1.11.0 +pyasn1==0.6.2 +async-timeout==5.0.1 +psutil==7.0.0 +gym==0.23.1 +dm-env==1.6 +Jinja2==3.1.6 +sentence-transformers==3.2.1 +einops==0.4.1 +triton==2.2.0 +grpcio==1.76.0 +labmaze==1.0.6 +nvidia-ml-py==13.590.44 +brotlicffi==1.0.9.2 +smmap==5.0.2 +cloudpickle==3.1.2 +setuptools==80.9.0 +starlette==0.49.3 +prompt_toolkit==3.0.52 +wrapt==1.14.2 +h5py==3.14.0 +scikit-image==0.19.3 +joblib==1.5.3 +opencv-python==4.11.0.86 +rich==14.2.0 +trl==0.11.4 +gym-notices==0.1.0 +trimesh==4.11.1 +mdit-py-plugins==0.3.3 +distro==1.9.0 +executing==2.2.1 +mkl-service==2.4.0 +nvidia-cusolver-cu12==11.4.5.107 +FLAML==2.3.6 +mujoco-py==2.1.2.14 +h11==0.16.0 +highway-env==1.9.1 +httpcore==1.0.9 +tensorboard-data-server==0.7.2 +tzdata==2025.3 +absl-py==2.3.1 +jsonschema-specifications==2025.9.1 +numba==0.56.4 +tabulate==0.9.0 +importlib-resources==5.13.0 +pycparser==2.23 +mkl_fft==1.3.11 +torch==2.2.2 +nvidia-cublas-cu12==12.1.3.1 +rpds-py==0.27.1 +typeguard==4.4.4 +flatbuffers==25.12.19 +toppra==0.6.3 +sympy==1.14.0 +tiktoken==0.8.0 +nvidia-cuda-cupti-cu12==12.1.105 +arm_pytorch_utilities==0.4.3 +pynndescent==0.6.0 +multidict==6.7.0 +fonttools==4.60.2 +numexpr==2.10.1 +cmudict==1.0.13 +PyOpenGL-accelerate==3.1.10 +gmpy2==2.2.1 +peft==0.14.0 +metaworld==2.0.0 +nvidia-cufft-cu12==11.0.2.54 +python-dateutil==2.9.0.post0 +aiosignal==1.4.0 +pexpect==4.9.0 +protobuf==4.25.8 +typing_extensions==4.15.0 +mujoco==2.3.7 +tokenizers==0.21.0 +pytorch-kinematics==0.7.5 +sniffio==1.3.1 +aiofiles==25.1.0 +mplib==0.1.1 +wcwidth==0.2.14 +Pygments==2.19.1 +anyio==4.12.1 +tensorflow-estimator==2.15.0 +filelock==3.17.0 +numpy==1.23.5 +attrs==25.4.0 +Markdown==3.9 +fsspec==2024.3.1 +libclang==18.1.1 +umap-learn==0.5.9.post2 +dill==0.3.8 +narwhals==2.15.0 +tensorboard==2.15.2 +dacite==1.9.2 +termcolor==3.1.0 +llmbc==0.0.0 +python-multipart==0.0.20 +exceptiongroup==1.3.1 +sapien==3.0.0b1 +pygame==2.6.1 +nvidia-curand-cu12==10.3.2.106 +evaluate==0.4.3 +msgpack==1.1.1 +tensorflow-probability==0.23.0 +diffusers==0.31.0 +certifi==2025.10.5 +d4rl==1.1 +pydub==0.25.1 +annotated-doc==0.0.4 +gitdb==4.0.12 +gradio_client==0.2.9 +Shapely==1.8.4 +mani_skill==3.0.0b20 +tensorflow-io-gcs-filesystem==0.37.1 +fasteners==0.20 +hjson==3.1.0 +ninja==1.13.0 +stack-data==0.6.3 +pyarrow==21.0.0 +networkx==3.2.1 +nvidia-cusparse-cu12==12.1.0.106 +pyparsing==3.3.1 +timm==1.0.22 +typing-inspection==0.4.2 +openai==2.8.1 +pybullet==3.2.6 +hydra-core==1.2.0 +gradio==3.36.1 +tensorflow==2.15.1 +asttokens==3.0.1 +importlib-metadata==5.2.0 +astunparse==1.6.3 +tifffile==2024.8.30 +annotated-types==0.7.0 +Bottleneck==1.4.2 +accelerate==1.0.1 +pytz==2025.2 +urllib3==2.5.0 +frozenlist==1.8.0 +sentry-sdk==2.50.0 +jsonschema==4.25.1 +tyro==0.9.1 +Farama-Notifications==0.0.4 +ffmpy==1.0.0 +httpx==0.28.1 +pymunk==6.2.1 +shtab==1.7.2 +glfw==2.0.0 +hf-xet==1.1.8 +omegaconf==2.2.1 +blobfile==3.0.0 +decorator==5.2.1 +cffi==1.17.1 +matplotlib-inline==0.2.1 +eval_type_backport==0.2.2 +torchaudio==2.2.2 +colorama==0.4.6 +click==8.1.8 +Cython==0.29.37 +orjson==3.11.5 +gym_bandits==0.0.2 +traitlets==5.14.3 +docker-pycreds==0.4.0 +multiprocess==0.70.15 +zipp==3.21.0 +antlr4-python3-runtime==4.9.3 +uc-micro-py==1.0.3 +mpmath==1.3.0 +idna==3.11 +aiodns==3.5.0 +charset-normalizer==3.4.4 +nvidia-nvjitlink-cu12==12.9.86 +nvidia-cuda-nvrtc-cu12==12.1.105 +seaborn==0.13.2 +pyarrow-hotfix==0.7 +pillow==11.3.0 +pyautogen==0.1.0 +requests==2.32.0 +MarkupSafe==3.0.2 +websockets==15.0.1 +nvidia-nccl-cu12==2.19.3 +pure_eval==0.2.3 +parso==0.8.5 +huggingface-hub==0.26.2 +syllables==1.0.9 +tf-agents==0.19.0 +six==1.17.0 +referencing==0.36.2 +ptyprocess==0.7.0 +platformdirs==4.4.0 +fastapi==0.128.0 +stable-baselines3==2.2.1 +av==10.0.0 +diskcache==5.6.3 +pynvml==13.0.1 +pytorch-seed==0.2.0 +zarr==2.12.0 +mdurl==0.1.2 +docstring-parser==0.16 +packaging==25.0 +numcodecs==0.12.1 +opt_einsum==3.4.0 +markdown-it-py==2.2.0 +nvidia-cuda-runtime-cu12==12.1.105 +PyWavelets==1.6.0 +datasets==2.19.0 +contourpy==1.3.0 +aiohappyeyeballs==2.6.1 +jaxlib==0.4.30 +ImageIO==2.37.2 +wandb==0.18.6 +jiter==0.12.0 +gymnasium==0.29.1 +pycryptodomex==3.23.0 +google-pasta==0.2.0 +ipython==8.18.1 +threadpoolctl==3.6.0 +py-cpuinfo==9.0.0 +bitsandbytes==0.45.0 +xxhash==3.5.0 +google-auth-oauthlib==1.2.4 +rsa==4.9.1 +rouge_score==0.1.2 +dm-control==1.0.14 +oauthlib==3.3.1 +pandas==2.3.3 +tenacity==9.1.2 +asciitree==0.3.3 +scipy==1.13.1 +jedi==0.19.2 +gast==0.7.0 +google-auth==2.47.0 +transforms3d==0.4.2 +kiwisolver==1.4.7 +matplotlib==3.7.5 +aiohttp==3.12.15 +pip==23.3.2 +imageio-ffmpeg==0.6.0 +deepspeed==0.16.1 +yarl==1.18.0 +nvidia-nvtx-cu12==12.1.105 +llfbench==0.1.0 +wheel==0.45.1 +PySocks==1.7.1 +ml-dtypes==0.3.2 +PyYAML==6.0.2 +fast_kinematics==0.2.2 +gin-config==0.5.0 +setproctitle==1.3.7 +safetensors==0.5.3 +torchvision==0.17.2 +semantic-version==2.10.0 +PyOpenGL==3.1.10 +nltk==3.9.2 +lxml==6.0.2 +pydantic==2.12.5 +tqdm==4.67.1 +keras==2.15.0 +parse==1.19.1 +linkify-it-py==2.0.3 +dm-tree==0.1.8 +requests-oauthlib==2.0.0 +scikit-learn==1.6.1 +altair==6.0.0 +Werkzeug==3.1.5 +sentencepiece==0.2.0 +uvicorn==0.39.0 +cycler==0.12.1 +transformers==4.47.1 +uvloop==0.22.1 +mkl_random==1.2.8 +GitPython==3.1.46 +regex==2025.9.1 +jax==0.4.30 +llvmlite==0.39.1 +pyasn1_modules==0.4.2 +nvidia-cudnn-cu12==8.9.2.26 +pydantic_core==2.41.5 +google-genai==1.47.0 +propcache==0.3.1 +pycares==4.10.0 +pyperclip==1.11.0 +pyasn1==0.6.2 +async-timeout==5.0.1 +psutil==7.0.0 +gym==0.23.1 +dm-env==1.6 +Jinja2==3.1.6 +sentence-transformers==3.2.1 +einops==0.4.1 +triton==2.2.0 +grpcio==1.76.0 +labmaze==1.0.6 +nvidia-ml-py==13.590.44 +brotlicffi==1.0.9.2 +smmap==5.0.2 +cloudpickle==3.1.2 +setuptools==80.9.0 +starlette==0.49.3 +prompt_toolkit==3.0.52 +wrapt==1.14.2 +h5py==3.14.0 +scikit-image==0.19.3 +joblib==1.5.3 +opencv-python==4.11.0.86 +rich==14.2.0 +trl==0.11.4 +gym-notices==0.1.0 +trimesh==4.11.1 +mdit-py-plugins==0.3.3 +distro==1.9.0 +executing==2.2.1 +mkl-service==2.4.0 +nvidia-cusolver-cu12==11.4.5.107 +FLAML==2.3.6 +mujoco-py==2.1.2.14 +h11==0.16.0 +highway-env==1.9.1 +httpcore==1.0.9 +tensorboard-data-server==0.7.2 +tzdata==2025.3 +absl-py==2.3.1 +jsonschema-specifications==2025.9.1 +numba==0.56.4 +tabulate==0.9.0 +importlib-resources==5.13.0 +pycparser==2.23 +mkl_fft==1.3.11 +torch==2.2.2 +nvidia-cublas-cu12==12.1.3.1 +llmbc==0.0.0 +rpds-py==0.27.1 +typeguard==4.4.4 +flatbuffers==25.12.19 +toppra==0.6.3 +sympy==1.14.0 +tiktoken==0.8.0 +nvidia-cuda-cupti-cu12==12.1.105 +arm_pytorch_utilities==0.4.3 +pynndescent==0.6.0 +multidict==6.7.0 +fonttools==4.60.2 +numexpr==2.10.1 +cmudict==1.0.13 +PyOpenGL-accelerate==3.1.10 +gmpy2==2.2.1 +peft==0.14.0 +metaworld==2.0.0 +nvidia-cufft-cu12==11.0.2.54 +python-dateutil==2.9.0.post0 +aiosignal==1.4.0 +pexpect==4.9.0 +protobuf==4.25.8 +typing_extensions==4.15.0 +mujoco==2.3.7 +tokenizers==0.21.0 +pytorch-kinematics==0.7.5 +sniffio==1.3.1 +aiofiles==25.1.0 +mplib==0.1.1 +wcwidth==0.2.14 +Pygments==2.19.1 +anyio==4.12.1 +tensorflow-estimator==2.15.0 +filelock==3.17.0 +numpy==1.23.5 +attrs==25.4.0 +Markdown==3.9 +fsspec==2024.3.1 +libclang==18.1.1 +umap-learn==0.5.9.post2 +dill==0.3.8 +narwhals==2.15.0 +tensorboard==2.15.2 +dacite==1.9.2 +termcolor==3.1.0 +llmbc==0.0.0 +python-multipart==0.0.20 +exceptiongroup==1.3.1 +sapien==3.0.0b1 +pygame==2.6.1 +nvidia-curand-cu12==10.3.2.106 +evaluate==0.4.3 +msgpack==1.1.1 +tensorflow-probability==0.23.0 +diffusers==0.31.0 +certifi==2025.10.5 +d4rl==1.1 +pydub==0.25.1 +annotated-doc==0.0.4 +gitdb==4.0.12 +gradio_client==0.2.9 +Shapely==1.8.4 +mani_skill==3.0.0b20 +tensorflow-io-gcs-filesystem==0.37.1 +fasteners==0.20 +hjson==3.1.0 +ninja==1.13.0 +stack-data==0.6.3 +pyarrow==21.0.0 +networkx==3.2.1 +nvidia-cusparse-cu12==12.1.0.106 +pyparsing==3.3.1 +timm==1.0.22 +typing-inspection==0.4.2 +openai==2.8.1 +pybullet==3.2.6 +hydra-core==1.2.0 +gradio==3.36.1 +tensorflow==2.15.1 +asttokens==3.0.1 +importlib-metadata==5.2.0 +astunparse==1.6.3 +tifffile==2024.8.30 +annotated-types==0.7.0 +Bottleneck==1.4.2 +accelerate==1.0.1 +pytz==2025.2 +urllib3==2.5.0 +frozenlist==1.8.0 +sentry-sdk==2.50.0 +jsonschema==4.25.1 +tyro==0.9.1 +Farama-Notifications==0.0.4 +ffmpy==1.0.0 +httpx==0.28.1 +pymunk==6.2.1 +shtab==1.7.2 +glfw==2.0.0 +hf-xet==1.1.8 +omegaconf==2.2.1 +blobfile==3.0.0 +decorator==5.2.1 +cffi==1.17.1 +matplotlib-inline==0.2.1 +eval_type_backport==0.2.2 +torchaudio==2.2.2 +colorama==0.4.6 +click==8.1.8 +Cython==0.29.37 +orjson==3.11.5 +gym_bandits==0.0.2 +traitlets==5.14.3 +docker-pycreds==0.4.0 +multiprocess==0.70.15 +zipp==3.21.0 +antlr4-python3-runtime==4.9.3 +uc-micro-py==1.0.3 +mpmath==1.3.0 +idna==3.11 +aiodns==3.5.0 +charset-normalizer==3.4.4 +nvidia-nvjitlink-cu12==12.9.86 +nvidia-cuda-nvrtc-cu12==12.1.105 +seaborn==0.13.2 +pyarrow-hotfix==0.7 +pillow==11.3.0 +pyautogen==0.1.0 +requests==2.32.0 +MarkupSafe==3.0.2 +websockets==15.0.1 +nvidia-nccl-cu12==2.19.3 +pure_eval==0.2.3 +parso==0.8.5 +huggingface-hub==0.26.2 +syllables==1.0.9 +tf-agents==0.19.0 +six==1.17.0 +referencing==0.36.2 +ptyprocess==0.7.0 +platformdirs==4.4.0 +fastapi==0.128.0 +stable-baselines3==2.2.1 +av==10.0.0 +diskcache==5.6.3 +pynvml==13.0.1 +pytorch-seed==0.2.0 +zarr==2.12.0 +mdurl==0.1.2 +docstring-parser==0.16 +packaging==25.0 +numcodecs==0.12.1 +opt_einsum==3.4.0 +markdown-it-py==2.2.0 +nvidia-cuda-runtime-cu12==12.1.105 +PyWavelets==1.6.0 +datasets==2.19.0 +contourpy==1.3.0 +aiohappyeyeballs==2.6.1 +jaxlib==0.4.30 +ImageIO==2.37.2 +wandb==0.18.6 +jiter==0.12.0 +gymnasium==0.29.1 +pycryptodomex==3.23.0 +google-pasta==0.2.0 +ipython==8.18.1 +threadpoolctl==3.6.0 +py-cpuinfo==9.0.0 +bitsandbytes==0.45.0 +xxhash==3.5.0 +google-auth-oauthlib==1.2.4 +rsa==4.9.1 +rouge_score==0.1.2 +dm-control==1.0.14 +oauthlib==3.3.1 +pandas==2.3.3 +tenacity==9.1.2 +asciitree==0.3.3 +scipy==1.13.1 +jedi==0.19.2 +gast==0.7.0 +google-auth==2.47.0 +transforms3d==0.4.2 +kiwisolver==1.4.7 +matplotlib==3.7.5 +aiohttp==3.12.15 +pip==23.3.2 +imageio-ffmpeg==0.6.0 +deepspeed==0.16.1 +yarl==1.18.0 +nvidia-nvtx-cu12==12.1.105 +llfbench==0.1.0 +wheel==0.45.1 +PySocks==1.7.1 +ml-dtypes==0.3.2 +PyYAML==6.0.2 +fast_kinematics==0.2.2 +gin-config==0.5.0 +setproctitle==1.3.7 +safetensors==0.5.3 +torchvision==0.17.2 +semantic-version==2.10.0 +PyOpenGL==3.1.10 +nltk==3.9.2 +lxml==6.0.2 +pydantic==2.12.5 +tqdm==4.67.1 +keras==2.15.0 +parse==1.19.1 +linkify-it-py==2.0.3 +dm-tree==0.1.8 +requests-oauthlib==2.0.0 +scikit-learn==1.6.1 +altair==6.0.0 +Werkzeug==3.1.5 +sentencepiece==0.2.0 +uvicorn==0.39.0 +cycler==0.12.1 +transformers==4.47.1 +uvloop==0.22.1 +mkl_random==1.2.8 +GitPython==3.1.46 +regex==2025.9.1 +jax==0.4.30 +llvmlite==0.39.1 +pyasn1_modules==0.4.2 +nvidia-cudnn-cu12==8.9.2.26 +pydantic_core==2.41.5 +google-genai==1.47.0 +propcache==0.3.1 +pycares==4.10.0 +pyperclip==1.11.0 +pyasn1==0.6.2 +async-timeout==5.0.1 +psutil==7.0.0 +gym==0.23.1 +dm-env==1.6 +Jinja2==3.1.6 +sentence-transformers==3.2.1 +einops==0.4.1 +triton==2.2.0 +grpcio==1.76.0 +labmaze==1.0.6 +nvidia-ml-py==13.590.44 +brotlicffi==1.0.9.2 +smmap==5.0.2 +cloudpickle==3.1.2 +setuptools==80.9.0 +starlette==0.49.3 +prompt_toolkit==3.0.52 +wrapt==1.14.2 +h5py==3.14.0 +scikit-image==0.19.3 +joblib==1.5.3 +opencv-python==4.11.0.86 +rich==14.2.0 +trl==0.11.4 +gym-notices==0.1.0 +trimesh==4.11.1 +mdit-py-plugins==0.3.3 +distro==1.9.0 +executing==2.2.1 +mkl-service==2.4.0 +nvidia-cusolver-cu12==11.4.5.107 +FLAML==2.3.6 +mujoco-py==2.1.2.14 +h11==0.16.0 +highway-env==1.9.1 +httpcore==1.0.9 +tensorboard-data-server==0.7.2 +tzdata==2025.3 +absl-py==2.3.1 +jsonschema-specifications==2025.9.1 +numba==0.56.4 +tabulate==0.9.0 +importlib-resources==5.13.0 +pycparser==2.23 +mkl_fft==1.3.11 +torch==2.2.2 +nvidia-cublas-cu12==12.1.3.1 +zipp==3.19.2 +jaraco.text==3.12.1 +jaraco.context==5.3.0 +importlib_metadata==8.0.0 +typeguard==4.3.0 +inflect==7.3.1 +more-itertools==10.3.0 +wheel==0.45.1 +packaging==24.2 +backports.tarfile==1.2.0 +autocommand==2.2.2 +jaraco.collections==5.1.0 +tomli==2.0.1 +platformdirs==4.2.2 +jaraco.functools==4.0.1 +typing_extensions==4.12.2 diff --git a/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/wandb-metadata.json b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..886d1414ead8c17e48773848e5b6ad64f5ea2d8b --- /dev/null +++ b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/wandb-metadata.json @@ -0,0 +1,55 @@ +{ + "os": "Linux-4.18.0-513.24.1.el8_9.x86_64-x86_64-with-glibc2.28", + "python": "3.9.25", + "startedAt": "2026-01-21T04:19:19.675401Z", + "args": [ + "--config-path", + "./config/main_table", + "--config-name", + "llmbc_box-close-v2.yaml" + ], + "program": "/work/u1131674/LLM-BC/./train.py", + "codePath": "train.py", + "git": { + "remote": "https://github.com/CHYang25/LLM-BC.git", + "commit": "1d2e1f5818e116390426ef596d075fc0cf1b0081" + }, + "email": "chris920325@gmail.com", + "root": "/work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2", + "host": "cbi-lgn01", + "username": "u1131674", + "executable": "/home/u1131674/.conda/envs/llm-bc/bin/python3", + "codePathLocal": "train.py", + "cpu_count": 112, + "cpu_count_logical": 224, + "gpu": "NVIDIA H100 PCIe", + "gpu_count": 2, + "disk": { + "/": { + "total": "473745891328", + "used": "389026504704" + } + }, + "memory": { + "total": "540117905408" + }, + "cpu": { + "count": 112, + "countLogical": 224 + }, + "gpu_nvidia": [ + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper" + }, + { + "name": "NVIDIA H100 PCIe", + "memoryTotal": "85520809984", + "cudaCores": 14592, + "architecture": "Hopper" + } + ], + "cudaVersion": "12.4" +} \ No newline at end of file diff --git a/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/wandb-summary.json b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..dad480314ba72b5b27ea7476f63eb281bf5bd8d0 --- /dev/null +++ b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/wandb-summary.json @@ -0,0 +1 @@ +{"train_loss":0.024963906034827232,"_timestamp":1.7689691981223695e+09,"train_loss_bc":0.020005209371447563,"_wandb":{"runtime":71},"epoch":0,"_runtime":71.527189585,"train_loss_llm":0.49586963653564453,"_step":236,"grad_norm":0.1454334259033203,"global_step":236,"lr":0.009999988862926341} \ No newline at end of file diff --git a/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-core.log b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..b86bfd414a83fd772ce0b2ee5e2658b44f619b03 --- /dev/null +++ b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-core.log @@ -0,0 +1,12 @@ +{"time":"2026-01-21T12:19:19.051245689+08:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpu06061ms/port-2070718.txt","pid":2070718,"debug":false,"disable-analytics":false} +{"time":"2026-01-21T12:19:19.051287504+08:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false} +{"time":"2026-01-21T12:19:19.051845713+08:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":2070718} +{"time":"2026-01-21T12:19:19.051828427+08:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":43057,"Zone":""}} +{"time":"2026-01-21T12:19:19.231239451+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:33440"} +{"time":"2026-01-21T12:19:19.675902286+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"9puzigbg","id":"127.0.0.1:33440"} +{"time":"2026-01-21T12:19:19.791119243+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"9puzigbg","id":"127.0.0.1:33440"} +{"time":"2026-01-21T12:20:31.202365496+08:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:33440"} +{"time":"2026-01-21T12:20:31.20258465+08:00","level":"INFO","msg":"server is shutting down"} +{"time":"2026-01-21T12:20:31.202524542+08:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:33440"} +{"time":"2026-01-21T12:20:31.202695965+08:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:33440"} +{"time":"2026-01-21T12:20:31.981247472+08:00","level":"INFO","msg":"Parent process exited, terminating service process."} diff --git a/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-internal.log b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..be3b030f9802cf4921193e9e4af1f91d32004dbb --- /dev/null +++ b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-01-21T12:19:19.67691431+08:00","level":"INFO","msg":"using version","core version":"0.18.6"} +{"time":"2026-01-21T12:19:19.676924583+08:00","level":"INFO","msg":"created symlink","path":"/work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-core.log"} +{"time":"2026-01-21T12:19:19.791067511+08:00","level":"INFO","msg":"created new stream","id":"9puzigbg"} +{"time":"2026-01-21T12:19:19.791113731+08:00","level":"INFO","msg":"stream: started","id":"9puzigbg"} +{"time":"2026-01-21T12:19:19.791148479+08:00","level":"INFO","msg":"sender: started","stream_id":"9puzigbg"} +{"time":"2026-01-21T12:19:19.791138771+08:00","level":"INFO","msg":"handler: started","stream_id":{"value":"9puzigbg"}} +{"time":"2026-01-21T12:19:19.791131709+08:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"9puzigbg"}} +{"time":"2026-01-21T12:19:20.473667126+08:00","level":"INFO","msg":"Starting system monitor"} +{"time":"2026-01-21T12:20:31.202511022+08:00","level":"INFO","msg":"stream: closing","id":"9puzigbg"} +{"time":"2026-01-21T12:20:31.202606065+08:00","level":"INFO","msg":"Stopping system monitor"} +{"time":"2026-01-21T12:20:31.262777289+08:00","level":"INFO","msg":"Stopped system monitor"} diff --git a/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug.log b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..7a1b1b564682a68bebe83c612ed398ce349a0390 --- /dev/null +++ b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug.log @@ -0,0 +1,27 @@ +2026-01-21 12:19:19,672 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Current SDK version is 0.18.6 +2026-01-21 12:19:19,672 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Configure stats pid to 2070718 +2026-01-21 12:19:19,672 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Loading settings from /home/u1131674/.config/wandb/settings +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Loading settings from /work/u1131674/LLM-BC/wandb/settings +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Loading settings from environment variables: {} +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None} +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/work/u1131674/LLM-BC/train.py', 'program': '/work/u1131674/LLM-BC/./train.py'} +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Applying login settings: {} +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:_log_setup():533] Logging user logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug.log +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:_log_setup():534] Logging internal logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-internal.log +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():619] calling init triggers +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():626] wandb.init called with sweep_config: {} +config: {'name': 'train_llmbc_lowdim', '_target_': 'llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace', 'obs_dim': 9, 'action_dim': 4, 'task_name': 'box-close-v2', 'exp_name': 'default', 'model_name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'n_latency_steps': 0, 'past_action_visible': False, 'llm_orig_expert_feedback': True, 'llm_do_sample': False, 'policy': {'_target_': 'llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy', 'model': {'_target_': 'llmbc.model.policy.policy_mlp.PolicyMLP', 'input_size': 9, 'hidden_size': [256, 256], 'output_size': 4, 'activation': 'relu', 'n_obs_steps': 1, 'n_action_steps': 1}, 'obs_dim': 9, 'action_dim': 4, 'llm_discriminator': {'_target_': 'llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator', 'task_id': 'box-close-v2', 'llm_translator': {'_target_': 'llmbc.translator.llm_translator.LLMTranslator', 'cfg': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/12.18.18_HuggingFaceTB/SmolLM2-135M-Instruct'}}}, 'obs_dim': 9, 'action_dim': 4, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1}}, 'loss_bc_weight': 1.0, 'loss_llm_weight': 0.01, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'normalize_llm_loss': True}, 'dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'val_dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'optimizer': {'_target_': 'torch.optim.AdamW', 'lr': 0.01, 'betas': [0.95, 0.999], 'eps': 1e-08, 'weight_decay': 1e-06}, 'training': {'device': 'cuda:0', 'seed': 42, 'debug': False, 'resume': False, 'lr_scheduler': 'cosine', 'lr_warmup_steps': 10, 'num_epochs': 1001, 'gradient_accumulate_every': 8, 'grad_norm_clip': 0.5, 'rollout_every': 5, 'checkpoint_every': 5, 'val_every': 1, 'sample_every': 5, 'sample_max_batch': 128, 'max_train_steps': None, 'max_val_steps': None, 'tqdm_interval_sec': 1.0}, 'logging': {'project': 'box-close-v2-training', 'resume': True, 'mode': 'online', 'name': '2026.01.21-12.18.18_train_llmbc_lowdim_box-close-v2', 'tags': ['train_llmbc_lowdim', 'box-close-v2', 'default'], 'id': None, 'group': None}, 'checkpoint': {'topk': {'monitor_key': 'test_success_rate', 'mode': 'max', 'k': 5, 'format_str': 'epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt'}, 'save_last_ckpt': True, 'save_last_snapshot': False}, 'multi_run': {'run_dir': 'data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2', 'wandb_name_base': '2026.01.21-12.18.18_train_llmbc_lowdim_box-close-v2'}, 'task': {'name': 'box-close-v2', 'obs_dim': 9, 'action_dim': 4, 'env_runner': {'_target_': 'llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner', 'env_name': 'llf-metaworld-box-close-v2', 'n_train': 10, 'n_test': 50, 'n_envs': 10, 'max_steps': 30, 'n_obs_steps': 1, 'n_action_steps': 1, 'instruction_type': 'b', 'feedback_type': ['hp', 'hn', 'fp'], 'visual': False, 'discount': 0.9}, 'dataset': {'_target_': 'llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset', 'data_path': 'datasets/box-close-v2.pt', 'data_path2': 'datasets/box-close-v2.pt', 'horizon': 1, 'pad_before': 0, 'pad_after': 0, 'obs_eef_target': True, 'use_manual_normalizer': False, 'val_ratio': 0.1, 'dummy_normalizer': True}, 'instructor': {'_target_': 'llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor'}}, 'llm': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/12.18.18_HuggingFaceTB/SmolLM2-135M-Instruct'}}}} +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():669] starting backend +2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():673] sending inform_init request +2026-01-21 12:19:19,674 INFO MainThread:2070718 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2026-01-21 12:19:19,675 INFO MainThread:2070718 [wandb_init.py:init():686] backend started and connected +2026-01-21 12:19:19,684 INFO MainThread:2070718 [wandb_init.py:init():781] updated telemetry +2026-01-21 12:19:19,759 INFO MainThread:2070718 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout +2026-01-21 12:19:20,469 INFO MainThread:2070718 [wandb_init.py:init():867] starting run threads in backend +2026-01-21 12:19:20,990 INFO MainThread:2070718 [wandb_run.py:_console_start():2451] atexit reg +2026-01-21 12:19:20,991 INFO MainThread:2070718 [wandb_run.py:_redirect():2299] redirect: wrap_raw +2026-01-21 12:19:20,991 INFO MainThread:2070718 [wandb_run.py:_redirect():2364] Wrapping output streams. +2026-01-21 12:19:20,991 INFO MainThread:2070718 [wandb_run.py:_redirect():2389] Redirects installed. +2026-01-21 12:19:20,994 INFO MainThread:2070718 [wandb_init.py:init():911] run started, returning control to user process +2026-01-21 12:19:20,994 INFO MainThread:2070718 [wandb_run.py:_config_callback():1389] config_cb None None {'output_dir': '/work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2'} +2026-01-21 12:20:31,202 WARNING MsgRouterThr:2070718 [router.py:message_loop():75] message_loop has been closed diff --git a/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/run-9puzigbg.wandb b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/run-9puzigbg.wandb new file mode 100644 index 0000000000000000000000000000000000000000..a60e850bca54866b9d26440ec455f39eaf27baa2 --- /dev/null +++ b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/run-9puzigbg.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97675973ce8e04390938123162984a49c1513ce052c76ac14c48280b33003e11 +size 229376 diff --git a/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/wandb-resume.json b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/wandb-resume.json new file mode 100644 index 0000000000000000000000000000000000000000..de345f8b63e1a3dfba54b6b7f33017f2dfd3590e --- /dev/null +++ b/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/wandb-resume.json @@ -0,0 +1 @@ +{"run_id": "9puzigbg"} \ No newline at end of file diff --git a/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml b/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..298b1e4782dd5ce3de258d99c159c0b0a2c76e92 --- /dev/null +++ b/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml @@ -0,0 +1,163 @@ +name: train_llmbc_lowdim +_target_: llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace +obs_dim: ${task.obs_dim} +action_dim: ${task.action_dim} +task_name: ${task.name} +exp_name: default +model_name: ${llm.name} +horizon: 1 +n_obs_steps: 1 +n_action_steps: 1 +n_latency_steps: 0 +past_action_visible: false +llm_orig_expert_feedback: true +llm_do_sample: false +policy: + _target_: llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy + model: + _target_: llmbc.model.policy.policy_mlp.PolicyMLP + input_size: ${eval:'${n_obs_steps}*${obs_dim}'} + hidden_size: + - 256 + - 256 + output_size: ${eval:'${n_action_steps}*${action_dim}'} + activation: relu + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + llm_discriminator: + _target_: llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator + task_id: ${task_name} + llm_translator: + _target_: llmbc.translator.llm_translator.LLMTranslator + cfg: ${llm} + obs_dim: ${task.obs_dim} + action_dim: ${task.action_dim} + horizon: ${horizon} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + loss_bc_weight: 1.0 + loss_llm_weight: 0.001 + horizon: ${horizon} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + normalize_llm_loss: true +dataloader: + batch_size: 16 + num_workers: 0 + shuffle: true + pin_memory: false + persistent_workers: false +val_dataloader: + batch_size: 16 + num_workers: 0 + shuffle: true + pin_memory: false + persistent_workers: false +optimizer: + _target_: torch.optim.AdamW + lr: 0.01 + betas: + - 0.95 + - 0.999 + eps: 1.0e-08 + weight_decay: 1.0e-06 +training: + device: cuda:0 + seed: 42 + debug: false + resume: false + lr_scheduler: cosine + lr_warmup_steps: 10 + num_epochs: 1001 + gradient_accumulate_every: 8 + grad_norm_clip: 0.5 + rollout_every: 5 + checkpoint_every: 5 + val_every: 1 + sample_every: 5 + sample_max_batch: 128 + max_train_steps: null + max_val_steps: null + tqdm_interval_sec: 1.0 +logging: + project: ${task.name}-training + resume: true + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: + - ${name} + - ${task_name} + - ${exp_name} + id: null + group: null +checkpoint: + topk: + monitor_key: test_success_rate + mode: max + k: 5 + format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt + save_last_ckpt: true + save_last_snapshot: false +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} +task: + name: box-close-v2 + obs_dim: 9 + action_dim: 4 + env_runner: + _target_: llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner + env_name: llf-metaworld-box-close-v2 + n_train: 10 + n_test: 50 + n_envs: 10 + max_steps: 30 + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + instruction_type: b + feedback_type: + - hp + - hn + - fp + visual: false + discount: 0.9 + dataset: + _target_: llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset + data_path: datasets/box-close-v2.pt + data_path2: datasets/box-close-v2.pt + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1'} + pad_after: ${eval:'${n_action_steps}-1'} + obs_eef_target: true + use_manual_normalizer: false + val_ratio: 0.1 + dummy_normalizer: true + instructor: + _target_: llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor +llm: + name: HuggingFaceTB/SmolLM2-135M-Instruct + model_name: SmolLM2-135M-Instruct + config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig + causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM + use_quantization: false + use_joint_mlp_projector: true + llm_mode: ete-finetuned + finetune_mode: orig + checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890 + max_length: 100 + lora_config: + r: 32 + lora_alpha: 64 + lora_dropout: 0.05 + bias: none + task_type: CAUSAL_LM + prompter: + _target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter + use_joint_mlp_projector: true + hydra: + job: + override_dirname: ${model_name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${model_name} diff --git a/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml b/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce3f898591ead1e5332b6237f60dad9e308fa5aa --- /dev/null +++ b/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml @@ -0,0 +1,156 @@ +hydra: + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - policy.loss_llm_weight=1.0e-3 + - training.seed=42 + job: + name: train + chdir: null + override_dirname: policy.loss_llm_weight=1.0e-3,training.seed=42 + id: ??? + num: ??? + config_name: llmbc_box-close-v2.yaml + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.2.0 + version_base: '1.2' + cwd: /work/u1131674/LLM-BC + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /work/u1131674/LLM-BC/config/main_table + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2 + choices: + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml b/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c3874e1763723794be9fad42d5bb97ae9a29fb0e --- /dev/null +++ b/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml @@ -0,0 +1,2 @@ +- policy.loss_llm_weight=1.0e-3 +- training.seed=42 diff --git a/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/checkpoints/epoch=0000-test_success_rate=0.000.ckpt b/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/checkpoints/epoch=0000-test_success_rate=0.000.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..f344e62187ca431f69d028e518ac822bed122a9d --- /dev/null +++ b/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/checkpoints/epoch=0000-test_success_rate=0.000.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:841ce226dfd93b12ebfd588842ca350a66ecadb7d9fc334812aa3b8de27543ab +size 864520 diff --git a/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/checkpoints/latest.ckpt b/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/checkpoints/latest.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..f344e62187ca431f69d028e518ac822bed122a9d --- /dev/null +++ b/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/checkpoints/latest.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:841ce226dfd93b12ebfd588842ca350a66ecadb7d9fc334812aa3b8de27543ab +size 864520 diff --git a/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/logs.json.txt b/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/logs.json.txt new file mode 100644 index 0000000000000000000000000000000000000000..c189244eab7f63b27e6442680e8c74986c8d172d --- /dev/null +++ b/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/logs.json.txt @@ -0,0 +1,418 @@ +{"train_loss": 0.252529501914978, "train_loss_bc": 0.25195011496543884, "train_loss_llm": 0.5793765187263489, "grad_norm": 0.12839388847351074, "global_step": 0, "epoch": 0, "lr": 0.001} +{"train_loss": 0.273204505443573, "train_loss_bc": 0.27264082431793213, "train_loss_llm": 0.563692033290863, "grad_norm": 0.13485388457775116, "global_step": 1, "epoch": 0, "lr": 0.001} +{"train_loss": 0.2867761254310608, "train_loss_bc": 0.28621771931648254, "train_loss_llm": 0.5584008693695068, "grad_norm": 0.274769127368927, "global_step": 2, "epoch": 0, "lr": 0.001} +{"train_loss": 0.2871931791305542, "train_loss_bc": 0.2865779399871826, "train_loss_llm": 0.6152305006980896, "grad_norm": 0.41221097111701965, "global_step": 3, "epoch": 0, "lr": 0.001} +{"train_loss": 0.28025686740875244, "train_loss_bc": 0.2797144949436188, "train_loss_llm": 0.5423757433891296, "grad_norm": 0.5496014356613159, "global_step": 4, "epoch": 0, "lr": 0.001} +{"train_loss": 0.3149482309818268, "train_loss_bc": 0.31439733505249023, "train_loss_llm": 0.5508872866630554, "grad_norm": 0.6956393718719482, "global_step": 5, "epoch": 0, "lr": 0.001} +{"train_loss": 0.27254703640937805, "train_loss_bc": 0.27196407318115234, "train_loss_llm": 0.5829575061798096, "grad_norm": 0.8312950730323792, "global_step": 6, "epoch": 0, "lr": 0.001} +{"train_loss": 0.22602498531341553, "train_loss_bc": 0.22543349862098694, "train_loss_llm": 0.5914822816848755, "grad_norm": 0.9541406631469727, "global_step": 7, "epoch": 0, "lr": 0.001} +{"train_loss": 0.20342595875263214, "train_loss_bc": 0.2028963267803192, "train_loss_llm": 0.5296257734298706, "grad_norm": 1.0699303150177002, "global_step": 8, "epoch": 0, "lr": 0.002} +{"train_loss": 0.19929638504981995, "train_loss_bc": 0.19871878623962402, "train_loss_llm": 0.5776059627532959, "grad_norm": 0.11595484614372253, "global_step": 9, "epoch": 0, "lr": 0.002} +{"train_loss": 0.21191416680812836, "train_loss_bc": 0.21130315959453583, "train_loss_llm": 0.6110129952430725, "grad_norm": 0.23422954976558685, "global_step": 10, "epoch": 0, "lr": 0.002} +{"train_loss": 0.2068999856710434, "train_loss_bc": 0.2063978612422943, "train_loss_llm": 0.5021252632141113, "grad_norm": 0.3522001802921295, "global_step": 11, "epoch": 0, "lr": 0.002} +{"train_loss": 0.257265567779541, "train_loss_bc": 0.25662338733673096, "train_loss_llm": 0.6421942710876465, "grad_norm": 0.483461856842041, "global_step": 12, "epoch": 0, "lr": 0.002} +{"train_loss": 0.23878663778305054, "train_loss_bc": 0.2381792515516281, "train_loss_llm": 0.6073929071426392, "grad_norm": 0.6102063059806824, "global_step": 13, "epoch": 0, "lr": 0.002} +{"train_loss": 0.2712763547897339, "train_loss_bc": 0.27066537737846375, "train_loss_llm": 0.6109854578971863, "grad_norm": 0.7479075789451599, "global_step": 14, "epoch": 0, "lr": 0.002} +{"train_loss": 0.24330928921699524, "train_loss_bc": 0.2427230179309845, "train_loss_llm": 0.586268424987793, "grad_norm": 0.8762980699539185, "global_step": 15, "epoch": 0, "lr": 0.002} +{"train_loss": 0.20316186547279358, "train_loss_bc": 0.20266824960708618, "train_loss_llm": 0.4936148524284363, "grad_norm": 0.992440402507782, "global_step": 16, "epoch": 0, "lr": 0.003} +{"train_loss": 0.1635446846485138, "train_loss_bc": 0.162959486246109, "train_loss_llm": 0.5851912498474121, "grad_norm": 0.11341577023267746, "global_step": 17, "epoch": 0, "lr": 0.003} +{"train_loss": 0.1420236974954605, "train_loss_bc": 0.14150172472000122, "train_loss_llm": 0.5219756364822388, "grad_norm": 0.2166670560836792, "global_step": 18, "epoch": 0, "lr": 0.003} +{"train_loss": 0.08970867097377777, "train_loss_bc": 0.08923432230949402, "train_loss_llm": 0.4743492901325226, "grad_norm": 0.2942521870136261, "global_step": 19, "epoch": 0, "lr": 0.003} +{"train_loss": 0.1407971978187561, "train_loss_bc": 0.14016547799110413, "train_loss_llm": 0.631725013256073, "grad_norm": 0.3959764838218689, "global_step": 20, "epoch": 0, "lr": 0.003} +{"train_loss": 0.12558668851852417, "train_loss_bc": 0.12498115748167038, "train_loss_llm": 0.6055365800857544, "grad_norm": 0.4912969172000885, "global_step": 21, "epoch": 0, "lr": 0.003} +{"train_loss": 0.15840043127536774, "train_loss_bc": 0.15789246559143066, "train_loss_llm": 0.5079687833786011, "grad_norm": 0.6031914949417114, "global_step": 22, "epoch": 0, "lr": 0.003} +{"train_loss": 0.15493251383304596, "train_loss_bc": 0.15430215001106262, "train_loss_llm": 0.6303583383560181, "grad_norm": 0.712800145149231, "global_step": 23, "epoch": 0, "lr": 0.003} +{"train_loss": 0.09710954874753952, "train_loss_bc": 0.09661616384983063, "train_loss_llm": 0.4933878481388092, "grad_norm": 0.7942712306976318, "global_step": 24, "epoch": 0, "lr": 0.004} +{"train_loss": 0.04198349267244339, "train_loss_bc": 0.04147119075059891, "train_loss_llm": 0.5123016834259033, "grad_norm": 0.049896661192178726, "global_step": 25, "epoch": 0, "lr": 0.004} +{"train_loss": 0.04926488921046257, "train_loss_bc": 0.04879248887300491, "train_loss_llm": 0.4724003076553345, "grad_norm": 0.10693306475877762, "global_step": 26, "epoch": 0, "lr": 0.004} +{"train_loss": 0.03119494765996933, "train_loss_bc": 0.03079175390303135, "train_loss_llm": 0.40319401025772095, "grad_norm": 0.130178764462471, "global_step": 27, "epoch": 0, "lr": 0.004} +{"train_loss": 0.045984115451574326, "train_loss_bc": 0.04547495022416115, "train_loss_llm": 0.5091666579246521, "grad_norm": 0.18151648342609406, "global_step": 28, "epoch": 0, "lr": 0.004} +{"train_loss": 0.036746662110090256, "train_loss_bc": 0.036322131752967834, "train_loss_llm": 0.4245292544364929, "grad_norm": 0.22341406345367432, "global_step": 29, "epoch": 0, "lr": 0.004} +{"train_loss": 0.06587483733892441, "train_loss_bc": 0.06540372967720032, "train_loss_llm": 0.47110506892204285, "grad_norm": 0.30597466230392456, "global_step": 30, "epoch": 0, "lr": 0.004} +{"train_loss": 0.05170199275016785, "train_loss_bc": 0.05132713168859482, "train_loss_llm": 0.37486234307289124, "grad_norm": 0.3634960949420929, "global_step": 31, "epoch": 0, "lr": 0.004} +{"train_loss": 0.05630849674344063, "train_loss_bc": 0.0558805912733078, "train_loss_llm": 0.42790722846984863, "grad_norm": 0.4307665228843689, "global_step": 32, "epoch": 0, "lr": 0.005} +{"train_loss": 0.0553022176027298, "train_loss_bc": 0.05469208583235741, "train_loss_llm": 0.6101305484771729, "grad_norm": 0.08577623218297958, "global_step": 33, "epoch": 0, "lr": 0.005} +{"train_loss": 0.04831269383430481, "train_loss_bc": 0.04779437929391861, "train_loss_llm": 0.5183138847351074, "grad_norm": 0.15602092444896698, "global_step": 34, "epoch": 0, "lr": 0.005} +{"train_loss": 0.061867598444223404, "train_loss_bc": 0.06128372997045517, "train_loss_llm": 0.5838690996170044, "grad_norm": 0.2528131902217865, "global_step": 35, "epoch": 0, "lr": 0.005} +{"train_loss": 0.05686777085065842, "train_loss_bc": 0.05627113953232765, "train_loss_llm": 0.5966323614120483, "grad_norm": 0.3395236134529114, "global_step": 36, "epoch": 0, "lr": 0.005} +{"train_loss": 0.03382698819041252, "train_loss_bc": 0.03323305398225784, "train_loss_llm": 0.5939337611198425, "grad_norm": 0.3958278000354767, "global_step": 37, "epoch": 0, "lr": 0.005} +{"train_loss": 0.06224585324525833, "train_loss_bc": 0.0616149976849556, "train_loss_llm": 0.6308567523956299, "grad_norm": 0.4894043505191803, "global_step": 38, "epoch": 0, "lr": 0.005} +{"train_loss": 0.04555570334196091, "train_loss_bc": 0.04494024068117142, "train_loss_llm": 0.6154611110687256, "grad_norm": 0.5536556839942932, "global_step": 39, "epoch": 0, "lr": 0.005} +{"train_loss": 0.03574361279606819, "train_loss_bc": 0.03507951647043228, "train_loss_llm": 0.6640970706939697, "grad_norm": 0.6100818514823914, "global_step": 40, "epoch": 0, "lr": 0.006} +{"train_loss": 0.146262988448143, "train_loss_bc": 0.14580723643302917, "train_loss_llm": 0.4557466208934784, "grad_norm": 0.19763296842575073, "global_step": 41, "epoch": 0, "lr": 0.006} +{"train_loss": 0.11445678770542145, "train_loss_bc": 0.11390470713376999, "train_loss_llm": 0.5520769357681274, "grad_norm": 0.3685164451599121, "global_step": 42, "epoch": 0, "lr": 0.006} +{"train_loss": 0.10677710175514221, "train_loss_bc": 0.10625766217708588, "train_loss_llm": 0.5194418430328369, "grad_norm": 0.5320614576339722, "global_step": 43, "epoch": 0, "lr": 0.006} +{"train_loss": 0.12251483649015427, "train_loss_bc": 0.12198641151189804, "train_loss_llm": 0.5284275412559509, "grad_norm": 0.7118619680404663, "global_step": 44, "epoch": 0, "lr": 0.006} +{"train_loss": 0.14140570163726807, "train_loss_bc": 0.1408904492855072, "train_loss_llm": 0.5152463316917419, "grad_norm": 0.9093842506408691, "global_step": 45, "epoch": 0, "lr": 0.006} +{"train_loss": 0.10901694744825363, "train_loss_bc": 0.1084449291229248, "train_loss_llm": 0.5720197558403015, "grad_norm": 1.0770854949951172, "global_step": 46, "epoch": 0, "lr": 0.006} +{"train_loss": 0.13558131456375122, "train_loss_bc": 0.13501602411270142, "train_loss_llm": 0.565291702747345, "grad_norm": 1.2658616304397583, "global_step": 47, "epoch": 0, "lr": 0.006} +{"train_loss": 0.14484672248363495, "train_loss_bc": 0.14428021013736725, "train_loss_llm": 0.5665071606636047, "grad_norm": 1.4656471014022827, "global_step": 48, "epoch": 0, "lr": 0.006999999999999999} +{"train_loss": 0.24264752864837646, "train_loss_bc": 0.2419467568397522, "train_loss_llm": 0.7007750272750854, "grad_norm": 0.2969740033149719, "global_step": 49, "epoch": 0, "lr": 0.006999999999999999} +{"train_loss": 0.13805940747261047, "train_loss_bc": 0.1374894082546234, "train_loss_llm": 0.5699948072433472, "grad_norm": 0.5104647874832153, "global_step": 50, "epoch": 0, "lr": 0.006999999999999999} +{"train_loss": 0.16542810201644897, "train_loss_bc": 0.16495351493358612, "train_loss_llm": 0.47459012269973755, "grad_norm": 0.7459866404533386, "global_step": 51, "epoch": 0, "lr": 0.006999999999999999} +{"train_loss": 0.25657016038894653, "train_loss_bc": 0.25585728883743286, "train_loss_llm": 0.7128623723983765, "grad_norm": 1.0538054704666138, "global_step": 52, "epoch": 0, "lr": 0.006999999999999999} +{"train_loss": 0.20239487290382385, "train_loss_bc": 0.20180177688598633, "train_loss_llm": 0.5930944085121155, "grad_norm": 1.316612958908081, "global_step": 53, "epoch": 0, "lr": 0.006999999999999999} +{"train_loss": 0.1541372388601303, "train_loss_bc": 0.15368221700191498, "train_loss_llm": 0.45501962304115295, "grad_norm": 1.5417735576629639, "global_step": 54, "epoch": 0, "lr": 0.006999999999999999} +{"train_loss": 0.2185448706150055, "train_loss_bc": 0.2180437594652176, "train_loss_llm": 0.5011103749275208, "grad_norm": 1.8187888860702515, "global_step": 55, "epoch": 0, "lr": 0.006999999999999999} +{"train_loss": 0.20139560103416443, "train_loss_bc": 0.20086990296840668, "train_loss_llm": 0.5256961584091187, "grad_norm": 2.08247447013855, "global_step": 56, "epoch": 0, "lr": 0.008} +{"train_loss": 0.20989899337291718, "train_loss_bc": 0.20911380648612976, "train_loss_llm": 0.7851892709732056, "grad_norm": 0.27354303002357483, "global_step": 57, "epoch": 0, "lr": 0.008} +{"train_loss": 0.19207656383514404, "train_loss_bc": 0.19136708974838257, "train_loss_llm": 0.7094740867614746, "grad_norm": 0.534111499786377, "global_step": 58, "epoch": 0, "lr": 0.008} +{"train_loss": 0.1742924004793167, "train_loss_bc": 0.17367114126682281, "train_loss_llm": 0.6212564706802368, "grad_norm": 0.7795819044113159, "global_step": 59, "epoch": 0, "lr": 0.008} +{"train_loss": 0.1624690294265747, "train_loss_bc": 0.1617729365825653, "train_loss_llm": 0.6960869431495667, "grad_norm": 1.0119670629501343, "global_step": 60, "epoch": 0, "lr": 0.008} +{"train_loss": 0.20042455196380615, "train_loss_bc": 0.19979658722877502, "train_loss_llm": 0.6279683113098145, "grad_norm": 1.274623990058899, "global_step": 61, "epoch": 0, "lr": 0.008} +{"train_loss": 0.16158545017242432, "train_loss_bc": 0.16083624958992004, "train_loss_llm": 0.7492036819458008, "grad_norm": 1.5101232528686523, "global_step": 62, "epoch": 0, "lr": 0.008} +{"train_loss": 0.13282041251659393, "train_loss_bc": 0.13209721446037292, "train_loss_llm": 0.723200798034668, "grad_norm": 1.7186288833618164, "global_step": 63, "epoch": 0, "lr": 0.008} +{"train_loss": 0.2033994346857071, "train_loss_bc": 0.2027282416820526, "train_loss_llm": 0.6711894273757935, "grad_norm": 1.9846457242965698, "global_step": 64, "epoch": 0, "lr": 0.009000000000000001} +{"train_loss": 0.09530064463615417, "train_loss_bc": 0.09461785107851028, "train_loss_llm": 0.6827924847602844, "grad_norm": 0.1637452095746994, "global_step": 65, "epoch": 0, "lr": 0.009000000000000001} +{"train_loss": 0.09785042703151703, "train_loss_bc": 0.09729202836751938, "train_loss_llm": 0.558398962020874, "grad_norm": 0.3286266624927521, "global_step": 66, "epoch": 0, "lr": 0.009000000000000001} +{"train_loss": 0.09337419271469116, "train_loss_bc": 0.09270930290222168, "train_loss_llm": 0.6648919582366943, "grad_norm": 0.48786014318466187, "global_step": 67, "epoch": 0, "lr": 0.009000000000000001} +{"train_loss": 0.17027954757213593, "train_loss_bc": 0.16956308484077454, "train_loss_llm": 0.7164597511291504, "grad_norm": 0.7218278050422668, "global_step": 68, "epoch": 0, "lr": 0.009000000000000001} +{"train_loss": 0.08503676950931549, "train_loss_bc": 0.08446164429187775, "train_loss_llm": 0.5751272439956665, "grad_norm": 0.8772305250167847, "global_step": 69, "epoch": 0, "lr": 0.009000000000000001} +{"train_loss": 0.10142002999782562, "train_loss_bc": 0.10081231594085693, "train_loss_llm": 0.6077142953872681, "grad_norm": 1.04507315158844, "global_step": 70, "epoch": 0, "lr": 0.009000000000000001} +{"train_loss": 0.11661797761917114, "train_loss_bc": 0.11599370092153549, "train_loss_llm": 0.6242777109146118, "grad_norm": 1.2287834882736206, "global_step": 71, "epoch": 0, "lr": 0.009000000000000001} +{"train_loss": 0.11624407768249512, "train_loss_bc": 0.11565285921096802, "train_loss_llm": 0.5912151336669922, "grad_norm": 1.4120811223983765, "global_step": 72, "epoch": 0, "lr": 0.01} +{"train_loss": 0.040211960673332214, "train_loss_bc": 0.039551250636577606, "train_loss_llm": 0.6607116460800171, "grad_norm": 0.0777788907289505, "global_step": 73, "epoch": 0, "lr": 0.01} +{"train_loss": 0.05076095834374428, "train_loss_bc": 0.05007569119334221, "train_loss_llm": 0.6852684617042542, "grad_norm": 0.17003870010375977, "global_step": 74, "epoch": 0, "lr": 0.01} +{"train_loss": 0.037128813564777374, "train_loss_bc": 0.03643818572163582, "train_loss_llm": 0.6906265020370483, "grad_norm": 0.244222030043602, "global_step": 75, "epoch": 0, "lr": 0.01} +{"train_loss": 0.037142593413591385, "train_loss_bc": 0.03646159917116165, "train_loss_llm": 0.6809947490692139, "grad_norm": 0.31510722637176514, "global_step": 76, "epoch": 0, "lr": 0.01} +{"train_loss": 0.05590587481856346, "train_loss_bc": 0.0552542544901371, "train_loss_llm": 0.6516196131706238, "grad_norm": 0.4150258004665375, "global_step": 77, "epoch": 0, "lr": 0.01} +{"train_loss": 0.030149903148412704, "train_loss_bc": 0.029475240036845207, "train_loss_llm": 0.6746631860733032, "grad_norm": 0.4752899408340454, "global_step": 78, "epoch": 0, "lr": 0.01} +{"train_loss": 0.050657838582992554, "train_loss_bc": 0.04992213845252991, "train_loss_llm": 0.7356998324394226, "grad_norm": 0.5678731799125671, "global_step": 79, "epoch": 0, "lr": 0.01} +{"train_loss": 0.02764507196843624, "train_loss_bc": 0.027012458071112633, "train_loss_llm": 0.6326141953468323, "grad_norm": 0.625312089920044, "global_step": 80, "epoch": 0, "lr": 0.009999999972157305} +{"train_loss": 0.03073396533727646, "train_loss_bc": 0.03021111525595188, "train_loss_llm": 0.5228506326675415, "grad_norm": 0.053040843456983566, "global_step": 81, "epoch": 0, "lr": 0.009999999972157305} +{"train_loss": 0.027266209945082664, "train_loss_bc": 0.026712927967309952, "train_loss_llm": 0.5532811880111694, "grad_norm": 0.09477357566356659, "global_step": 82, "epoch": 0, "lr": 0.009999999972157305} +{"train_loss": 0.027156496420502663, "train_loss_bc": 0.026670875027775764, "train_loss_llm": 0.48562145233154297, "grad_norm": 0.13936017453670502, "global_step": 83, "epoch": 0, "lr": 0.009999999972157305} +{"train_loss": 0.03493297100067139, "train_loss_bc": 0.03437262028455734, "train_loss_llm": 0.5603512525558472, "grad_norm": 0.20259420573711395, "global_step": 84, "epoch": 0, "lr": 0.009999999972157305} +{"train_loss": 0.027796030044555664, "train_loss_bc": 0.027264408767223358, "train_loss_llm": 0.5316207408905029, "grad_norm": 0.23961691558361053, "global_step": 85, "epoch": 0, "lr": 0.009999999972157305} +{"train_loss": 0.026962831616401672, "train_loss_bc": 0.026478836312890053, "train_loss_llm": 0.4839947819709778, "grad_norm": 0.278042733669281, "global_step": 86, "epoch": 0, "lr": 0.009999999972157305} +{"train_loss": 0.022709660232067108, "train_loss_bc": 0.02220826968550682, "train_loss_llm": 0.5013896226882935, "grad_norm": 0.31111451983451843, "global_step": 87, "epoch": 0, "lr": 0.009999999972157305} +{"train_loss": 0.03472929820418358, "train_loss_bc": 0.03418252617120743, "train_loss_llm": 0.5467737913131714, "grad_norm": 0.36670809984207153, "global_step": 88, "epoch": 0, "lr": 0.009999999888629223} +{"train_loss": 0.06921354681253433, "train_loss_bc": 0.06863778829574585, "train_loss_llm": 0.5757583975791931, "grad_norm": 0.0987037718296051, "global_step": 89, "epoch": 0, "lr": 0.009999999888629223} +{"train_loss": 0.0616544634103775, "train_loss_bc": 0.061051469296216965, "train_loss_llm": 0.602994441986084, "grad_norm": 0.18980276584625244, "global_step": 90, "epoch": 0, "lr": 0.009999999888629223} +{"train_loss": 0.05179845914244652, "train_loss_bc": 0.051266275346279144, "train_loss_llm": 0.5321850180625916, "grad_norm": 0.2702069580554962, "global_step": 91, "epoch": 0, "lr": 0.009999999888629223} +{"train_loss": 0.06521251052618027, "train_loss_bc": 0.06461584568023682, "train_loss_llm": 0.5966640710830688, "grad_norm": 0.3639739155769348, "global_step": 92, "epoch": 0, "lr": 0.009999999888629223} +{"train_loss": 0.06196574494242668, "train_loss_bc": 0.06147213280200958, "train_loss_llm": 0.4936124384403229, "grad_norm": 0.45718127489089966, "global_step": 93, "epoch": 0, "lr": 0.009999999888629223} +{"train_loss": 0.04629657045006752, "train_loss_bc": 0.045801080763339996, "train_loss_llm": 0.4954902231693268, "grad_norm": 0.5287754535675049, "global_step": 94, "epoch": 0, "lr": 0.009999999888629223} +{"train_loss": 0.04616197198629379, "train_loss_bc": 0.04563061147928238, "train_loss_llm": 0.5313600301742554, "grad_norm": 0.6031498908996582, "global_step": 95, "epoch": 0, "lr": 0.009999999888629223} +{"train_loss": 0.06340043991804123, "train_loss_bc": 0.06277582049369812, "train_loss_llm": 0.6246193647384644, "grad_norm": 0.6954230666160583, "global_step": 96, "epoch": 0, "lr": 0.00999999974941575} +{"train_loss": 0.08191214501857758, "train_loss_bc": 0.0813114270567894, "train_loss_llm": 0.6007174253463745, "grad_norm": 0.10619106888771057, "global_step": 97, "epoch": 0, "lr": 0.00999999974941575} +{"train_loss": 0.08071709424257278, "train_loss_bc": 0.08003760129213333, "train_loss_llm": 0.6794949769973755, "grad_norm": 0.20760591328144073, "global_step": 98, "epoch": 0, "lr": 0.00999999974941575} +{"train_loss": 0.08332143723964691, "train_loss_bc": 0.08272609114646912, "train_loss_llm": 0.5953459739685059, "grad_norm": 0.3134561777114868, "global_step": 99, "epoch": 0, "lr": 0.00999999974941575} +{"train_loss": 0.07155264914035797, "train_loss_bc": 0.07099221646785736, "train_loss_llm": 0.5604289770126343, "grad_norm": 0.40877580642700195, "global_step": 100, "epoch": 0, "lr": 0.00999999974941575} +{"train_loss": 0.08975838869810104, "train_loss_bc": 0.08916652202606201, "train_loss_llm": 0.5918655395507812, "grad_norm": 0.522554337978363, "global_step": 101, "epoch": 0, "lr": 0.00999999974941575} +{"train_loss": 0.07257966697216034, "train_loss_bc": 0.07201467454433441, "train_loss_llm": 0.5649896860122681, "grad_norm": 0.6189970970153809, "global_step": 102, "epoch": 0, "lr": 0.00999999974941575} +{"train_loss": 0.06007641553878784, "train_loss_bc": 0.05953027680516243, "train_loss_llm": 0.546138346195221, "grad_norm": 0.702296793460846, "global_step": 103, "epoch": 0, "lr": 0.00999999974941575} +{"train_loss": 0.04568513110280037, "train_loss_bc": 0.045178987085819244, "train_loss_llm": 0.5061453580856323, "grad_norm": 0.7713168263435364, "global_step": 104, "epoch": 0, "lr": 0.009999999554516895} +{"train_loss": 0.062235742807388306, "train_loss_bc": 0.061548247933387756, "train_loss_llm": 0.687494158744812, "grad_norm": 0.08229470998048782, "global_step": 105, "epoch": 0, "lr": 0.009999999554516895} +{"train_loss": 0.08357568085193634, "train_loss_bc": 0.08299360424280167, "train_loss_llm": 0.5820728540420532, "grad_norm": 0.18586039543151855, "global_step": 106, "epoch": 0, "lr": 0.009999999554516895} +{"train_loss": 0.08088018745183945, "train_loss_bc": 0.08020119369029999, "train_loss_llm": 0.6789901256561279, "grad_norm": 0.2842538356781006, "global_step": 107, "epoch": 0, "lr": 0.009999999554516895} +{"train_loss": 0.07067245990037918, "train_loss_bc": 0.07003812491893768, "train_loss_llm": 0.6343338489532471, "grad_norm": 0.3756967782974243, "global_step": 108, "epoch": 0, "lr": 0.009999999554516895} +{"train_loss": 0.062134191393852234, "train_loss_bc": 0.06162497028708458, "train_loss_llm": 0.50922030210495, "grad_norm": 0.45985621213912964, "global_step": 109, "epoch": 0, "lr": 0.009999999554516895} +{"train_loss": 0.05643927678465843, "train_loss_bc": 0.05575673654675484, "train_loss_llm": 0.6825414896011353, "grad_norm": 0.535986065864563, "global_step": 110, "epoch": 0, "lr": 0.009999999554516895} +{"train_loss": 0.06275462359189987, "train_loss_bc": 0.06217849254608154, "train_loss_llm": 0.5761322975158691, "grad_norm": 0.6212720274925232, "global_step": 111, "epoch": 0, "lr": 0.009999999554516895} +{"train_loss": 0.0717947706580162, "train_loss_bc": 0.07112696766853333, "train_loss_llm": 0.6678000688552856, "grad_norm": 0.712874174118042, "global_step": 112, "epoch": 0, "lr": 0.009999999303932654} +{"train_loss": 0.07107824087142944, "train_loss_bc": 0.07066100835800171, "train_loss_llm": 0.4172302484512329, "grad_norm": 0.09572537988424301, "global_step": 113, "epoch": 0, "lr": 0.009999999303932654} +{"train_loss": 0.0622958242893219, "train_loss_bc": 0.0617825910449028, "train_loss_llm": 0.5132333040237427, "grad_norm": 0.18129226565361023, "global_step": 114, "epoch": 0, "lr": 0.009999999303932654} +{"train_loss": 0.05546606332063675, "train_loss_bc": 0.054820477962493896, "train_loss_llm": 0.6455863118171692, "grad_norm": 0.2585110068321228, "global_step": 115, "epoch": 0, "lr": 0.009999999303932654} +{"train_loss": 0.052835989743471146, "train_loss_bc": 0.052223652601242065, "train_loss_llm": 0.612338662147522, "grad_norm": 0.3340230882167816, "global_step": 116, "epoch": 0, "lr": 0.009999999303932654} +{"train_loss": 0.07355044782161713, "train_loss_bc": 0.0729360580444336, "train_loss_llm": 0.614387035369873, "grad_norm": 0.43235117197036743, "global_step": 117, "epoch": 0, "lr": 0.009999999303932654} +{"train_loss": 0.058171968907117844, "train_loss_bc": 0.057668983936309814, "train_loss_llm": 0.5029836893081665, "grad_norm": 0.5117653608322144, "global_step": 118, "epoch": 0, "lr": 0.009999999303932654} +{"train_loss": 0.06179669499397278, "train_loss_bc": 0.06129191815853119, "train_loss_llm": 0.5047756433486938, "grad_norm": 0.5967010855674744, "global_step": 119, "epoch": 0, "lr": 0.009999999303932654} +{"train_loss": 0.03286455199122429, "train_loss_bc": 0.03232846036553383, "train_loss_llm": 0.536090612411499, "grad_norm": 0.6471052169799805, "global_step": 120, "epoch": 0, "lr": 0.009999998997663032} +{"train_loss": 0.03573526442050934, "train_loss_bc": 0.03532949090003967, "train_loss_llm": 0.40577366948127747, "grad_norm": 0.05735393241047859, "global_step": 121, "epoch": 0, "lr": 0.009999998997663032} +{"train_loss": 0.038237735629081726, "train_loss_bc": 0.0377943217754364, "train_loss_llm": 0.44341397285461426, "grad_norm": 0.11955223232507706, "global_step": 122, "epoch": 0, "lr": 0.009999998997663032} +{"train_loss": 0.05409952253103256, "train_loss_bc": 0.05373173952102661, "train_loss_llm": 0.3677833676338196, "grad_norm": 0.20442572236061096, "global_step": 123, "epoch": 0, "lr": 0.009999998997663032} +{"train_loss": 0.04305477812886238, "train_loss_bc": 0.0426463782787323, "train_loss_llm": 0.4084013104438782, "grad_norm": 0.2714554965496063, "global_step": 124, "epoch": 0, "lr": 0.009999998997663032} +{"train_loss": 0.035634320229291916, "train_loss_bc": 0.03516857698559761, "train_loss_llm": 0.4657438099384308, "grad_norm": 0.3238549530506134, "global_step": 125, "epoch": 0, "lr": 0.009999998997663032} +{"train_loss": 0.05339725315570831, "train_loss_bc": 0.05303904414176941, "train_loss_llm": 0.35820940136909485, "grad_norm": 0.4088974893093109, "global_step": 126, "epoch": 0, "lr": 0.009999998997663032} +{"train_loss": 0.04185650870203972, "train_loss_bc": 0.04144421964883804, "train_loss_llm": 0.4122905433177948, "grad_norm": 0.47525259852409363, "global_step": 127, "epoch": 0, "lr": 0.009999998997663032} +{"train_loss": 0.042993541806936264, "train_loss_bc": 0.042601581662893295, "train_loss_llm": 0.3919590413570404, "grad_norm": 0.5411036014556885, "global_step": 128, "epoch": 0, "lr": 0.009999998635708033} +{"train_loss": 0.02562362141907215, "train_loss_bc": 0.024971390143036842, "train_loss_llm": 0.6522307395935059, "grad_norm": 0.041133757680654526, "global_step": 129, "epoch": 0, "lr": 0.009999998635708033} +{"train_loss": 0.02191310189664364, "train_loss_bc": 0.02129165455698967, "train_loss_llm": 0.6214474439620972, "grad_norm": 0.07116004079580307, "global_step": 130, "epoch": 0, "lr": 0.009999998635708033} +{"train_loss": 0.03156816214323044, "train_loss_bc": 0.0309942327439785, "train_loss_llm": 0.5739300847053528, "grad_norm": 0.12592613697052002, "global_step": 131, "epoch": 0, "lr": 0.009999998635708033} +{"train_loss": 0.02577713131904602, "train_loss_bc": 0.025125574320554733, "train_loss_llm": 0.6515576839447021, "grad_norm": 0.16578812897205353, "global_step": 132, "epoch": 0, "lr": 0.009999998635708033} +{"train_loss": 0.02320152334868908, "train_loss_bc": 0.02264384739100933, "train_loss_llm": 0.5576763153076172, "grad_norm": 0.19451332092285156, "global_step": 133, "epoch": 0, "lr": 0.009999998635708033} +{"train_loss": 0.026553723961114883, "train_loss_bc": 0.02588409185409546, "train_loss_llm": 0.6696317195892334, "grad_norm": 0.23911207914352417, "global_step": 134, "epoch": 0, "lr": 0.009999998635708033} +{"train_loss": 0.02071528509259224, "train_loss_bc": 0.02012854814529419, "train_loss_llm": 0.5867360830307007, "grad_norm": 0.27343958616256714, "global_step": 135, "epoch": 0, "lr": 0.009999998635708033} +{"train_loss": 0.0177980437874794, "train_loss_bc": 0.01714349165558815, "train_loss_llm": 0.6545513272285461, "grad_norm": 0.2921649217605591, "global_step": 136, "epoch": 0, "lr": 0.009999998218067659} +{"train_loss": 0.016527190804481506, "train_loss_bc": 0.015900835394859314, "train_loss_llm": 0.6263555288314819, "grad_norm": 0.027912678197026253, "global_step": 137, "epoch": 0, "lr": 0.009999998218067659} +{"train_loss": 0.016124101355671883, "train_loss_bc": 0.015586758963763714, "train_loss_llm": 0.5373432636260986, "grad_norm": 0.058148931711912155, "global_step": 138, "epoch": 0, "lr": 0.009999998218067659} +{"train_loss": 0.02101842127740383, "train_loss_bc": 0.020332563668489456, "train_loss_llm": 0.6858576536178589, "grad_norm": 0.07774510979652405, "global_step": 139, "epoch": 0, "lr": 0.009999998218067659} +{"train_loss": 0.019856909289956093, "train_loss_bc": 0.019175313413143158, "train_loss_llm": 0.6815959215164185, "grad_norm": 0.1038837879896164, "global_step": 140, "epoch": 0, "lr": 0.009999998218067659} +{"train_loss": 0.02635011076927185, "train_loss_bc": 0.02552071586251259, "train_loss_llm": 0.8293948769569397, "grad_norm": 0.13838204741477966, "global_step": 141, "epoch": 0, "lr": 0.009999998218067659} +{"train_loss": 0.017442570999264717, "train_loss_bc": 0.016876015812158585, "train_loss_llm": 0.5665552616119385, "grad_norm": 0.16495107114315033, "global_step": 142, "epoch": 0, "lr": 0.009999998218067659} +{"train_loss": 0.019354552030563354, "train_loss_bc": 0.018637431785464287, "train_loss_llm": 0.7171201109886169, "grad_norm": 0.19188618659973145, "global_step": 143, "epoch": 0, "lr": 0.009999998218067659} +{"train_loss": 0.02011699415743351, "train_loss_bc": 0.01947595179080963, "train_loss_llm": 0.6410424709320068, "grad_norm": 0.21645328402519226, "global_step": 144, "epoch": 0, "lr": 0.009999997744741916} +{"train_loss": 0.02425413206219673, "train_loss_bc": 0.023770108819007874, "train_loss_llm": 0.4840241074562073, "grad_norm": 0.0436902791261673, "global_step": 145, "epoch": 0, "lr": 0.009999997744741916} +{"train_loss": 0.022116929292678833, "train_loss_bc": 0.021655619144439697, "train_loss_llm": 0.4613092541694641, "grad_norm": 0.08582352846860886, "global_step": 146, "epoch": 0, "lr": 0.009999997744741916} +{"train_loss": 0.021659119054675102, "train_loss_bc": 0.021171528846025467, "train_loss_llm": 0.48758962750434875, "grad_norm": 0.126007542014122, "global_step": 147, "epoch": 0, "lr": 0.009999997744741916} +{"train_loss": 0.03805282711982727, "train_loss_bc": 0.03744645416736603, "train_loss_llm": 0.6063730716705322, "grad_norm": 0.19119882583618164, "global_step": 148, "epoch": 0, "lr": 0.009999997744741916} +{"train_loss": 0.01933918334543705, "train_loss_bc": 0.018880464136600494, "train_loss_llm": 0.45871883630752563, "grad_norm": 0.2215609848499298, "global_step": 149, "epoch": 0, "lr": 0.009999997744741916} +{"train_loss": 0.020558631047606468, "train_loss_bc": 0.019996277987957, "train_loss_llm": 0.5623538494110107, "grad_norm": 0.2572121322154999, "global_step": 150, "epoch": 0, "lr": 0.009999997744741916} +{"train_loss": 0.022163955494761467, "train_loss_bc": 0.021520184352993965, "train_loss_llm": 0.6437717080116272, "grad_norm": 0.29958251118659973, "global_step": 151, "epoch": 0, "lr": 0.009999997744741916} +{"train_loss": 0.026239177212119102, "train_loss_bc": 0.025800224393606186, "train_loss_llm": 0.4389524757862091, "grad_norm": 0.34274646639823914, "global_step": 152, "epoch": 0, "lr": 0.00999999721573081} +{"train_loss": 0.02925034798681736, "train_loss_bc": 0.028717506676912308, "train_loss_llm": 0.5328419804573059, "grad_norm": 0.05676320195198059, "global_step": 153, "epoch": 0, "lr": 0.00999999721573081} +{"train_loss": 0.037463825196027756, "train_loss_bc": 0.03690113127231598, "train_loss_llm": 0.5626922845840454, "grad_norm": 0.12929601967334747, "global_step": 154, "epoch": 0, "lr": 0.00999999721573081} +{"train_loss": 0.018557682633399963, "train_loss_bc": 0.018073368817567825, "train_loss_llm": 0.4843147099018097, "grad_norm": 0.17127014696598053, "global_step": 155, "epoch": 0, "lr": 0.00999999721573081} +{"train_loss": 0.024363229051232338, "train_loss_bc": 0.02386392466723919, "train_loss_llm": 0.4993036687374115, "grad_norm": 0.2232280820608139, "global_step": 156, "epoch": 0, "lr": 0.00999999721573081} +{"train_loss": 0.030357468873262405, "train_loss_bc": 0.029831916093826294, "train_loss_llm": 0.5255520939826965, "grad_norm": 0.2857641279697418, "global_step": 157, "epoch": 0, "lr": 0.00999999721573081} +{"train_loss": 0.038563068956136703, "train_loss_bc": 0.03807063773274422, "train_loss_llm": 0.4924296736717224, "grad_norm": 0.3589693307876587, "global_step": 158, "epoch": 0, "lr": 0.00999999721573081} +{"train_loss": 0.038679443299770355, "train_loss_bc": 0.03814232721924782, "train_loss_llm": 0.5371164083480835, "grad_norm": 0.42973586916923523, "global_step": 159, "epoch": 0, "lr": 0.00999999721573081} +{"train_loss": 0.03393377736210823, "train_loss_bc": 0.0334688201546669, "train_loss_llm": 0.4649561643600464, "grad_norm": 0.49458184838294983, "global_step": 160, "epoch": 0, "lr": 0.009999996631034345} +{"train_loss": 0.03411827236413956, "train_loss_bc": 0.033583398908376694, "train_loss_llm": 0.5348742008209229, "grad_norm": 0.06490988284349442, "global_step": 161, "epoch": 0, "lr": 0.009999996631034345} +{"train_loss": 0.02351403422653675, "train_loss_bc": 0.02295222505927086, "train_loss_llm": 0.5618085861206055, "grad_norm": 0.11563616991043091, "global_step": 162, "epoch": 0, "lr": 0.009999996631034345} +{"train_loss": 0.028045671060681343, "train_loss_bc": 0.027559760957956314, "train_loss_llm": 0.4859097898006439, "grad_norm": 0.17181871831417084, "global_step": 163, "epoch": 0, "lr": 0.009999996631034345} +{"train_loss": 0.012934507802128792, "train_loss_bc": 0.012457642704248428, "train_loss_llm": 0.4768647849559784, "grad_norm": 0.20869013667106628, "global_step": 164, "epoch": 0, "lr": 0.009999996631034345} +{"train_loss": 0.02624637447297573, "train_loss_bc": 0.02573414519429207, "train_loss_llm": 0.5122296214103699, "grad_norm": 0.2638067603111267, "global_step": 165, "epoch": 0, "lr": 0.009999996631034345} +{"train_loss": 0.04066107049584389, "train_loss_bc": 0.04009600728750229, "train_loss_llm": 0.5650624632835388, "grad_norm": 0.33961179852485657, "global_step": 166, "epoch": 0, "lr": 0.009999996631034345} +{"train_loss": 0.018790556117892265, "train_loss_bc": 0.018201837316155434, "train_loss_llm": 0.5887188911437988, "grad_norm": 0.38523611426353455, "global_step": 167, "epoch": 0, "lr": 0.009999996631034345} +{"train_loss": 0.024333346635103226, "train_loss_bc": 0.023783767595887184, "train_loss_llm": 0.5495793223381042, "grad_norm": 0.4378797113895416, "global_step": 168, "epoch": 0, "lr": 0.00999999599065253} +{"train_loss": 0.02820535935461521, "train_loss_bc": 0.027705006301403046, "train_loss_llm": 0.5003523826599121, "grad_norm": 0.053658343851566315, "global_step": 169, "epoch": 0, "lr": 0.00999999599065253} +{"train_loss": 0.025200804695487022, "train_loss_bc": 0.02471126988530159, "train_loss_llm": 0.4895356297492981, "grad_norm": 0.10377084463834763, "global_step": 170, "epoch": 0, "lr": 0.00999999599065253} +{"train_loss": 0.01955123245716095, "train_loss_bc": 0.019059764221310616, "train_loss_llm": 0.4914677143096924, "grad_norm": 0.14640706777572632, "global_step": 171, "epoch": 0, "lr": 0.00999999599065253} +{"train_loss": 0.02138841524720192, "train_loss_bc": 0.02090127021074295, "train_loss_llm": 0.48714545369148254, "grad_norm": 0.1854257434606552, "global_step": 172, "epoch": 0, "lr": 0.00999999599065253} +{"train_loss": 0.024181261658668518, "train_loss_bc": 0.023732315748929977, "train_loss_llm": 0.44894570112228394, "grad_norm": 0.23339462280273438, "global_step": 173, "epoch": 0, "lr": 0.00999999599065253} +{"train_loss": 0.023717273026704788, "train_loss_bc": 0.02322128415107727, "train_loss_llm": 0.4959881603717804, "grad_norm": 0.2798902988433838, "global_step": 174, "epoch": 0, "lr": 0.00999999599065253} +{"train_loss": 0.03485918045043945, "train_loss_bc": 0.034368738532066345, "train_loss_llm": 0.49044036865234375, "grad_norm": 0.343951553106308, "global_step": 175, "epoch": 0, "lr": 0.00999999599065253} +{"train_loss": 0.02320096641778946, "train_loss_bc": 0.022748133167624474, "train_loss_llm": 0.4528330862522125, "grad_norm": 0.38900986313819885, "global_step": 176, "epoch": 0, "lr": 0.009999995294585371} +{"train_loss": 0.023330306634306908, "train_loss_bc": 0.022773388773202896, "train_loss_llm": 0.5569183826446533, "grad_norm": 0.0416216216981411, "global_step": 177, "epoch": 0, "lr": 0.009999995294585371} +{"train_loss": 0.017154095694422722, "train_loss_bc": 0.01675502397119999, "train_loss_llm": 0.3990713059902191, "grad_norm": 0.06875938922166824, "global_step": 178, "epoch": 0, "lr": 0.009999995294585371} +{"train_loss": 0.021150220185518265, "train_loss_bc": 0.02067718282341957, "train_loss_llm": 0.4730375111103058, "grad_norm": 0.10553700476884842, "global_step": 179, "epoch": 0, "lr": 0.009999995294585371} +{"train_loss": 0.016837185248732567, "train_loss_bc": 0.016467537730932236, "train_loss_llm": 0.36964699625968933, "grad_norm": 0.13219793140888214, "global_step": 180, "epoch": 0, "lr": 0.009999995294585371} +{"train_loss": 0.009770027361810207, "train_loss_bc": 0.009397734887897968, "train_loss_llm": 0.3722921311855316, "grad_norm": 0.15051698684692383, "global_step": 181, "epoch": 0, "lr": 0.009999995294585371} +{"train_loss": 0.02333925850689411, "train_loss_bc": 0.02287432923913002, "train_loss_llm": 0.46492841839790344, "grad_norm": 0.18662676215171814, "global_step": 182, "epoch": 0, "lr": 0.009999995294585371} +{"train_loss": 0.013727608136832714, "train_loss_bc": 0.01333148404955864, "train_loss_llm": 0.39612439274787903, "grad_norm": 0.21264775097370148, "global_step": 183, "epoch": 0, "lr": 0.009999995294585371} +{"train_loss": 0.01233526412397623, "train_loss_bc": 0.011927351355552673, "train_loss_llm": 0.4079124927520752, "grad_norm": 0.23667089641094208, "global_step": 184, "epoch": 0, "lr": 0.009999994542832874} +{"train_loss": 0.014251401647925377, "train_loss_bc": 0.013858886435627937, "train_loss_llm": 0.3925148844718933, "grad_norm": 0.02162291295826435, "global_step": 185, "epoch": 0, "lr": 0.009999994542832874} +{"train_loss": 0.02096753567457199, "train_loss_bc": 0.020425261929631233, "train_loss_llm": 0.542274534702301, "grad_norm": 0.05543696507811546, "global_step": 186, "epoch": 0, "lr": 0.009999994542832874} +{"train_loss": 0.021045425906777382, "train_loss_bc": 0.02053808979690075, "train_loss_llm": 0.5073364973068237, "grad_norm": 0.08720546215772629, "global_step": 187, "epoch": 0, "lr": 0.009999994542832874} +{"train_loss": 0.020341763272881508, "train_loss_bc": 0.019736729562282562, "train_loss_llm": 0.6050328016281128, "grad_norm": 0.11292923241853714, "global_step": 188, "epoch": 0, "lr": 0.009999994542832874} +{"train_loss": 0.011690325103700161, "train_loss_bc": 0.011292900890111923, "train_loss_llm": 0.39742419123649597, "grad_norm": 0.13396863639354706, "global_step": 189, "epoch": 0, "lr": 0.009999994542832874} +{"train_loss": 0.02014937624335289, "train_loss_bc": 0.019594522193074226, "train_loss_llm": 0.554853618144989, "grad_norm": 0.16246306896209717, "global_step": 190, "epoch": 0, "lr": 0.009999994542832874} +{"train_loss": 0.014351895079016685, "train_loss_bc": 0.01393540296703577, "train_loss_llm": 0.41649240255355835, "grad_norm": 0.18816952407360077, "global_step": 191, "epoch": 0, "lr": 0.009999994542832874} +{"train_loss": 0.015411981381475925, "train_loss_bc": 0.014920342713594437, "train_loss_llm": 0.4916388988494873, "grad_norm": 0.2118474692106247, "global_step": 192, "epoch": 0, "lr": 0.009999993735395049} +{"train_loss": 0.018817156553268433, "train_loss_bc": 0.018374208360910416, "train_loss_llm": 0.4429486393928528, "grad_norm": 0.031203927472233772, "global_step": 193, "epoch": 0, "lr": 0.009999993735395049} +{"train_loss": 0.015957778319716454, "train_loss_bc": 0.015566591173410416, "train_loss_llm": 0.39118722081184387, "grad_norm": 0.06421653926372528, "global_step": 194, "epoch": 0, "lr": 0.009999993735395049} +{"train_loss": 0.019087474793195724, "train_loss_bc": 0.018518388271331787, "train_loss_llm": 0.5690857172012329, "grad_norm": 0.0811726450920105, "global_step": 195, "epoch": 0, "lr": 0.009999993735395049} +{"train_loss": 0.015710245817899704, "train_loss_bc": 0.01526118814945221, "train_loss_llm": 0.44905757904052734, "grad_norm": 0.10153987258672714, "global_step": 196, "epoch": 0, "lr": 0.009999993735395049} +{"train_loss": 0.019453734159469604, "train_loss_bc": 0.018925407901406288, "train_loss_llm": 0.5283269882202148, "grad_norm": 0.1219358816742897, "global_step": 197, "epoch": 0, "lr": 0.009999993735395049} +{"train_loss": 0.01505982130765915, "train_loss_bc": 0.014631738886237144, "train_loss_llm": 0.4280821681022644, "grad_norm": 0.14476309716701508, "global_step": 198, "epoch": 0, "lr": 0.009999993735395049} +{"train_loss": 0.013033466413617134, "train_loss_bc": 0.012675212696194649, "train_loss_llm": 0.3582540452480316, "grad_norm": 0.1675001084804535, "global_step": 199, "epoch": 0, "lr": 0.009999993735395049} +{"train_loss": 0.01897766813635826, "train_loss_bc": 0.018507644534111023, "train_loss_llm": 0.4700234532356262, "grad_norm": 0.1958317756652832, "global_step": 200, "epoch": 0, "lr": 0.009999992872271905} +{"train_loss": 0.021655641496181488, "train_loss_bc": 0.021151017397642136, "train_loss_llm": 0.5046237111091614, "grad_norm": 0.03384440392255783, "global_step": 201, "epoch": 0, "lr": 0.009999992872271905} +{"train_loss": 0.019754817709326744, "train_loss_bc": 0.019261155277490616, "train_loss_llm": 0.49366283416748047, "grad_norm": 0.06363573670387268, "global_step": 202, "epoch": 0, "lr": 0.009999992872271905} +{"train_loss": 0.0197446309030056, "train_loss_bc": 0.019323352724313736, "train_loss_llm": 0.4212789237499237, "grad_norm": 0.09140758961439133, "global_step": 203, "epoch": 0, "lr": 0.009999992872271905} +{"train_loss": 0.02213365212082863, "train_loss_bc": 0.02167753502726555, "train_loss_llm": 0.4561164677143097, "grad_norm": 0.12779031693935394, "global_step": 204, "epoch": 0, "lr": 0.009999992872271905} +{"train_loss": 0.018727730959653854, "train_loss_bc": 0.018266774713993073, "train_loss_llm": 0.46095675230026245, "grad_norm": 0.15023837983608246, "global_step": 205, "epoch": 0, "lr": 0.009999992872271905} +{"train_loss": 0.021134980022907257, "train_loss_bc": 0.02065064013004303, "train_loss_llm": 0.48433929681777954, "grad_norm": 0.18333274126052856, "global_step": 206, "epoch": 0, "lr": 0.009999992872271905} +{"train_loss": 0.018410563468933105, "train_loss_bc": 0.017892083153128624, "train_loss_llm": 0.5184803605079651, "grad_norm": 0.20620277523994446, "global_step": 207, "epoch": 0, "lr": 0.009999992872271905} +{"train_loss": 0.01952839083969593, "train_loss_bc": 0.01910785771906376, "train_loss_llm": 0.42053380608558655, "grad_norm": 0.23822638392448425, "global_step": 208, "epoch": 0, "lr": 0.009999991953463454} +{"train_loss": 0.020005524158477783, "train_loss_bc": 0.01958916336297989, "train_loss_llm": 0.41636162996292114, "grad_norm": 0.022418994456529617, "global_step": 209, "epoch": 0, "lr": 0.009999991953463454} +{"train_loss": 0.022034049034118652, "train_loss_bc": 0.02159287966787815, "train_loss_llm": 0.4411696493625641, "grad_norm": 0.058893270790576935, "global_step": 210, "epoch": 0, "lr": 0.009999991953463454} +{"train_loss": 0.018601490184664726, "train_loss_bc": 0.018017925322055817, "train_loss_llm": 0.583564043045044, "grad_norm": 0.07501673698425293, "global_step": 211, "epoch": 0, "lr": 0.009999991953463454} +{"train_loss": 0.021930024027824402, "train_loss_bc": 0.021384473890066147, "train_loss_llm": 0.5455496311187744, "grad_norm": 0.09862517565488815, "global_step": 212, "epoch": 0, "lr": 0.009999991953463454} +{"train_loss": 0.020846663042902946, "train_loss_bc": 0.020350880920886993, "train_loss_llm": 0.49578237533569336, "grad_norm": 0.1402716189622879, "global_step": 213, "epoch": 0, "lr": 0.009999991953463454} +{"train_loss": 0.01877172477543354, "train_loss_bc": 0.01824098639190197, "train_loss_llm": 0.5307385921478271, "grad_norm": 0.16469259560108185, "global_step": 214, "epoch": 0, "lr": 0.009999991953463454} +{"train_loss": 0.020292270928621292, "train_loss_bc": 0.019869061186909676, "train_loss_llm": 0.42321062088012695, "grad_norm": 0.19249005615711212, "global_step": 215, "epoch": 0, "lr": 0.009999991953463454} +{"train_loss": 0.019689541310071945, "train_loss_bc": 0.019232220947742462, "train_loss_llm": 0.4573211669921875, "grad_norm": 0.21812190115451813, "global_step": 216, "epoch": 0, "lr": 0.0099999909789697} +{"train_loss": 0.01748274266719818, "train_loss_bc": 0.016972113400697708, "train_loss_llm": 0.5106291174888611, "grad_norm": 0.01980498433113098, "global_step": 217, "epoch": 0, "lr": 0.0099999909789697} +{"train_loss": 0.02484678477048874, "train_loss_bc": 0.024319060146808624, "train_loss_llm": 0.5277247428894043, "grad_norm": 0.0617092065513134, "global_step": 218, "epoch": 0, "lr": 0.0099999909789697} +{"train_loss": 0.019288551062345505, "train_loss_bc": 0.01883828639984131, "train_loss_llm": 0.4502650499343872, "grad_norm": 0.08189266920089722, "global_step": 219, "epoch": 0, "lr": 0.0099999909789697} +{"train_loss": 0.01973573863506317, "train_loss_bc": 0.019199654459953308, "train_loss_llm": 0.5360836982727051, "grad_norm": 0.09861791878938675, "global_step": 220, "epoch": 0, "lr": 0.0099999909789697} +{"train_loss": 0.018722541630268097, "train_loss_bc": 0.018269415944814682, "train_loss_llm": 0.45312485098838806, "grad_norm": 0.12747113406658173, "global_step": 221, "epoch": 0, "lr": 0.0099999909789697} +{"train_loss": 0.017685379832983017, "train_loss_bc": 0.0172601118683815, "train_loss_llm": 0.4252672493457794, "grad_norm": 0.15524466335773468, "global_step": 222, "epoch": 0, "lr": 0.0099999909789697} +{"train_loss": 0.021818850189447403, "train_loss_bc": 0.02134229615330696, "train_loss_llm": 0.47655367851257324, "grad_norm": 0.1838337481021881, "global_step": 223, "epoch": 0, "lr": 0.0099999909789697} +{"train_loss": 0.019331879913806915, "train_loss_bc": 0.01881510019302368, "train_loss_llm": 0.5167800188064575, "grad_norm": 0.2143346071243286, "global_step": 224, "epoch": 0, "lr": 0.00999998994879066} +{"train_loss": 0.017838943749666214, "train_loss_bc": 0.017424583435058594, "train_loss_llm": 0.4143611192703247, "grad_norm": 0.023944241926074028, "global_step": 225, "epoch": 0, "lr": 0.00999998994879066} +{"train_loss": 0.01796240359544754, "train_loss_bc": 0.017589787021279335, "train_loss_llm": 0.37261566519737244, "grad_norm": 0.03415573388338089, "global_step": 226, "epoch": 0, "lr": 0.00999998994879066} +{"train_loss": 0.01584581844508648, "train_loss_bc": 0.015357905998826027, "train_loss_llm": 0.4879117012023926, "grad_norm": 0.05189693719148636, "global_step": 227, "epoch": 0, "lr": 0.00999998994879066} +{"train_loss": 0.01800801046192646, "train_loss_bc": 0.01758820191025734, "train_loss_llm": 0.4198092520236969, "grad_norm": 0.07924457639455795, "global_step": 228, "epoch": 0, "lr": 0.00999998994879066} +{"train_loss": 0.018989915028214455, "train_loss_bc": 0.01854291930794716, "train_loss_llm": 0.4469965100288391, "grad_norm": 0.11897021532058716, "global_step": 229, "epoch": 0, "lr": 0.00999998994879066} +{"train_loss": 0.02125917375087738, "train_loss_bc": 0.020760733634233475, "train_loss_llm": 0.49844038486480713, "grad_norm": 0.1377515345811844, "global_step": 230, "epoch": 0, "lr": 0.00999998994879066} +{"train_loss": 0.019712205976247787, "train_loss_bc": 0.01920940726995468, "train_loss_llm": 0.5027981996536255, "grad_norm": 0.16281495988368988, "global_step": 231, "epoch": 0, "lr": 0.00999998994879066} +{"train_loss": 0.020320260897278786, "train_loss_bc": 0.019815631210803986, "train_loss_llm": 0.5046302080154419, "grad_norm": 0.1876341551542282, "global_step": 232, "epoch": 0, "lr": 0.009999988862926341} +{"train_loss": 0.013357514515519142, "train_loss_bc": 0.01291839312762022, "train_loss_llm": 0.43912118673324585, "grad_norm": 0.02517073042690754, "global_step": 233, "epoch": 0, "lr": 0.009999988862926341} +{"train_loss": 0.02109229937195778, "train_loss_bc": 0.020597826689481735, "train_loss_llm": 0.49447277188301086, "grad_norm": 0.04559013620018959, "global_step": 234, "epoch": 0, "lr": 0.009999988862926341} +{"train_loss": 0.02008131518959999, "train_loss_bc": 0.019501332193613052, "train_loss_llm": 0.579983115196228, "grad_norm": 0.07952536642551422, "global_step": 235, "epoch": 0, "lr": 0.009999988862926341} +{"train_loss": 0.016857489943504333, "train_loss_bc": 0.016379257664084435, "train_loss_llm": 0.4782329797744751, "grad_norm": 0.10649916529655457, "global_step": 236, "epoch": 0, "lr": 0.009999988862926341} +{"train_loss": 0.06850671758405677, "train_loss_bc": 0.014644688926637173, "train_loss_llm": 0.4756101667881012, "grad_norm": 0.13082890212535858, "global_step": 237, "epoch": 0, "lr": 0.009999988862926341, "train/cumulative_reward": 2.7083310524135573, "train/mean_score": 0.33428478816554785, "train/success_rate": 0.0, "test/cumulative_reward": 2.474044586385482, "test/mean_score": 0.3310451992587934, "test/success_rate": 0.0, "val_loss": 0.017693543806672096, "train_action_mse_error": 0.021953511983156204} +{"train_loss": 0.017798328772187233, "train_loss_bc": 0.017359893769025803, "train_loss_llm": 0.4384341835975647, "grad_norm": 0.157542422413826, "global_step": 238, "epoch": 1, "lr": 0.009999988862926341} +{"train_loss": 0.01802152208983898, "train_loss_bc": 0.017627805471420288, "train_loss_llm": 0.39371609687805176, "grad_norm": 0.17802225053310394, "global_step": 239, "epoch": 1, "lr": 0.009999988862926341} +{"train_loss": 0.018425248563289642, "train_loss_bc": 0.01776362583041191, "train_loss_llm": 0.6616224050521851, "grad_norm": 0.20682503283023834, "global_step": 240, "epoch": 1, "lr": 0.009999987721376759} +{"train_loss": 0.017822718247771263, "train_loss_bc": 0.017327211797237396, "train_loss_llm": 0.4955056309700012, "grad_norm": 0.03360544890165329, "global_step": 241, "epoch": 1, "lr": 0.009999987721376759} +{"train_loss": 0.01679021306335926, "train_loss_bc": 0.016283852979540825, "train_loss_llm": 0.5063599348068237, "grad_norm": 0.062126513570547104, "global_step": 242, "epoch": 1, "lr": 0.009999987721376759} +{"train_loss": 0.020830130204558372, "train_loss_bc": 0.020347915589809418, "train_loss_llm": 0.48221397399902344, "grad_norm": 0.08846676349639893, "global_step": 243, "epoch": 1, "lr": 0.009999987721376759} +{"train_loss": 0.011690114624798298, "train_loss_bc": 0.01113096158951521, "train_loss_llm": 0.559153139591217, "grad_norm": 0.1047411635518074, "global_step": 244, "epoch": 1, "lr": 0.009999987721376759} +{"train_loss": 0.020986376330256462, "train_loss_bc": 0.020374851301312447, "train_loss_llm": 0.6115252375602722, "grad_norm": 0.1375197023153305, "global_step": 245, "epoch": 1, "lr": 0.009999987721376759} +{"train_loss": 0.014499716460704803, "train_loss_bc": 0.013983565382659435, "train_loss_llm": 0.516150951385498, "grad_norm": 0.15900495648384094, "global_step": 246, "epoch": 1, "lr": 0.009999987721376759} +{"train_loss": 0.02040776051580906, "train_loss_bc": 0.01990542560815811, "train_loss_llm": 0.502334475517273, "grad_norm": 0.1920090615749359, "global_step": 247, "epoch": 1, "lr": 0.009999987721376759} +{"train_loss": 0.008450948633253574, "train_loss_bc": 0.00804897490888834, "train_loss_llm": 0.4019736349582672, "grad_norm": 0.20613166689872742, "global_step": 248, "epoch": 1, "lr": 0.009999986524141925} +{"train_loss": 0.01662587560713291, "train_loss_bc": 0.016171330586075783, "train_loss_llm": 0.45454519987106323, "grad_norm": 0.02374984882771969, "global_step": 249, "epoch": 1, "lr": 0.009999986524141925} +{"train_loss": 0.015652479603886604, "train_loss_bc": 0.015175838023424149, "train_loss_llm": 0.47664228081703186, "grad_norm": 0.04423899948596954, "global_step": 250, "epoch": 1, "lr": 0.009999986524141925} +{"train_loss": 0.015529230237007141, "train_loss_bc": 0.015086237341165543, "train_loss_llm": 0.4429924190044403, "grad_norm": 0.06532718986272812, "global_step": 251, "epoch": 1, "lr": 0.009999986524141925} +{"train_loss": 0.018649809062480927, "train_loss_bc": 0.018053732812404633, "train_loss_llm": 0.5960763692855835, "grad_norm": 0.09744929522275925, "global_step": 252, "epoch": 1, "lr": 0.009999986524141925} +{"train_loss": 0.014919068664312363, "train_loss_bc": 0.014484588988125324, "train_loss_llm": 0.4344799220561981, "grad_norm": 0.1184827908873558, "global_step": 253, "epoch": 1, "lr": 0.009999986524141925} +{"train_loss": 0.012522549368441105, "train_loss_bc": 0.0121694877743721, "train_loss_llm": 0.3530616760253906, "grad_norm": 0.13075849413871765, "global_step": 254, "epoch": 1, "lr": 0.009999986524141925} +{"train_loss": 0.017960211262106895, "train_loss_bc": 0.017518820241093636, "train_loss_llm": 0.4413911700248718, "grad_norm": 0.15736038982868195, "global_step": 255, "epoch": 1, "lr": 0.009999986524141925} +{"train_loss": 0.016007019206881523, "train_loss_bc": 0.015464743599295616, "train_loss_llm": 0.54227614402771, "grad_norm": 0.17768608033657074, "global_step": 256, "epoch": 1, "lr": 0.00999998527122185} +{"train_loss": 0.01158602349460125, "train_loss_bc": 0.011238181963562965, "train_loss_llm": 0.34784168004989624, "grad_norm": 0.010093354620039463, "global_step": 257, "epoch": 1, "lr": 0.00999998527122185} +{"train_loss": 0.010712604969739914, "train_loss_bc": 0.010269438847899437, "train_loss_llm": 0.44316577911376953, "grad_norm": 0.021126240491867065, "global_step": 258, "epoch": 1, "lr": 0.00999998527122185} +{"train_loss": 0.01096857525408268, "train_loss_bc": 0.010642854496836662, "train_loss_llm": 0.3257203996181488, "grad_norm": 0.03387540951371193, "global_step": 259, "epoch": 1, "lr": 0.00999998527122185} +{"train_loss": 0.01653478853404522, "train_loss_bc": 0.016012927517294884, "train_loss_llm": 0.5218604207038879, "grad_norm": 0.04911898449063301, "global_step": 260, "epoch": 1, "lr": 0.00999998527122185} +{"train_loss": 0.017165496945381165, "train_loss_bc": 0.01656423695385456, "train_loss_llm": 0.6012594699859619, "grad_norm": 0.06821974366903305, "global_step": 261, "epoch": 1, "lr": 0.00999998527122185} +{"train_loss": 0.012002137489616871, "train_loss_bc": 0.011621439829468727, "train_loss_llm": 0.38069722056388855, "grad_norm": 0.08292040973901749, "global_step": 262, "epoch": 1, "lr": 0.00999998527122185} +{"train_loss": 0.018928784877061844, "train_loss_bc": 0.018516037613153458, "train_loss_llm": 0.41274651885032654, "grad_norm": 0.09135116636753082, "global_step": 263, "epoch": 1, "lr": 0.00999998527122185} +{"train_loss": 0.018131952732801437, "train_loss_bc": 0.017562976107001305, "train_loss_llm": 0.5689768195152283, "grad_norm": 0.11499010771512985, "global_step": 264, "epoch": 1, "lr": 0.009999983962616553} +{"train_loss": 0.012489533051848412, "train_loss_bc": 0.011947352439165115, "train_loss_llm": 0.5421801805496216, "grad_norm": 0.013015178963541985, "global_step": 265, "epoch": 1, "lr": 0.009999983962616553} +{"train_loss": 0.013243050314486027, "train_loss_bc": 0.012746745720505714, "train_loss_llm": 0.4963045120239258, "grad_norm": 0.020864736288785934, "global_step": 266, "epoch": 1, "lr": 0.009999983962616553} +{"train_loss": 0.010356509126722813, "train_loss_bc": 0.009778052568435669, "train_loss_llm": 0.5784561634063721, "grad_norm": 0.0276536475867033, "global_step": 267, "epoch": 1, "lr": 0.009999983962616553} +{"train_loss": 0.012164799496531487, "train_loss_bc": 0.011662531644105911, "train_loss_llm": 0.5022678375244141, "grad_norm": 0.03239491581916809, "global_step": 268, "epoch": 1, "lr": 0.009999983962616553} +{"train_loss": 0.014096668921411037, "train_loss_bc": 0.0135754169896245, "train_loss_llm": 0.5212522745132446, "grad_norm": 0.0413699746131897, "global_step": 269, "epoch": 1, "lr": 0.009999983962616553} +{"train_loss": 0.011443986557424068, "train_loss_bc": 0.011009275913238525, "train_loss_llm": 0.43471041321754456, "grad_norm": 0.038190145045518875, "global_step": 270, "epoch": 1, "lr": 0.009999983962616553} +{"train_loss": 0.01239698100835085, "train_loss_bc": 0.011889282613992691, "train_loss_llm": 0.5076982975006104, "grad_norm": 0.04352530464529991, "global_step": 271, "epoch": 1, "lr": 0.009999983962616553} +{"train_loss": 0.013387206010520458, "train_loss_bc": 0.012860596179962158, "train_loss_llm": 0.5266100168228149, "grad_norm": 0.05260344222187996, "global_step": 272, "epoch": 1, "lr": 0.009999982598326042} +{"train_loss": 0.01481005921959877, "train_loss_bc": 0.014252791181206703, "train_loss_llm": 0.5572683215141296, "grad_norm": 0.0161435529589653, "global_step": 273, "epoch": 1, "lr": 0.009999982598326042} +{"train_loss": 0.009104442782700062, "train_loss_bc": 0.008664367720484734, "train_loss_llm": 0.44007524847984314, "grad_norm": 0.02379443496465683, "global_step": 274, "epoch": 1, "lr": 0.009999982598326042} +{"train_loss": 0.016264215111732483, "train_loss_bc": 0.015678897500038147, "train_loss_llm": 0.5853180885314941, "grad_norm": 0.043524160981178284, "global_step": 275, "epoch": 1, "lr": 0.009999982598326042} +{"train_loss": 0.017824366688728333, "train_loss_bc": 0.017341842874884605, "train_loss_llm": 0.4825235903263092, "grad_norm": 0.062320295721292496, "global_step": 276, "epoch": 1, "lr": 0.009999982598326042} +{"train_loss": 0.018680082634091377, "train_loss_bc": 0.018099233508110046, "train_loss_llm": 0.5808486342430115, "grad_norm": 0.08115622401237488, "global_step": 277, "epoch": 1, "lr": 0.009999982598326042} +{"train_loss": 0.012962117791175842, "train_loss_bc": 0.012479234486818314, "train_loss_llm": 0.4828835129737854, "grad_norm": 0.08807636052370071, "global_step": 278, "epoch": 1, "lr": 0.009999982598326042} +{"train_loss": 0.01446839701384306, "train_loss_bc": 0.013988605700433254, "train_loss_llm": 0.4797913432121277, "grad_norm": 0.09451211988925934, "global_step": 279, "epoch": 1, "lr": 0.009999982598326042} +{"train_loss": 0.01429255772382021, "train_loss_bc": 0.013756824657320976, "train_loss_llm": 0.5357327461242676, "grad_norm": 0.10867451131343842, "global_step": 280, "epoch": 1, "lr": 0.00999998117835034} +{"train_loss": 0.013955993577837944, "train_loss_bc": 0.013429549522697926, "train_loss_llm": 0.5264439582824707, "grad_norm": 0.025083180516958237, "global_step": 281, "epoch": 1, "lr": 0.00999998117835034} +{"train_loss": 0.016641786321997643, "train_loss_bc": 0.01618514023721218, "train_loss_llm": 0.4566459357738495, "grad_norm": 0.045515093952417374, "global_step": 282, "epoch": 1, "lr": 0.00999998117835034} +{"train_loss": 0.014012634754180908, "train_loss_bc": 0.013572480529546738, "train_loss_llm": 0.4401538670063019, "grad_norm": 0.06089504435658455, "global_step": 283, "epoch": 1, "lr": 0.00999998117835034} +{"train_loss": 0.01873624697327614, "train_loss_bc": 0.01815981976687908, "train_loss_llm": 0.5764279365539551, "grad_norm": 0.07969119399785995, "global_step": 284, "epoch": 1, "lr": 0.00999998117835034} +{"train_loss": 0.017088143154978752, "train_loss_bc": 0.01662488281726837, "train_loss_llm": 0.46325966715812683, "grad_norm": 0.09512478858232498, "global_step": 285, "epoch": 1, "lr": 0.00999998117835034} +{"train_loss": 0.010966386646032333, "train_loss_bc": 0.010503709316253662, "train_loss_llm": 0.46267759799957275, "grad_norm": 0.11192868649959564, "global_step": 286, "epoch": 1, "lr": 0.00999998117835034} +{"train_loss": 0.01674928329885006, "train_loss_bc": 0.016327429562807083, "train_loss_llm": 0.4218546152114868, "grad_norm": 0.1299564689397812, "global_step": 287, "epoch": 1, "lr": 0.00999998117835034} +{"train_loss": 0.016223106533288956, "train_loss_bc": 0.015695005655288696, "train_loss_llm": 0.5281013250350952, "grad_norm": 0.14104627072811127, "global_step": 288, "epoch": 1, "lr": 0.009999979702689454} +{"train_loss": 0.017164213582873344, "train_loss_bc": 0.016666820272803307, "train_loss_llm": 0.4973934590816498, "grad_norm": 0.02183571644127369, "global_step": 289, "epoch": 1, "lr": 0.009999979702689454} +{"train_loss": 0.01507254596799612, "train_loss_bc": 0.014609228819608688, "train_loss_llm": 0.46331721544265747, "grad_norm": 0.03880901262164116, "global_step": 290, "epoch": 1, "lr": 0.009999979702689454} +{"train_loss": 0.019254591315984726, "train_loss_bc": 0.01874985173344612, "train_loss_llm": 0.5047386884689331, "grad_norm": 0.06385096162557602, "global_step": 291, "epoch": 1, "lr": 0.009999979702689454} +{"train_loss": 0.0154347512871027, "train_loss_bc": 0.01500864326953888, "train_loss_llm": 0.42610809206962585, "grad_norm": 0.08098644018173218, "global_step": 292, "epoch": 1, "lr": 0.009999979702689454} +{"train_loss": 0.01903417333960533, "train_loss_bc": 0.018556609749794006, "train_loss_llm": 0.47756439447402954, "grad_norm": 0.10871503502130508, "global_step": 293, "epoch": 1, "lr": 0.009999979702689454} +{"train_loss": 0.0156480111181736, "train_loss_bc": 0.015174117870628834, "train_loss_llm": 0.47389230132102966, "grad_norm": 0.13166505098342896, "global_step": 294, "epoch": 1, "lr": 0.009999979702689454} +{"train_loss": 0.016828790307044983, "train_loss_bc": 0.016389530152082443, "train_loss_llm": 0.43926095962524414, "grad_norm": 0.1540358066558838, "global_step": 295, "epoch": 1, "lr": 0.009999979702689454} +{"train_loss": 0.013542444445192814, "train_loss_bc": 0.013059152290225029, "train_loss_llm": 0.48329171538352966, "grad_norm": 0.17128632962703705, "global_step": 296, "epoch": 1, "lr": 0.00999997817134341} +{"train_loss": 0.01356798131018877, "train_loss_bc": 0.013153335079550743, "train_loss_llm": 0.41464588046073914, "grad_norm": 0.01961551606655121, "global_step": 297, "epoch": 1, "lr": 0.00999997817134341} +{"train_loss": 0.015729112550616264, "train_loss_bc": 0.015209322795271873, "train_loss_llm": 0.5197891592979431, "grad_norm": 0.04597029462456703, "global_step": 298, "epoch": 1, "lr": 0.00999997817134341} +{"train_loss": 0.015187690034508705, "train_loss_bc": 0.014714469201862812, "train_loss_llm": 0.4732206165790558, "grad_norm": 0.06501750648021698, "global_step": 299, "epoch": 1, "lr": 0.00999997817134341} +{"train_loss": 0.01496143825352192, "train_loss_bc": 0.014608250930905342, "train_loss_llm": 0.35318759083747864, "grad_norm": 0.09145065397024155, "global_step": 300, "epoch": 1, "lr": 0.00999997817134341} +{"train_loss": 0.014216883108019829, "train_loss_bc": 0.013763219118118286, "train_loss_llm": 0.4536639451980591, "grad_norm": 0.10366859287023544, "global_step": 301, "epoch": 1, "lr": 0.00999997817134341} +{"train_loss": 0.01669706590473652, "train_loss_bc": 0.016175638884305954, "train_loss_llm": 0.5214270353317261, "grad_norm": 0.12138961255550385, "global_step": 302, "epoch": 1, "lr": 0.00999997817134341} +{"train_loss": 0.014355774968862534, "train_loss_bc": 0.013972668908536434, "train_loss_llm": 0.3831060230731964, "grad_norm": 0.14051002264022827, "global_step": 303, "epoch": 1, "lr": 0.00999997817134341} +{"train_loss": 0.0146627863869071, "train_loss_bc": 0.014223872683942318, "train_loss_llm": 0.4389132857322693, "grad_norm": 0.15912242233753204, "global_step": 304, "epoch": 1, "lr": 0.009999976584312217} +{"train_loss": 0.01133162435144186, "train_loss_bc": 0.010947933420538902, "train_loss_llm": 0.3836905360221863, "grad_norm": 0.02009022980928421, "global_step": 305, "epoch": 1, "lr": 0.009999976584312217} +{"train_loss": 0.01270595658570528, "train_loss_bc": 0.012229321524500847, "train_loss_llm": 0.4766354262828827, "grad_norm": 0.029522329568862915, "global_step": 306, "epoch": 1, "lr": 0.009999976584312217} +{"train_loss": 0.014936204068362713, "train_loss_bc": 0.014451291412115097, "train_loss_llm": 0.4849129617214203, "grad_norm": 0.056380923837423325, "global_step": 307, "epoch": 1, "lr": 0.009999976584312217} +{"train_loss": 0.010747802443802357, "train_loss_bc": 0.010318206623196602, "train_loss_llm": 0.4295954704284668, "grad_norm": 0.07131356745958328, "global_step": 308, "epoch": 1, "lr": 0.009999976584312217} +{"train_loss": 0.010907587595283985, "train_loss_bc": 0.010417597368359566, "train_loss_llm": 0.4899904727935791, "grad_norm": 0.08291061967611313, "global_step": 309, "epoch": 1, "lr": 0.009999976584312217} +{"train_loss": 0.01565735787153244, "train_loss_bc": 0.01524802204221487, "train_loss_llm": 0.4093364179134369, "grad_norm": 0.1052742674946785, "global_step": 310, "epoch": 1, "lr": 0.009999976584312217} +{"train_loss": 0.013044213876128197, "train_loss_bc": 0.012592458166182041, "train_loss_llm": 0.4517558515071869, "grad_norm": 0.12751010060310364, "global_step": 311, "epoch": 1, "lr": 0.009999976584312217} +{"train_loss": 0.012058139778673649, "train_loss_bc": 0.011555514298379421, "train_loss_llm": 0.5026251077651978, "grad_norm": 0.1479104608297348, "global_step": 312, "epoch": 1, "lr": 0.009999974941595897} +{"train_loss": 0.010018293745815754, "train_loss_bc": 0.009687970392405987, "train_loss_llm": 0.3303234279155731, "grad_norm": 0.01355504896491766, "global_step": 313, "epoch": 1, "lr": 0.009999974941595897} +{"train_loss": 0.012328105047345161, "train_loss_bc": 0.011914866045117378, "train_loss_llm": 0.4132387638092041, "grad_norm": 0.01594623737037182, "global_step": 314, "epoch": 1, "lr": 0.009999974941595897} +{"train_loss": 0.013978242874145508, "train_loss_bc": 0.013544456101953983, "train_loss_llm": 0.4337867498397827, "grad_norm": 0.032734472304582596, "global_step": 315, "epoch": 1, "lr": 0.009999974941595897} +{"train_loss": 0.010386270470917225, "train_loss_bc": 0.010068733245134354, "train_loss_llm": 0.31753700971603394, "grad_norm": 0.047776710242033005, "global_step": 316, "epoch": 1, "lr": 0.009999974941595897} +{"train_loss": 0.012578755617141724, "train_loss_bc": 0.01214287057518959, "train_loss_llm": 0.4358847141265869, "grad_norm": 0.0635766088962555, "global_step": 317, "epoch": 1, "lr": 0.009999974941595897} +{"train_loss": 0.012422928586602211, "train_loss_bc": 0.012042918242514133, "train_loss_llm": 0.38001012802124023, "grad_norm": 0.07768117636442184, "global_step": 318, "epoch": 1, "lr": 0.009999974941595897} +{"train_loss": 0.010467208921909332, "train_loss_bc": 0.009988697245717049, "train_loss_llm": 0.478511244058609, "grad_norm": 0.08535484224557877, "global_step": 319, "epoch": 1, "lr": 0.009999974941595897} +{"train_loss": 0.012417087331414223, "train_loss_bc": 0.01195848360657692, "train_loss_llm": 0.45860394835472107, "grad_norm": 0.10900090634822845, "global_step": 320, "epoch": 1, "lr": 0.009999973243194467} +{"train_loss": 0.01247399765998125, "train_loss_bc": 0.01207180880010128, "train_loss_llm": 0.4021890163421631, "grad_norm": 0.010779356583952904, "global_step": 321, "epoch": 1, "lr": 0.009999973243194467} +{"train_loss": 0.011968130245804787, "train_loss_bc": 0.0115253496915102, "train_loss_llm": 0.44278010725975037, "grad_norm": 0.016100643202662468, "global_step": 322, "epoch": 1, "lr": 0.009999973243194467} +{"train_loss": 0.011378668248653412, "train_loss_bc": 0.010935855098068714, "train_loss_llm": 0.4428134262561798, "grad_norm": 0.02845556102693081, "global_step": 323, "epoch": 1, "lr": 0.009999973243194467} +{"train_loss": 0.010558527894318104, "train_loss_bc": 0.010113537311553955, "train_loss_llm": 0.4449908137321472, "grad_norm": 0.02639344334602356, "global_step": 324, "epoch": 1, "lr": 0.009999973243194467} +{"train_loss": 0.008580698631703854, "train_loss_bc": 0.008138567209243774, "train_loss_llm": 0.44213131070137024, "grad_norm": 0.039085108786821365, "global_step": 325, "epoch": 1, "lr": 0.009999973243194467} +{"train_loss": 0.013355431146919727, "train_loss_bc": 0.012834073975682259, "train_loss_llm": 0.5213567614555359, "grad_norm": 0.04931477829813957, "global_step": 326, "epoch": 1, "lr": 0.009999973243194467} +{"train_loss": 0.008711469359695911, "train_loss_bc": 0.008368385955691338, "train_loss_llm": 0.3430837392807007, "grad_norm": 0.05239582434296608, "global_step": 327, "epoch": 1, "lr": 0.009999973243194467} +{"train_loss": 0.009385243989527225, "train_loss_bc": 0.008970173075795174, "train_loss_llm": 0.41507115960121155, "grad_norm": 0.05491437017917633, "global_step": 328, "epoch": 1, "lr": 0.009999971489107947} +{"train_loss": 0.011174674145877361, "train_loss_bc": 0.010818562470376492, "train_loss_llm": 0.3561112880706787, "grad_norm": 0.011578625068068504, "global_step": 329, "epoch": 1, "lr": 0.009999971489107947} +{"train_loss": 0.013502768240869045, "train_loss_bc": 0.013065600767731667, "train_loss_llm": 0.4371674656867981, "grad_norm": 0.026880592107772827, "global_step": 330, "epoch": 1, "lr": 0.009999971489107947} +{"train_loss": 0.012593085877597332, "train_loss_bc": 0.012244774959981441, "train_loss_llm": 0.34831055998802185, "grad_norm": 0.041259463876485825, "global_step": 331, "epoch": 1, "lr": 0.009999971489107947} +{"train_loss": 0.009598112665116787, "train_loss_bc": 0.009030044078826904, "train_loss_llm": 0.5680687427520752, "grad_norm": 0.05241383612155914, "global_step": 332, "epoch": 1, "lr": 0.009999971489107947} +{"train_loss": 0.01214287243783474, "train_loss_bc": 0.011670759879052639, "train_loss_llm": 0.4721129238605499, "grad_norm": 0.07233195751905441, "global_step": 333, "epoch": 1, "lr": 0.009999971489107947} +{"train_loss": 0.015494297258555889, "train_loss_bc": 0.01502845250070095, "train_loss_llm": 0.46584439277648926, "grad_norm": 0.0923396646976471, "global_step": 334, "epoch": 1, "lr": 0.009999971489107947} +{"train_loss": 0.01266135461628437, "train_loss_bc": 0.012207714840769768, "train_loss_llm": 0.4536397457122803, "grad_norm": 0.10818523913621902, "global_step": 335, "epoch": 1, "lr": 0.009999971489107947} +{"train_loss": 0.012813151814043522, "train_loss_bc": 0.012354401871562004, "train_loss_llm": 0.45874953269958496, "grad_norm": 0.1238781213760376, "global_step": 336, "epoch": 1, "lr": 0.009999969679336354} +{"train_loss": 0.013637524098157883, "train_loss_bc": 0.013167794793844223, "train_loss_llm": 0.46972957253456116, "grad_norm": 0.017869004979729652, "global_step": 337, "epoch": 1, "lr": 0.009999969679336354} +{"train_loss": 0.01720421575009823, "train_loss_bc": 0.01666909269988537, "train_loss_llm": 0.5351230502128601, "grad_norm": 0.04792848974466324, "global_step": 338, "epoch": 1, "lr": 0.009999969679336354} +{"train_loss": 0.015335088595747948, "train_loss_bc": 0.014754555188119411, "train_loss_llm": 0.5805330276489258, "grad_norm": 0.06892868131399155, "global_step": 339, "epoch": 1, "lr": 0.009999969679336354} +{"train_loss": 0.010542848147451878, "train_loss_bc": 0.01001989096403122, "train_loss_llm": 0.5229572653770447, "grad_norm": 0.0866188034415245, "global_step": 340, "epoch": 1, "lr": 0.009999969679336354} +{"train_loss": 0.01668214052915573, "train_loss_bc": 0.01612561196088791, "train_loss_llm": 0.5565277934074402, "grad_norm": 0.10684026032686234, "global_step": 341, "epoch": 1, "lr": 0.009999969679336354} +{"train_loss": 0.013128525577485561, "train_loss_bc": 0.012647897005081177, "train_loss_llm": 0.4806285798549652, "grad_norm": 0.1274142861366272, "global_step": 342, "epoch": 1, "lr": 0.009999969679336354} +{"train_loss": 0.012806318700313568, "train_loss_bc": 0.012260029092431068, "train_loss_llm": 0.5462898015975952, "grad_norm": 0.1410902589559555, "global_step": 343, "epoch": 1, "lr": 0.009999969679336354} +{"train_loss": 0.009487541392445564, "train_loss_bc": 0.009019860997796059, "train_loss_llm": 0.4676806926727295, "grad_norm": 0.15223082900047302, "global_step": 344, "epoch": 1, "lr": 0.00999996781387971} +{"train_loss": 0.016361601650714874, "train_loss_bc": 0.01573288068175316, "train_loss_llm": 0.6287200450897217, "grad_norm": 0.023004453629255295, "global_step": 345, "epoch": 1, "lr": 0.00999996781387971} +{"train_loss": 0.01248890906572342, "train_loss_bc": 0.011881167069077492, "train_loss_llm": 0.607742190361023, "grad_norm": 0.031962476670742035, "global_step": 346, "epoch": 1, "lr": 0.00999996781387971} +{"train_loss": 0.01358707994222641, "train_loss_bc": 0.013073929585516453, "train_loss_llm": 0.5131505727767944, "grad_norm": 0.04822782427072525, "global_step": 347, "epoch": 1, "lr": 0.00999996781387971} +{"train_loss": 0.015235469676554203, "train_loss_bc": 0.01475644949823618, "train_loss_llm": 0.4790200889110565, "grad_norm": 0.06381597369909286, "global_step": 348, "epoch": 1, "lr": 0.00999996781387971} +{"train_loss": 0.01751648262143135, "train_loss_bc": 0.016855884343385696, "train_loss_llm": 0.6605973243713379, "grad_norm": 0.08681994676589966, "global_step": 349, "epoch": 1, "lr": 0.00999996781387971} +{"train_loss": 0.014208652079105377, "train_loss_bc": 0.013812784105539322, "train_loss_llm": 0.3958682119846344, "grad_norm": 0.10667064785957336, "global_step": 350, "epoch": 1, "lr": 0.00999996781387971} +{"train_loss": 0.011572916992008686, "train_loss_bc": 0.011084744706749916, "train_loss_llm": 0.48817187547683716, "grad_norm": 0.1193293035030365, "global_step": 351, "epoch": 1, "lr": 0.00999996781387971} +{"train_loss": 0.01146447192877531, "train_loss_bc": 0.01082993671298027, "train_loss_llm": 0.6345353126525879, "grad_norm": 0.13804349303245544, "global_step": 352, "epoch": 1, "lr": 0.009999965892738036} +{"train_loss": 0.012645299546420574, "train_loss_bc": 0.012096133083105087, "train_loss_llm": 0.5491666793823242, "grad_norm": 0.014580151066184044, "global_step": 353, "epoch": 1, "lr": 0.009999965892738036} +{"train_loss": 0.012388558126986027, "train_loss_bc": 0.011813423596322536, "train_loss_llm": 0.57513427734375, "grad_norm": 0.029008738696575165, "global_step": 354, "epoch": 1, "lr": 0.009999965892738036} +{"train_loss": 0.015240795910358429, "train_loss_bc": 0.014640103094279766, "train_loss_llm": 0.6006927490234375, "grad_norm": 0.03927018493413925, "global_step": 355, "epoch": 1, "lr": 0.009999965892738036} +{"train_loss": 0.011942628771066666, "train_loss_bc": 0.011564518325030804, "train_loss_llm": 0.3781103491783142, "grad_norm": 0.056585244834423065, "global_step": 356, "epoch": 1, "lr": 0.009999965892738036} +{"train_loss": 0.012461038306355476, "train_loss_bc": 0.011909164488315582, "train_loss_llm": 0.5518735647201538, "grad_norm": 0.06560716778039932, "global_step": 357, "epoch": 1, "lr": 0.009999965892738036} +{"train_loss": 0.015487094409763813, "train_loss_bc": 0.015003865584731102, "train_loss_llm": 0.48322877287864685, "grad_norm": 0.0779387354850769, "global_step": 358, "epoch": 1, "lr": 0.009999965892738036} +{"train_loss": 0.010963214561343193, "train_loss_bc": 0.010492322966456413, "train_loss_llm": 0.47089120745658875, "grad_norm": 0.09135229140520096, "global_step": 359, "epoch": 1, "lr": 0.009999965892738036} +{"train_loss": 0.009490950964391232, "train_loss_bc": 0.00908201839774847, "train_loss_llm": 0.40893298387527466, "grad_norm": 0.11029430478811264, "global_step": 360, "epoch": 1, "lr": 0.009999963915911353} +{"train_loss": 0.009366891346871853, "train_loss_bc": 0.008832603693008423, "train_loss_llm": 0.534287691116333, "grad_norm": 0.011780070140957832, "global_step": 361, "epoch": 1, "lr": 0.009999963915911353} +{"train_loss": 0.01090614590793848, "train_loss_bc": 0.010547686368227005, "train_loss_llm": 0.3584598898887634, "grad_norm": 0.018876101821660995, "global_step": 362, "epoch": 1, "lr": 0.009999963915911353} +{"train_loss": 0.01047124806791544, "train_loss_bc": 0.00999793503433466, "train_loss_llm": 0.4733126759529114, "grad_norm": 0.0347822941839695, "global_step": 363, "epoch": 1, "lr": 0.009999963915911353} +{"train_loss": 0.013421941548585892, "train_loss_bc": 0.01281831320375204, "train_loss_llm": 0.603628396987915, "grad_norm": 0.0416879765689373, "global_step": 364, "epoch": 1, "lr": 0.009999963915911353} +{"train_loss": 0.012736831791698933, "train_loss_bc": 0.01226730551570654, "train_loss_llm": 0.4695262610912323, "grad_norm": 0.061996493488550186, "global_step": 365, "epoch": 1, "lr": 0.009999963915911353} +{"train_loss": 0.015469906851649284, "train_loss_bc": 0.014822958037257195, "train_loss_llm": 0.6469485759735107, "grad_norm": 0.06635187566280365, "global_step": 366, "epoch": 1, "lr": 0.009999963915911353} +{"train_loss": 0.012995130382478237, "train_loss_bc": 0.012509873136878014, "train_loss_llm": 0.4852573275566101, "grad_norm": 0.08635496348142624, "global_step": 367, "epoch": 1, "lr": 0.009999963915911353} +{"train_loss": 0.00998271256685257, "train_loss_bc": 0.009579310193657875, "train_loss_llm": 0.4034022390842438, "grad_norm": 0.10520216077566147, "global_step": 368, "epoch": 1, "lr": 0.009999961883399683} +{"train_loss": 0.0111773069947958, "train_loss_bc": 0.010663645341992378, "train_loss_llm": 0.5136619806289673, "grad_norm": 0.013957403600215912, "global_step": 369, "epoch": 1, "lr": 0.009999961883399683} +{"train_loss": 0.010809720493853092, "train_loss_bc": 0.010342610068619251, "train_loss_llm": 0.46711012721061707, "grad_norm": 0.028792060911655426, "global_step": 370, "epoch": 1, "lr": 0.009999961883399683} +{"train_loss": 0.013553488999605179, "train_loss_bc": 0.01306125707924366, "train_loss_llm": 0.49223223328590393, "grad_norm": 0.03614845499396324, "global_step": 371, "epoch": 1, "lr": 0.009999961883399683} +{"train_loss": 0.008836585097014904, "train_loss_bc": 0.008274243213236332, "train_loss_llm": 0.5623416900634766, "grad_norm": 0.04330018162727356, "global_step": 372, "epoch": 1, "lr": 0.009999961883399683} +{"train_loss": 0.008892491459846497, "train_loss_bc": 0.008484721183776855, "train_loss_llm": 0.40777063369750977, "grad_norm": 0.061764974147081375, "global_step": 373, "epoch": 1, "lr": 0.009999961883399683} +{"train_loss": 0.011546115390956402, "train_loss_bc": 0.011045539751648903, "train_loss_llm": 0.5005753636360168, "grad_norm": 0.07327363640069962, "global_step": 374, "epoch": 1, "lr": 0.009999961883399683} +{"train_loss": 0.013276085257530212, "train_loss_bc": 0.012809041887521744, "train_loss_llm": 0.4670429527759552, "grad_norm": 0.08208861202001572, "global_step": 375, "epoch": 1, "lr": 0.009999961883399683} +{"train_loss": 0.010347362607717514, "train_loss_bc": 0.009987818077206612, "train_loss_llm": 0.35954442620277405, "grad_norm": 0.09811953455209732, "global_step": 376, "epoch": 1, "lr": 0.009999959795203048} +{"train_loss": 0.010294072329998016, "train_loss_bc": 0.009917671792209148, "train_loss_llm": 0.37640058994293213, "grad_norm": 0.01656760647892952, "global_step": 377, "epoch": 1, "lr": 0.009999959795203048} +{"train_loss": 0.012072709389030933, "train_loss_bc": 0.011694014072418213, "train_loss_llm": 0.37869489192962646, "grad_norm": 0.038055505603551865, "global_step": 378, "epoch": 1, "lr": 0.009999959795203048} +{"train_loss": 0.013379747048020363, "train_loss_bc": 0.012885721400380135, "train_loss_llm": 0.4940251410007477, "grad_norm": 0.048791639506816864, "global_step": 379, "epoch": 1, "lr": 0.009999959795203048} +{"train_loss": 0.010779447853565216, "train_loss_bc": 0.010418189689517021, "train_loss_llm": 0.3612585663795471, "grad_norm": 0.06931304186582565, "global_step": 380, "epoch": 1, "lr": 0.009999959795203048} +{"train_loss": 0.013290653005242348, "train_loss_bc": 0.0127793550491333, "train_loss_llm": 0.5112981796264648, "grad_norm": 0.08744651824235916, "global_step": 381, "epoch": 1, "lr": 0.009999959795203048} +{"train_loss": 0.012638435699045658, "train_loss_bc": 0.012112822383642197, "train_loss_llm": 0.5256132483482361, "grad_norm": 0.08734080195426941, "global_step": 382, "epoch": 1, "lr": 0.009999959795203048} +{"train_loss": 0.01276348065584898, "train_loss_bc": 0.01233246922492981, "train_loss_llm": 0.43101125955581665, "grad_norm": 0.11115267127752304, "global_step": 383, "epoch": 1, "lr": 0.009999959795203048} +{"train_loss": 0.013118097558617592, "train_loss_bc": 0.012599822133779526, "train_loss_llm": 0.5182749629020691, "grad_norm": 0.11958809196949005, "global_step": 384, "epoch": 1, "lr": 0.009999957651321473} +{"train_loss": 0.01025390811264515, "train_loss_bc": 0.009825386106967926, "train_loss_llm": 0.4285220801830292, "grad_norm": 0.018952684476971626, "global_step": 385, "epoch": 1, "lr": 0.009999957651321473} +{"train_loss": 0.010963615961372852, "train_loss_bc": 0.010562529787421227, "train_loss_llm": 0.40108659863471985, "grad_norm": 0.03306251019239426, "global_step": 386, "epoch": 1, "lr": 0.009999957651321473} +{"train_loss": 0.010541597381234169, "train_loss_bc": 0.010126705281436443, "train_loss_llm": 0.4148922562599182, "grad_norm": 0.042092613875865936, "global_step": 387, "epoch": 1, "lr": 0.009999957651321473} +{"train_loss": 0.008702381514012814, "train_loss_bc": 0.008199061267077923, "train_loss_llm": 0.5033202171325684, "grad_norm": 0.043085530400276184, "global_step": 388, "epoch": 1, "lr": 0.009999957651321473} +{"train_loss": 0.008918298408389091, "train_loss_bc": 0.008477844297885895, "train_loss_llm": 0.4404541254043579, "grad_norm": 0.055809881538152695, "global_step": 389, "epoch": 1, "lr": 0.009999957651321473} +{"train_loss": 0.009737114422023296, "train_loss_bc": 0.009314477443695068, "train_loss_llm": 0.4226372539997101, "grad_norm": 0.07001017779111862, "global_step": 390, "epoch": 1, "lr": 0.009999957651321473} +{"train_loss": 0.010793833062052727, "train_loss_bc": 0.010402481071650982, "train_loss_llm": 0.3913517892360687, "grad_norm": 0.07023876160383224, "global_step": 391, "epoch": 1, "lr": 0.009999957651321473} +{"train_loss": 0.010602637194097042, "train_loss_bc": 0.010235416702926159, "train_loss_llm": 0.36722007393836975, "grad_norm": 0.09314236044883728, "global_step": 392, "epoch": 1, "lr": 0.00999995545175498} +{"train_loss": 0.013229576870799065, "train_loss_bc": 0.01280839741230011, "train_loss_llm": 0.4211796224117279, "grad_norm": 0.010268638841807842, "global_step": 393, "epoch": 1, "lr": 0.00999995545175498} +{"train_loss": 0.006598448846489191, "train_loss_bc": 0.006211167201399803, "train_loss_llm": 0.38728177547454834, "grad_norm": 0.012780013494193554, "global_step": 394, "epoch": 1, "lr": 0.00999995545175498} +{"train_loss": 0.010388839058578014, "train_loss_bc": 0.00987747497856617, "train_loss_llm": 0.5113644599914551, "grad_norm": 0.015021376311779022, "global_step": 395, "epoch": 1, "lr": 0.00999995545175498} +{"train_loss": 0.014541227370500565, "train_loss_bc": 0.014116690494120121, "train_loss_llm": 0.4245363771915436, "grad_norm": 0.02319067344069481, "global_step": 396, "epoch": 1, "lr": 0.00999995545175498} +{"train_loss": 0.007016970310360193, "train_loss_bc": 0.006666948553174734, "train_loss_llm": 0.3500216007232666, "grad_norm": 0.027652941644191742, "global_step": 397, "epoch": 1, "lr": 0.00999995545175498} +{"train_loss": 0.007159297354519367, "train_loss_bc": 0.006787103600800037, "train_loss_llm": 0.3721938133239746, "grad_norm": 0.03523283079266548, "global_step": 398, "epoch": 1, "lr": 0.00999995545175498} +{"train_loss": 0.009045234881341457, "train_loss_bc": 0.008669788017868996, "train_loss_llm": 0.37544700503349304, "grad_norm": 0.049455754458904266, "global_step": 399, "epoch": 1, "lr": 0.00999995545175498} +{"train_loss": 0.008435660041868687, "train_loss_bc": 0.008010749705135822, "train_loss_llm": 0.42491012811660767, "grad_norm": 0.0516187846660614, "global_step": 400, "epoch": 1, "lr": 0.009999953196503595} +{"train_loss": 0.010924630798399448, "train_loss_bc": 0.01047501340508461, "train_loss_llm": 0.44961774349212646, "grad_norm": 0.019651779904961586, "global_step": 401, "epoch": 1, "lr": 0.009999953196503595} +{"train_loss": 0.006148731801658869, "train_loss_bc": 0.0057486011646687984, "train_loss_llm": 0.4001305103302002, "grad_norm": 0.02275880053639412, "global_step": 402, "epoch": 1, "lr": 0.009999953196503595} +{"train_loss": 0.007822881452739239, "train_loss_bc": 0.00734285730868578, "train_loss_llm": 0.48002392053604126, "grad_norm": 0.03126152977347374, "global_step": 403, "epoch": 1, "lr": 0.009999953196503595} +{"train_loss": 0.00978546217083931, "train_loss_bc": 0.009389730170369148, "train_loss_llm": 0.3957315683364868, "grad_norm": 0.0482921376824379, "global_step": 404, "epoch": 1, "lr": 0.009999953196503595} +{"train_loss": 0.00851339939981699, "train_loss_bc": 0.008063830435276031, "train_loss_llm": 0.4495692849159241, "grad_norm": 0.05878711864352226, "global_step": 405, "epoch": 1, "lr": 0.009999953196503595} +{"train_loss": 0.012543894350528717, "train_loss_bc": 0.012006400153040886, "train_loss_llm": 0.5374938249588013, "grad_norm": 0.07312177866697311, "global_step": 406, "epoch": 1, "lr": 0.009999953196503595} +{"train_loss": 0.004626167938113213, "train_loss_bc": 0.004230715800076723, "train_loss_llm": 0.39545202255249023, "grad_norm": 0.0742395669221878, "global_step": 407, "epoch": 1, "lr": 0.009999953196503595} +{"train_loss": 0.010349040850996971, "train_loss_bc": 0.00993720255792141, "train_loss_llm": 0.41183826327323914, "grad_norm": 0.08678025007247925, "global_step": 408, "epoch": 1, "lr": 0.009999950885567342} +{"train_loss": 0.006372408010065556, "train_loss_bc": 0.006014788523316383, "train_loss_llm": 0.35761937499046326, "grad_norm": 0.010603218339383602, "global_step": 409, "epoch": 1, "lr": 0.009999950885567342} +{"train_loss": 0.009057055227458477, "train_loss_bc": 0.008602965623140335, "train_loss_llm": 0.45408937335014343, "grad_norm": 0.029408499598503113, "global_step": 410, "epoch": 1, "lr": 0.009999950885567342} +{"train_loss": 0.006392288021743298, "train_loss_bc": 0.006004677154123783, "train_loss_llm": 0.38761094212532043, "grad_norm": 0.040126774460077286, "global_step": 411, "epoch": 1, "lr": 0.009999950885567342} +{"train_loss": 0.007694873958826065, "train_loss_bc": 0.007268495857715607, "train_loss_llm": 0.42637819051742554, "grad_norm": 0.051206592470407486, "global_step": 412, "epoch": 1, "lr": 0.009999950885567342} +{"train_loss": 0.00797163788229227, "train_loss_bc": 0.007495692931115627, "train_loss_llm": 0.47594505548477173, "grad_norm": 0.06213797628879547, "global_step": 413, "epoch": 1, "lr": 0.009999950885567342} +{"train_loss": 0.008228043094277382, "train_loss_bc": 0.007861753925681114, "train_loss_llm": 0.366288959980011, "grad_norm": 0.07453621178865433, "global_step": 414, "epoch": 1, "lr": 0.009999950885567342} +{"train_loss": 0.007793589495122433, "train_loss_bc": 0.00744793564081192, "train_loss_llm": 0.34565383195877075, "grad_norm": 0.08818801492452621, "global_step": 415, "epoch": 1, "lr": 0.009999950885567342} +{"train_loss": 0.010696557350456715, "train_loss_bc": 0.01020999439060688, "train_loss_llm": 0.4865627586841583, "grad_norm": 0.10583324730396271, "global_step": 416, "epoch": 1, "lr": 0.009999948518946245} +{"train_loss": 0.016479648649692535, "train_loss_bc": 0.016043461859226227, "train_loss_llm": 0.43618765473365784, "grad_norm": 0.02455216646194458, "global_step": 417, "epoch": 1, "lr": 0.009999948518946245} diff --git a/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/train.log b/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/train.log new file mode 100644 index 0000000000000000000000000000000000000000..74fda0185d5ac65159de7f7872a6e30ca3433217 --- /dev/null +++ b/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/train.log @@ -0,0 +1,8 @@ +[2026-01-21 13:12:21,183][numexpr.utils][INFO] - Note: detected 112 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. +[2026-01-21 13:12:21,183][numexpr.utils][INFO] - Note: NumExpr detected 112 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 16. +[2026-01-21 13:12:21,183][numexpr.utils][INFO] - NumExpr defaulting to 16 threads. +[2026-01-21 13:12:27,199][datasets][INFO] - PyTorch version 2.2.2 available. +[2026-01-21 13:12:27,200][datasets][INFO] - TensorFlow version 2.15.1 available. +[2026-01-21 13:12:27,201][datasets][INFO] - JAX version 0.4.30 available. +[2026-01-21 13:12:35,484][absl][INFO] - MUJOCO_GL=osmesa, attempting to import specified OpenGL backend. +[2026-01-21 13:12:35,493][absl][INFO] - MuJoCo library version is: 2.3.7 diff --git a/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/debug-internal.log b/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..046dcb6c0b0db686eeee21c8c44f6d67b74660a2 --- /dev/null +++ b/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/debug-internal.log @@ -0,0 +1,8 @@ +{"time":"2026-01-21T13:12:36.44966483+08:00","level":"INFO","msg":"using version","core version":"0.18.6"} +{"time":"2026-01-21T13:12:36.449675304+08:00","level":"INFO","msg":"created symlink","path":"/work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug-core.log"} +{"time":"2026-01-21T13:12:36.564980991+08:00","level":"INFO","msg":"created new stream","id":"yhjy9tz9"} +{"time":"2026-01-21T13:12:36.565006242+08:00","level":"INFO","msg":"stream: started","id":"yhjy9tz9"} +{"time":"2026-01-21T13:12:36.565029519+08:00","level":"INFO","msg":"sender: started","stream_id":"yhjy9tz9"} +{"time":"2026-01-21T13:12:36.565021074+08:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"yhjy9tz9"}} +{"time":"2026-01-21T13:12:36.565029409+08:00","level":"INFO","msg":"handler: started","stream_id":{"value":"yhjy9tz9"}} +{"time":"2026-01-21T13:12:37.456830647+08:00","level":"INFO","msg":"Starting system monitor"} diff --git a/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/debug.log b/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..7cc40016b0d531704b8d3f8f3b8124e6afe091d9 --- /dev/null +++ b/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/debug.log @@ -0,0 +1,26 @@ +2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Current SDK version is 0.18.6 +2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Configure stats pid to 3666395 +2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Loading settings from /home/u1131674/.config/wandb/settings +2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Loading settings from /work/u1131674/LLM-BC/wandb/settings +2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Loading settings from environment variables: {} +2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None} +2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/work/u1131674/LLM-BC/train.py', 'program': '/work/u1131674/LLM-BC/./train.py'} +2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Applying login settings: {} +2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:_log_setup():533] Logging user logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug.log +2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:_log_setup():534] Logging internal logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug-internal.log +2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:init():619] calling init triggers +2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:init():626] wandb.init called with sweep_config: {} +config: {'name': 'train_llmbc_lowdim', '_target_': 'llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace', 'obs_dim': 9, 'action_dim': 4, 'task_name': 'box-close-v2', 'exp_name': 'default', 'model_name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'n_latency_steps': 0, 'past_action_visible': False, 'llm_orig_expert_feedback': True, 'llm_do_sample': False, 'policy': {'_target_': 'llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy', 'model': {'_target_': 'llmbc.model.policy.policy_mlp.PolicyMLP', 'input_size': 9, 'hidden_size': [256, 256], 'output_size': 4, 'activation': 'relu', 'n_obs_steps': 1, 'n_action_steps': 1}, 'obs_dim': 9, 'action_dim': 4, 'llm_discriminator': {'_target_': 'llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator', 'task_id': 'box-close-v2', 'llm_translator': {'_target_': 'llmbc.translator.llm_translator.LLMTranslator', 'cfg': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/13.12.19_HuggingFaceTB/SmolLM2-135M-Instruct'}}}, 'obs_dim': 9, 'action_dim': 4, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1}}, 'loss_bc_weight': 1.0, 'loss_llm_weight': 0.001, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'normalize_llm_loss': True}, 'dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'val_dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'optimizer': {'_target_': 'torch.optim.AdamW', 'lr': 0.01, 'betas': [0.95, 0.999], 'eps': 1e-08, 'weight_decay': 1e-06}, 'training': {'device': 'cuda:0', 'seed': 42, 'debug': False, 'resume': False, 'lr_scheduler': 'cosine', 'lr_warmup_steps': 10, 'num_epochs': 1001, 'gradient_accumulate_every': 8, 'grad_norm_clip': 0.5, 'rollout_every': 5, 'checkpoint_every': 5, 'val_every': 1, 'sample_every': 5, 'sample_max_batch': 128, 'max_train_steps': None, 'max_val_steps': None, 'tqdm_interval_sec': 1.0}, 'logging': {'project': 'box-close-v2-training', 'resume': True, 'mode': 'online', 'name': '2026.01.21-13.12.19_train_llmbc_lowdim_box-close-v2', 'tags': ['train_llmbc_lowdim', 'box-close-v2', 'default'], 'id': None, 'group': None}, 'checkpoint': {'topk': {'monitor_key': 'test_success_rate', 'mode': 'max', 'k': 5, 'format_str': 'epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt'}, 'save_last_ckpt': True, 'save_last_snapshot': False}, 'multi_run': {'run_dir': 'data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2', 'wandb_name_base': '2026.01.21-13.12.19_train_llmbc_lowdim_box-close-v2'}, 'task': {'name': 'box-close-v2', 'obs_dim': 9, 'action_dim': 4, 'env_runner': {'_target_': 'llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner', 'env_name': 'llf-metaworld-box-close-v2', 'n_train': 10, 'n_test': 50, 'n_envs': 10, 'max_steps': 30, 'n_obs_steps': 1, 'n_action_steps': 1, 'instruction_type': 'b', 'feedback_type': ['hp', 'hn', 'fp'], 'visual': False, 'discount': 0.9}, 'dataset': {'_target_': 'llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset', 'data_path': 'datasets/box-close-v2.pt', 'data_path2': 'datasets/box-close-v2.pt', 'horizon': 1, 'pad_before': 0, 'pad_after': 0, 'obs_eef_target': True, 'use_manual_normalizer': False, 'val_ratio': 0.1, 'dummy_normalizer': True}, 'instructor': {'_target_': 'llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor'}}, 'llm': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/13.12.19_HuggingFaceTB/SmolLM2-135M-Instruct'}}}} +2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:init():669] starting backend +2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:init():673] sending inform_init request +2026-01-21 13:12:36,447 INFO MainThread:3666395 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2026-01-21 13:12:36,447 INFO MainThread:3666395 [wandb_init.py:init():686] backend started and connected +2026-01-21 13:12:36,456 INFO MainThread:3666395 [wandb_init.py:init():781] updated telemetry +2026-01-21 13:12:36,506 INFO MainThread:3666395 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout +2026-01-21 13:12:37,452 INFO MainThread:3666395 [wandb_init.py:init():867] starting run threads in backend +2026-01-21 13:12:38,016 INFO MainThread:3666395 [wandb_run.py:_console_start():2451] atexit reg +2026-01-21 13:12:38,016 INFO MainThread:3666395 [wandb_run.py:_redirect():2299] redirect: wrap_raw +2026-01-21 13:12:38,016 INFO MainThread:3666395 [wandb_run.py:_redirect():2364] Wrapping output streams. +2026-01-21 13:12:38,016 INFO MainThread:3666395 [wandb_run.py:_redirect():2389] Redirects installed. +2026-01-21 13:12:38,019 INFO MainThread:3666395 [wandb_init.py:init():911] run started, returning control to user process +2026-01-21 13:12:38,019 INFO MainThread:3666395 [wandb_run.py:_config_callback():1389] config_cb None None {'output_dir': '/work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2'} diff --git a/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/files/output.log b/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..c165b48414bb1159f704e97eb67cd26aa6afdec5 --- /dev/null +++ b/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/files/output.log @@ -0,0 +1,3 @@ +Eval MetaworldLowdimRunner 1/6: 0%| | 0/30 [00:00