Upload folder using huggingface_hub

- .gitattributes +2 -0
- 2025-11-1/exp1_len256/dream-inst/dllm/mmlu_pro/.hydra/config.yaml +53 -0
- 2025-11-1/exp1_len256/dream-inst/dllm/mmlu_pro/.hydra/hydra.yaml +164 -0
- 2025-11-1/exp1_len256/dream-inst/dllm/mmlu_pro/.hydra/overrides.yaml +6 -0
- 2025-11-1/exp1_len256/dream-inst/dllm/mmlu_pro/eval.log +249 -0
- 2025-11-1/exp1_len256/dream-inst/dllm/mmlu_pro/results.json +3 -0
- 2025-11-1/exp1_len256/dream-inst/dllm/mmlu_pro/stderr.log +266 -0
- 2025-11-1/exp1_len256/dream-inst/prefix/mmlu_pro/.hydra/config.yaml +51 -0
- 2025-11-1/exp1_len256/dream-inst/prefix/mmlu_pro/.hydra/hydra.yaml +164 -0
- 2025-11-1/exp1_len256/dream-inst/prefix/mmlu_pro/.hydra/overrides.yaml +6 -0
- 2025-11-1/exp1_len256/dream-inst/prefix/mmlu_pro/eval.log +249 -0
- 2025-11-1/exp1_len256/dream-inst/prefix/mmlu_pro/results.json +3 -0
- 2025-11-1/exp1_len256/dream-inst/prefix/mmlu_pro/stderr.log +174 -0
.gitattributes CHANGED
@@ -38,3 +38,5 @@ outputs/2025-11-18/exp1_len1024/dream-inst/heat/gsm8k/results.json filter=lfs diff=lfs merge=lfs -text
 outputs/2025-11-18/exp1_len1024/dream-inst/no_cache/gsm8k/results.json filter=lfs diff=lfs merge=lfs -text
 outputs/2025-11-18/exp1_len1024/dream-inst/prefix/gsm8k/results.json filter=lfs diff=lfs merge=lfs -text
 outputs/2025-11-18/exp2_random_gen/dream-inst/gsm8k_len256/results.json filter=lfs diff=lfs merge=lfs -text
+2025-11-1/exp1_len256/dream-inst/dllm/mmlu_pro/results.json filter=lfs diff=lfs merge=lfs -text
+2025-11-1/exp1_len256/dream-inst/prefix/mmlu_pro/results.json filter=lfs diff=lfs merge=lfs -text
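The two added patterns route the new mmlu_pro results.json files through Git LFS, so only pointer files land in the git history. As a quick way to sanity-check which paths a .gitattributes file sends to LFS, here is a minimal Python sketch; note that the helper name is ours and `fnmatch` only approximates gitattributes pattern semantics:

```python
import fnmatch

def lfs_tracked(path: str, gitattributes: str = ".gitattributes") -> bool:
    """Rough check: does any LFS-tracking pattern in .gitattributes match this path?"""
    with open(gitattributes) as f:
        for line in f:
            parts = line.split()
            # A tracking line looks like: <pattern> filter=lfs diff=lfs merge=lfs -text
            if len(parts) >= 2 and "filter=lfs" in parts[1:]:
                if fnmatch.fnmatch(path, parts[0]):
                    return True
    return False

print(lfs_tracked("2025-11-1/exp1_len256/dream-inst/dllm/mmlu_pro/results.json"))  # True
```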
2025-11-1/exp1_len256/dream-inst/dllm/mmlu_pro/.hydra/config.yaml ADDED
@@ -0,0 +1,53 @@
+model:
+  generation:
+    mask_token_id: 151666
+    eot_token_id: 151643
+    pad_token_id: 151643
+    add_bos_token: true
+    alg: maskgit_plus
+  name: dream-inst
+  path: ${oc.env:DREAM_INST_PATH}
+generation:
+  strategy: vanilla
+  threshold: null
+  factor: null
+  alg: maskgit_plus
+  gen_length: null
+  block_length: null
+  steps: null
+  temperature: 0.0
+  top_p: null
+  top_k: null
+  debias: false
+  output_probs: false
+cache:
+  _target_: src.cache.dLLMCache
+  kr: 1
+  kp: 50
+  rou: 0.25
+seed: 1234
+batch_size: 1
+attn_implementation: eager
+dataset:
+  name: mmlu_pro
+  size: null
+  n_shot: null
+  system_prompt: null
+  batch_size: 1
+  mc_num: null
+  max_length: 4096
+  is_check_greedy: true
+  add_bos_token: true
+  nll_type: mc
+  log_type: ftb
+eval_args:
+  log_samples: true
+  tasks: ${..dataset.name}
+  num_fewshot: ${..dataset.n_shot}
+  batch_size: ${..batch_size}
+  limit: ${..dataset.size}
+  confirm_run_unsafe_code: true
+  random_seed: ${..seed}
+  fewshot_random_seed: ${..seed}
+  numpy_random_seed: ${..seed}
+  torch_random_seed: ${..seed}
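For readers unfamiliar with the `${...}` syntax above: these are OmegaConf interpolations. `${oc.env:DREAM_INST_PATH}` reads an environment variable at resolution time, and `${..seed}` walks up one level from the `eval_args` node to the top-level key. A minimal sketch of loading and resolving this file (the env-var value is an assumption):

```python
import os
from omegaconf import OmegaConf

os.environ["DREAM_INST_PATH"] = "/models/Dream-v0-Instruct-7B"  # assumed value
cfg = OmegaConf.load(".hydra/config.yaml")

print(cfg.model.path)             # resolved from the DREAM_INST_PATH env var
print(cfg.eval_args.random_seed)  # 1234, via ${..seed} -> top-level seed
print(cfg.eval_args.tasks)        # mmlu_pro, via ${..dataset.name}
```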
2025-11-1/exp1_len256/dream-inst/dllm/mmlu_pro/.hydra/hydra.yaml ADDED
@@ -0,0 +1,164 @@
+hydra:
+  run:
+    dir: outputs/2025-11-19/exp1_len256/dream-inst/dllm/mmlu_pro
+  sweep:
+    dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
+    subdir: ${hydra.job.num}
+  launcher:
+    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
+  sweeper:
+    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
+    max_batch_size: null
+    params: null
+  help:
+    app_name: ${hydra.job.name}
+    header: '${hydra.help.app_name} is powered by Hydra.
+
+      '
+    footer: 'Powered by Hydra (https://hydra.cc)
+
+      Use --hydra-help to view Hydra specific help
+
+      '
+    template: '${hydra.help.header}
+
+      == Configuration groups ==
+
+      Compose your configuration from those groups (group=option)
+
+
+      $APP_CONFIG_GROUPS
+
+
+      == Config ==
+
+      Override anything in the config (foo.bar=value)
+
+
+      $CONFIG
+
+
+      ${hydra.help.footer}
+
+      '
+  hydra_help:
+    template: 'Hydra (${hydra.runtime.version})
+
+      See https://hydra.cc for more info.
+
+
+      == Flags ==
+
+      $FLAGS_HELP
+
+
+      == Configuration groups ==
+
+      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
+      to command line)
+
+
+      $HYDRA_CONFIG_GROUPS
+
+
+      Use ''--cfg hydra'' to Show the Hydra config.
+
+      '
+    hydra_help: ???
+  hydra_logging:
+    version: 1
+    formatters:
+      simple:
+        format: '[%(asctime)s][HYDRA] %(message)s'
+    handlers:
+      console:
+        class: logging.StreamHandler
+        formatter: simple
+        stream: ext://sys.stdout
+    root:
+      level: INFO
+      handlers:
+      - console
+    loggers:
+      logging_example:
+        level: DEBUG
+    disable_existing_loggers: false
+  job_logging:
+    version: 1
+    formatters:
+      simple:
+        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
+    handlers:
+      console:
+        class: logging.StreamHandler
+        formatter: simple
+        stream: ext://sys.stdout
+      file:
+        class: logging.FileHandler
+        formatter: simple
+        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
+    root:
+      level: INFO
+      handlers:
+      - console
+      - file
+    disable_existing_loggers: false
+  env: {}
+  mode: RUN
+  searchpath: []
+  callbacks: {}
+  output_subdir: .hydra
+  overrides:
+    hydra:
+    - hydra.run.dir=outputs/2025-11-19/exp1_len256/dream-inst/dllm/mmlu_pro
+    - hydra.mode=RUN
+    task:
+    - dataset.name=mmlu_pro
+    - model=dream-inst
+    - cache=dllm
+    - generation=vanilla
+    - batch_size=1
+    - seed=1234
+  job:
+    name: eval
+    chdir: null
+    override_dirname: batch_size=1,cache=dllm,dataset.name=mmlu_pro,generation=vanilla,model=dream-inst,seed=1234
+    id: ???
+    num: ???
+    config_name: eval
+    env_set: {}
+    env_copy: []
+    config:
+      override_dirname:
+        kv_sep: '='
+        item_sep: ','
+        exclude_keys: []
+  runtime:
+    version: 1.3.2
+    version_base: '1.3'
+    cwd: /xfr_ceph_sh/liuchonghan/HEAT/heat
+    config_sources:
+    - path: hydra.conf
+      schema: pkg
+      provider: hydra
+    - path: /xfr_ceph_sh/liuchonghan/HEAT/heat/configs
+      schema: file
+      provider: main
+    - path: ''
+      schema: structured
+      provider: schema
+    output_dir: /xfr_ceph_sh/liuchonghan/HEAT/heat/outputs/2025-11-19/exp1_len256/dream-inst/dllm/mmlu_pro
+    choices:
+      cache: dllm
+      generation: vanilla
+      model: dream-inst
+      hydra/env: default
+      hydra/callbacks: null
+      hydra/job_logging: default
+      hydra/hydra_logging: default
+      hydra/hydra_help: default
+      hydra/help: default
+      hydra/sweeper: basic
+      hydra/launcher: basic
+      hydra/output: default
+  verbose: false
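The `job_logging` block above is a standard Python `logging` dictConfig, which is why each run directory also contains an `eval.log`: Hydra installs a console handler plus a `FileHandler` pointing at `${hydra.runtime.output_dir}/${hydra.job.name}.log`. A minimal sketch of applying an equivalent config by hand (the literal `eval.log` filename stands in for the value Hydra resolves):

```python
import logging
import logging.config

logging.config.dictConfig({
    "version": 1,
    "formatters": {
        "simple": {"format": "[%(asctime)s][%(name)s][%(levelname)s] - %(message)s"},
    },
    "handlers": {
        "console": {"class": "logging.StreamHandler", "formatter": "simple",
                    "stream": "ext://sys.stdout"},
        "file": {"class": "logging.FileHandler", "formatter": "simple",
                 "filename": "eval.log"},  # Hydra resolves this from the run's output dir
    },
    "root": {"level": "INFO", "handlers": ["console", "file"]},
    "disable_existing_loggers": False,
})
logging.getLogger(__name__).info("hello")  # goes to both stdout and eval.log
```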
2025-11-1/exp1_len256/dream-inst/dllm/mmlu_pro/.hydra/overrides.yaml ADDED
@@ -0,0 +1,6 @@
+- dataset.name=mmlu_pro
+- model=dream-inst
+- cache=dllm
+- generation=vanilla
+- batch_size=1
+- seed=1234
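These six overrides are what Hydra recorded from the command line. A run with the same configuration can plausibly be re-composed programmatically; a minimal sketch using Hydra's compose API, assuming the `configs` directory and `eval` config name recorded in hydra.yaml and a working directory at the repo root:

```python
from hydra import compose, initialize

# config_path is relative to this file; "configs" matches the file source in hydra.yaml.
with initialize(config_path="configs", version_base="1.3"):
    cfg = compose(
        config_name="eval",
        overrides=[
            "dataset.name=mmlu_pro",
            "model=dream-inst",
            "cache=dllm",
            "generation=vanilla",
            "batch_size=1",
            "seed=1234",
        ],
    )
    print(cfg.dataset.name)  # mmlu_pro
```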
2025-11-1/exp1_len256/dream-inst/dllm/mmlu_pro/eval.log ADDED
@@ -0,0 +1,249 @@
+[2025-11-19 17:39:42,871][accelerate.utils.other][WARNING] - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+[2025-11-19 17:39:51,105][lm_eval.evaluator][INFO] - Setting random seed to 1234 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234
+[2025-11-19 17:39:51,105][lm_eval.evaluator][INFO] - Using pre-initialized model
[... the same seed-setting / "Using pre-initialized model" pair repeated for the other 7 ranks, 17:39:51,156 through 17:39:51,562 ...]
+[2025-11-19 17:40:34,823][lm_eval.evaluator][INFO] - mmlu_pro_biology: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[... the identical "Using gen_kwargs" line for each of the 14 MMLU-Pro subtasks (biology, business, chemistry, computer_science, economics, engineering, health, history, law, math, other, philosophy, physics, psychology), repeated once per rank ...]
+[2025-11-19 17:40:34,825][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_biology on rank 5...
[... "Building contexts" lines for every subtask on ranks 0-7, 17:40:34,825 through 17:40:40,427 ...]
+[2025-11-19 17:40:40,447][lm_eval.evaluator][INFO] - Running generate_until requests
[... the same line repeated once per rank (8 processes in total) ...]
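The repeated seed-setting lines correspond to each of the 8 data-parallel processes seeding its own RNGs from the configured `seed: 1234`. A minimal sketch of what such seeding typically involves (not the harness's exact code):

```python
import random

import numpy as np
import torch

def set_all_seeds(seed: int = 1234) -> None:
    """Seed Python, NumPy, and PyTorch RNGs, mirroring the log lines above."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)  # also seeds the CUDA RNGs on current PyTorch

set_all_seeds(1234)
```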
2025-11-1/exp1_len256/dream-inst/dllm/mmlu_pro/results.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec3caae7416644291124ab09253fed704c145e61302e62b200add7168ef9da8e
+size 10543271
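What was committed here is not the JSON itself but a Git LFS pointer: the three lines name the pointer-spec version, the SHA-256 of the real file, and its size in bytes (about 10.5 MB). A minimal sketch for reading such a pointer (field names follow the LFS pointer spec; the helper is ours):

```python
def parse_lfs_pointer(path: str) -> dict:
    """Parse a git-lfs pointer file into {'version': ..., 'oid': ..., 'size': ...}."""
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    fields["size"] = int(fields["size"])
    return fields

ptr = parse_lfs_pointer("results.json")
print(ptr["oid"], ptr["size"])  # sha256:ec3caae7... 10543271
```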
2025-11-1/exp1_len256/dream-inst/dllm/mmlu_pro/stderr.log
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 0 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 1 |
0%| | 0/12 [00:00<?, ?it/s]
|
|
|
|
|
|
|
|
|
|
| 2 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 3 |
0%| | 0/13 [00:00<?, ?it/s]
|
|
|
|
|
|
|
|
|
|
| 4 |
0%| | 0/13 [00:00<?, ?it/s]
|
|
|
|
|
|
|
| 5 |
0%| | 0/12 [00:00<?, ?it/s]
|
|
|
|
| 6 |
0%| | 0/12 [00:00<?, ?it/s]
|
|
|
|
| 7 |
0%| | 0/13 [00:00<?, ?it/s]
|
|
|
|
| 8 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 9 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 10 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 11 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 12 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 13 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 14 |
0%| | 0/13 [00:00<?, ?it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
0%| | 0/13 [00:00<?, ?it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 17 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 18 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 19 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 20 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 21 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 22 |
0%| | 0/12 [00:00<?, ?it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
0%| | 0/13 [00:00<?, ?it/s]
|
|
|
|
| 24 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 25 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 26 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 27 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 28 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 29 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 30 |
0%| | 0/12 [00:00<?, ?it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
0%| | 0/13 [00:00<?, ?it/s]
|
|
|
|
| 32 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 33 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 34 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 35 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 36 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 37 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 38 |
0%| | 0/12 [00:00<?, ?it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
0%| | 0/13 [00:00<?, ?it/s]
|
|
|
|
| 40 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 41 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 42 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 43 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 44 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 45 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 46 |
0%| | 0/13 [00:00<?, ?it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
0%| | 0/13 [00:00<?, ?it/s]
|
|
|
|
| 48 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 49 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 50 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 51 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 52 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 53 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 54 |
0%| | 0/12 [00:00<?, ?it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
0%| | 0/13 [00:00<?, ?it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 57 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 58 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 59 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 60 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 61 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 62 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 63 |
0%| | 0/13 [00:00<?, ?it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 65 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 66 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 67 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 68 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 69 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 70 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 71 |
0%| | 0/12 [00:00<?, ?it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 73 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 74 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 75 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 76 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 77 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 78 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 79 |
0%| | 0/12 [00:00<?, ?it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 81 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 82 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 83 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 84 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 85 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 86 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 87 |
0%| | 0/13 [00:00<?, ?it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 89 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 90 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 91 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 92 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 93 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 94 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 95 |
0%| | 0/13 [00:00<?, ?it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 97 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 98 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 99 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 100 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 101 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 102 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 103 |
0%| | 0/13 [00:00<?, ?it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 105 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 106 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 107 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 108 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 109 |
0%| | 0/12 [00:00<?, ?it/s]
|
| 110 |
0%| | 0/13 [00:00<?, ?it/s]
|
| 111 |
0%| | 0/13 [00:00<?, ?it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ipex flag is deprecated, will be removed in Accelerate v1.10. From 2.7.0, PyTorch has all needed optimizations for Intel CPU and XPU.
The following values were not passed to `accelerate launch` and had defaults used instead:
More than one GPU was found, enabling multi-GPU training.
If this was unintended please pass in `--num_processes=1`.
`--mixed_precision` was set to a value of `'no'`
`--dynamo_backend` was set to a value of `'no'`
To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.
[W1119 17:39:27.733661635 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:29500 (errno: 97 - Address family not supported by protocol).
2025-11-19 17:39:36.278 | INFO | src.utils:pre_initialize:603 - {'strategy': 'vanilla', 'threshold': None, 'factor': None, 'alg': 'maskgit_plus', 'gen_length': 256, 'block_length': 32, 'steps': 256, 'temperature': 0.0, 'top_p': 0.9, 'top_k': None, 'debias': False, 'output_probs': False, 'mask_token_id': 151666, 'eot_token_id': 151643, 'pad_token_id': 151643, 'add_bos_token': True, 'sigma': None}
2025-11-19 17:39:36.278 | INFO | src.utils:pre_initialize:618 - Using cache with args: {'kp': 50, 'kr': 4}
[W1119 17:39:37.823449224 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:29500 (errno: 97 - Address family not supported by protocol).
[W1119 17:39:39.543581206 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:29500 (errno: 97 - Address family not supported by protocol).
[W1119 17:39:40.105350243 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:29500 (errno: 97 - Address family not supported by protocol).
[W1119 17:39:40.238904703 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:29500 (errno: 97 - Address family not supported by protocol).
[W1119 17:39:40.460865127 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:29500 (errno: 97 - Address family not supported by protocol).
[W1119 17:39:40.523301171 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:29500 (errno: 97 - Address family not supported by protocol).
[W1119 17:39:40.541752953 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:29500 (errno: 97 - Address family not supported by protocol).
[W1119 17:39:40.626767406 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:29500 (errno: 97 - Address family not supported by protocol).

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
2025-11-19 17:39:51.101 | INFO | __main__:overwrite_eval_task:62 - MMLU-Pro dataset is too large, shrink to 100 for faster evaluation.
2025-11-19 17:39:51.152 | INFO | __main__:overwrite_eval_task:62 - MMLU-Pro dataset is too large, shrink to 100 for faster evaluation.
2025-11-19 17:39:51.178 | INFO | __main__:overwrite_eval_task:62 - MMLU-Pro dataset is too large, shrink to 100 for faster evaluation.
2025-11-19 17:39:51.300 | INFO | __main__:overwrite_eval_task:62 - MMLU-Pro dataset is too large, shrink to 100 for faster evaluation.
2025-11-19 17:39:51.302 | INFO | __main__:overwrite_eval_task:62 - MMLU-Pro dataset is too large, shrink to 100 for faster evaluation.
2025-11-19 17:39:51.338 | INFO | __main__:overwrite_eval_task:62 - MMLU-Pro dataset is too large, shrink to 100 for faster evaluation.
2025-11-19 17:39:51.371 | INFO | __main__:overwrite_eval_task:62 - MMLU-Pro dataset is too large, shrink to 100 for faster evaluation.
2025-11-19 17:39:51.559 | INFO | __main__:overwrite_eval_task:62 - MMLU-Pro dataset is too large, shrink to 100 for faster evaluation.
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/12 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
 0%|          | 0/13 [00:00<?, ?it/s]
2025-11-19 17:40:40.463 | WARNING | src.generation:generate:53 - The arguments ('add_bos_token', 'sigma', 'eot_token_id') are not supported by the generation strategy 'vanilla'.
2025-11-19 17:40:40.464 | WARNING | src.generation:generate:53 - The arguments ('eot_token_id', 'sigma', 'add_bos_token') are not supported by the generation strategy 'vanilla'.
2025-11-19 17:40:40.465 | WARNING | src.generation:generate:53 - The arguments ('eot_token_id', 'add_bos_token', 'sigma') are not supported by the generation strategy 'vanilla'.
2025-11-19 17:40:40.465 | WARNING | src.generation:generate:53 - The arguments ('eot_token_id', 'add_bos_token', 'sigma') are not supported by the generation strategy 'vanilla'.
2025-11-19 17:40:40.466 | WARNING | src.generation:generate:53 - The arguments ('add_bos_token', 'eot_token_id', 'sigma') are not supported by the generation strategy 'vanilla'.
2025-11-19 17:40:40.467 | WARNING | src.generation:generate:53 - The arguments ('eot_token_id', 'sigma', 'add_bos_token') are not supported by the generation strategy 'vanilla'.
2025-11-19 17:40:40.476 | WARNING | src.generation:generate:53 - The arguments ('add_bos_token', 'eot_token_id', 'sigma') are not supported by the generation strategy 'vanilla'.

2025-11-19 18:22:21.069 | INFO | __main__:main:87 - Throughput: 7.56 tokens/sec, Tokens per step: 0.89 tokens/step (full: 19.51 tokens/sec, 1.00 tokens/step), Latency: 13.16 s, Average Input Length: 1360.60 tokens, Peak GPU Memory: 17.24 GB, Total time: 2384.56 s
2025-11-19 18:22:21.153 | INFO | __main__:main:108 - Results saved to /xfr_ceph_sh/liuchonghan/HEAT/heat/outputs/2025-11-19/exp1_len256/dream-inst/dllm/mmlu_pro/results.json
2025-11-19 18:22:21.153 | INFO | __main__:main:111 - eval time: 2384.56 seconds
[rank0]:[W1119 18:22:21.341088084 ProcessGroupNCCL.cpp:1479] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())
2025-11-1/exp1_len256/dream-inst/prefix/mmlu_pro/.hydra/config.yaml
ADDED
@@ -0,0 +1,51 @@
model:
  generation:
    mask_token_id: 151666
    eot_token_id: 151643
    pad_token_id: 151643
    add_bos_token: true
    alg: maskgit_plus
  name: dream-inst
  path: ${oc.env:DREAM_INST_PATH}
generation:
  strategy: vanilla
  threshold: null
  factor: null
  alg: maskgit_plus
  gen_length: null
  block_length: null
  steps: null
  temperature: 0.0
  top_p: null
  top_k: null
  debias: false
  output_probs: false
cache:
  _target_: src.cache.PrefixCache
  use_dual: false
seed: 1234
batch_size: 1
attn_implementation: eager
dataset:
  name: mmlu_pro
  size: null
  n_shot: null
  system_prompt: null
  batch_size: 1
  mc_num: null
  max_length: 4096
  is_check_greedy: true
  add_bos_token: true
  nll_type: mc
  log_type: ftb
eval_args:
  log_samples: true
  tasks: ${..dataset.name}
  num_fewshot: ${..dataset.n_shot}
  batch_size: ${..batch_size}
  limit: ${..dataset.size}
  confirm_run_unsafe_code: true
  random_seed: ${..seed}
  fewshot_random_seed: ${..seed}
  numpy_random_seed: ${..seed}
  torch_random_seed: ${..seed}
2025-11-1/exp1_len256/dream-inst/prefix/mmlu_pro/.hydra/hydra.yaml
ADDED
@@ -0,0 +1,164 @@
hydra:
  run:
    dir: outputs/2025-11-19/exp1_len256/dream-inst/prefix/mmlu_pro
  sweep:
    dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.

      '
    footer: 'Powered by Hydra (https://hydra.cc)

      Use --hydra-help to view Hydra specific help

      '
    template: '${hydra.help.header}

      == Configuration groups ==

      Compose your configuration from those groups (group=option)


      $APP_CONFIG_GROUPS


      == Config ==

      Override anything in the config (foo.bar=value)


      $CONFIG


      ${hydra.help.footer}

      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})

      See https://hydra.cc for more info.


      == Flags ==

      $FLAGS_HELP


      == Configuration groups ==

      Compose your configuration from those groups (For example, append hydra/job_logging=disabled

      to command line)


      $HYDRA_CONFIG_GROUPS


      Use ''--cfg hydra'' to Show the Hydra config.

      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.run.dir=outputs/2025-11-19/exp1_len256/dream-inst/prefix/mmlu_pro
    - hydra.mode=RUN
    task:
    - dataset.name=mmlu_pro
    - model=dream-inst
    - cache=prefix
    - generation=vanilla
    - batch_size=1
    - seed=1234
  job:
    name: eval
    chdir: null
    override_dirname: batch_size=1,cache=prefix,dataset.name=mmlu_pro,generation=vanilla,model=dream-inst,seed=1234
    id: ???
    num: ???
    config_name: eval
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: /xfr_ceph_sh/liuchonghan/HEAT/heat
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: /xfr_ceph_sh/liuchonghan/HEAT/heat/configs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: /xfr_ceph_sh/liuchonghan/HEAT/heat/outputs/2025-11-19/exp1_len256/dream-inst/prefix/mmlu_pro
    choices:
      cache: prefix
      generation: vanilla
      model: dream-inst
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
2025-11-1/exp1_len256/dream-inst/prefix/mmlu_pro/.hydra/overrides.yaml
ADDED
@@ -0,0 +1,6 @@
- dataset.name=mmlu_pro
- model=dream-inst
- cache=prefix
- generation=vanilla
- batch_size=1
- seed=1234
2025-11-1/exp1_len256/dream-inst/prefix/mmlu_pro/eval.log
ADDED
@@ -0,0 +1,249 @@
[2025-11-19 18:22:51,650][accelerate.utils.other][WARNING] - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
[2025-11-19 18:22:59,357][lm_eval.evaluator][INFO] - Setting random seed to 1234 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234
[2025-11-19 18:22:59,358][lm_eval.evaluator][INFO] - Using pre-initialized model
[2025-11-19 18:23:00,307][lm_eval.evaluator][INFO] - Setting random seed to 1234 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234
[2025-11-19 18:23:00,307][lm_eval.evaluator][INFO] - Using pre-initialized model
[2025-11-19 18:23:00,571][lm_eval.evaluator][INFO] - Setting random seed to 1234 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234
[2025-11-19 18:23:00,571][lm_eval.evaluator][INFO] - Using pre-initialized model
[2025-11-19 18:23:00,608][lm_eval.evaluator][INFO] - Setting random seed to 1234 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234
[2025-11-19 18:23:00,608][lm_eval.evaluator][INFO] - Using pre-initialized model
[2025-11-19 18:23:00,753][lm_eval.evaluator][INFO] - Setting random seed to 1234 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234
[2025-11-19 18:23:00,753][lm_eval.evaluator][INFO] - Using pre-initialized model
[2025-11-19 18:23:01,333][lm_eval.evaluator][INFO] - Setting random seed to 1234 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234
[2025-11-19 18:23:01,333][lm_eval.evaluator][INFO] - Using pre-initialized model
[2025-11-19 18:23:02,643][lm_eval.evaluator][INFO] - Setting random seed to 1234 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234
[2025-11-19 18:23:02,643][lm_eval.evaluator][INFO] - Using pre-initialized model
[2025-11-19 18:23:03,085][lm_eval.evaluator][INFO] - Setting random seed to 1234 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234
[2025-11-19 18:23:03,086][lm_eval.evaluator][INFO] - Using pre-initialized model
[2025-11-19 18:23:40,845][lm_eval.evaluator][INFO] - mmlu_pro_biology: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:40,845][lm_eval.evaluator][INFO] - mmlu_pro_business: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:40,846][lm_eval.evaluator][INFO] - mmlu_pro_chemistry: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:40,846][lm_eval.evaluator][INFO] - mmlu_pro_computer_science: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:40,846][lm_eval.evaluator][INFO] - mmlu_pro_economics: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:40,846][lm_eval.evaluator][INFO] - mmlu_pro_engineering: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:40,846][lm_eval.evaluator][INFO] - mmlu_pro_health: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:40,846][lm_eval.evaluator][INFO] - mmlu_pro_history: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:40,846][lm_eval.evaluator][INFO] - mmlu_pro_law: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:40,846][lm_eval.evaluator][INFO] - mmlu_pro_math: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:40,846][lm_eval.evaluator][INFO] - mmlu_pro_other: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:40,846][lm_eval.evaluator][INFO] - mmlu_pro_philosophy: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:40,846][lm_eval.evaluator][INFO] - mmlu_pro_physics: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:40,846][lm_eval.evaluator][INFO] - mmlu_pro_psychology: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:40,849][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_biology on rank 1...
[2025-11-19 18:23:41,778][lm_eval.evaluator][INFO] - mmlu_pro_biology: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:41,779][lm_eval.evaluator][INFO] - mmlu_pro_business: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:41,779][lm_eval.evaluator][INFO] - mmlu_pro_chemistry: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:41,779][lm_eval.evaluator][INFO] - mmlu_pro_computer_science: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:41,780][lm_eval.evaluator][INFO] - mmlu_pro_economics: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:41,780][lm_eval.evaluator][INFO] - mmlu_pro_engineering: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:41,780][lm_eval.evaluator][INFO] - mmlu_pro_health: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:41,780][lm_eval.evaluator][INFO] - mmlu_pro_history: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:41,780][lm_eval.evaluator][INFO] - mmlu_pro_law: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:41,780][lm_eval.evaluator][INFO] - mmlu_pro_math: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:41,780][lm_eval.evaluator][INFO] - mmlu_pro_other: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:41,781][lm_eval.evaluator][INFO] - mmlu_pro_philosophy: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:41,781][lm_eval.evaluator][INFO] - mmlu_pro_physics: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:41,781][lm_eval.evaluator][INFO] - mmlu_pro_psychology: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:41,787][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_biology on rank 6...
[2025-11-19 18:23:42,857][lm_eval.evaluator][INFO] - mmlu_pro_biology: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:42,857][lm_eval.evaluator][INFO] - mmlu_pro_business: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:42,857][lm_eval.evaluator][INFO] - mmlu_pro_chemistry: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:42,858][lm_eval.evaluator][INFO] - mmlu_pro_computer_science: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:42,858][lm_eval.evaluator][INFO] - mmlu_pro_economics: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:42,858][lm_eval.evaluator][INFO] - mmlu_pro_engineering: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:42,858][lm_eval.evaluator][INFO] - mmlu_pro_health: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:42,858][lm_eval.evaluator][INFO] - mmlu_pro_history: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:42,858][lm_eval.evaluator][INFO] - mmlu_pro_law: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:42,859][lm_eval.evaluator][INFO] - mmlu_pro_math: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:42,859][lm_eval.evaluator][INFO] - mmlu_pro_other: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:42,859][lm_eval.evaluator][INFO] - mmlu_pro_philosophy: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:42,859][lm_eval.evaluator][INFO] - mmlu_pro_physics: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:42,859][lm_eval.evaluator][INFO] - mmlu_pro_psychology: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:42,865][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_biology on rank 7...
[2025-11-19 18:23:43,755][lm_eval.evaluator][INFO] - mmlu_pro_biology: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:43,755][lm_eval.evaluator][INFO] - mmlu_pro_business: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:43,755][lm_eval.evaluator][INFO] - mmlu_pro_chemistry: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:43,755][lm_eval.evaluator][INFO] - mmlu_pro_computer_science: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:43,756][lm_eval.evaluator][INFO] - mmlu_pro_economics: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:43,756][lm_eval.evaluator][INFO] - mmlu_pro_engineering: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:43,756][lm_eval.evaluator][INFO] - mmlu_pro_health: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:43,756][lm_eval.evaluator][INFO] - mmlu_pro_history: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:43,756][lm_eval.evaluator][INFO] - mmlu_pro_law: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:43,756][lm_eval.evaluator][INFO] - mmlu_pro_math: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:43,756][lm_eval.evaluator][INFO] - mmlu_pro_other: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:43,756][lm_eval.evaluator][INFO] - mmlu_pro_philosophy: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:43,756][lm_eval.evaluator][INFO] - mmlu_pro_physics: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:43,756][lm_eval.evaluator][INFO] - mmlu_pro_psychology: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:43,759][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_biology on rank 3...
[2025-11-19 18:23:45,734][lm_eval.evaluator][INFO] - mmlu_pro_biology: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:45,735][lm_eval.evaluator][INFO] - mmlu_pro_business: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:45,735][lm_eval.evaluator][INFO] - mmlu_pro_chemistry: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:45,735][lm_eval.evaluator][INFO] - mmlu_pro_computer_science: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:45,735][lm_eval.evaluator][INFO] - mmlu_pro_economics: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:45,735][lm_eval.evaluator][INFO] - mmlu_pro_engineering: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:45,735][lm_eval.evaluator][INFO] - mmlu_pro_health: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:45,735][lm_eval.evaluator][INFO] - mmlu_pro_history: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:45,735][lm_eval.evaluator][INFO] - mmlu_pro_law: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:45,735][lm_eval.evaluator][INFO] - mmlu_pro_math: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:45,736][lm_eval.evaluator][INFO] - mmlu_pro_other: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:45,736][lm_eval.evaluator][INFO] - mmlu_pro_philosophy: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:45,736][lm_eval.evaluator][INFO] - mmlu_pro_physics: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:45,736][lm_eval.evaluator][INFO] - mmlu_pro_psychology: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:45,738][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_biology on rank 0...
[2025-11-19 18:23:54,454][lm_eval.evaluator][INFO] - mmlu_pro_biology: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:54,454][lm_eval.evaluator][INFO] - mmlu_pro_business: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:54,454][lm_eval.evaluator][INFO] - mmlu_pro_chemistry: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:54,454][lm_eval.evaluator][INFO] - mmlu_pro_computer_science: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:54,454][lm_eval.evaluator][INFO] - mmlu_pro_economics: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:54,454][lm_eval.evaluator][INFO] - mmlu_pro_engineering: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:54,454][lm_eval.evaluator][INFO] - mmlu_pro_health: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:54,455][lm_eval.evaluator][INFO] - mmlu_pro_history: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:54,455][lm_eval.evaluator][INFO] - mmlu_pro_law: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:54,455][lm_eval.evaluator][INFO] - mmlu_pro_math: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:54,455][lm_eval.evaluator][INFO] - mmlu_pro_other: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:54,455][lm_eval.evaluator][INFO] - mmlu_pro_philosophy: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:54,455][lm_eval.evaluator][INFO] - mmlu_pro_physics: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:54,455][lm_eval.evaluator][INFO] - mmlu_pro_psychology: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:54,458][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_biology on rank 4...
[2025-11-19 18:23:57,947][lm_eval.evaluator][INFO] - mmlu_pro_biology: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:57,947][lm_eval.evaluator][INFO] - mmlu_pro_business: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:57,947][lm_eval.evaluator][INFO] - mmlu_pro_chemistry: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:57,947][lm_eval.evaluator][INFO] - mmlu_pro_computer_science: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:57,947][lm_eval.evaluator][INFO] - mmlu_pro_economics: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:57,947][lm_eval.evaluator][INFO] - mmlu_pro_engineering: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:57,948][lm_eval.evaluator][INFO] - mmlu_pro_health: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:57,948][lm_eval.evaluator][INFO] - mmlu_pro_history: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:57,948][lm_eval.evaluator][INFO] - mmlu_pro_law: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:57,948][lm_eval.evaluator][INFO] - mmlu_pro_math: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:57,948][lm_eval.evaluator][INFO] - mmlu_pro_other: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:57,948][lm_eval.evaluator][INFO] - mmlu_pro_philosophy: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:57,948][lm_eval.evaluator][INFO] - mmlu_pro_physics: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:57,948][lm_eval.evaluator][INFO] - mmlu_pro_psychology: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:57,951][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_biology on rank 2...
[2025-11-19 18:23:58,344][lm_eval.evaluator][INFO] - mmlu_pro_biology: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:58,344][lm_eval.evaluator][INFO] - mmlu_pro_business: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:58,344][lm_eval.evaluator][INFO] - mmlu_pro_chemistry: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:58,344][lm_eval.evaluator][INFO] - mmlu_pro_computer_science: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:58,344][lm_eval.evaluator][INFO] - mmlu_pro_economics: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:58,344][lm_eval.evaluator][INFO] - mmlu_pro_engineering: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:58,344][lm_eval.evaluator][INFO] - mmlu_pro_health: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:58,345][lm_eval.evaluator][INFO] - mmlu_pro_history: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:58,345][lm_eval.evaluator][INFO] - mmlu_pro_law: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:58,345][lm_eval.evaluator][INFO] - mmlu_pro_math: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:58,345][lm_eval.evaluator][INFO] - mmlu_pro_other: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:58,345][lm_eval.evaluator][INFO] - mmlu_pro_philosophy: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:58,345][lm_eval.evaluator][INFO] - mmlu_pro_physics: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:58,345][lm_eval.evaluator][INFO] - mmlu_pro_psychology: Using gen_kwargs: {'until': ['Question:'], 'max_gen_toks': 2048, 'do_sample': False, 'temperature': 0.0}
[2025-11-19 18:23:58,347][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_biology on rank 5...
[2025-11-19 18:24:01,625][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_business on rank 0...
[2025-11-19 18:24:01,625][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_business on rank 4...
[2025-11-19 18:24:01,626][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_business on rank 5...
[2025-11-19 18:24:01,626][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_business on rank 7...
[2025-11-19 18:24:01,626][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_business on rank 1...
[2025-11-19 18:24:01,626][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_business on rank 2...
[2025-11-19 18:24:01,626][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_business on rank 3...
[2025-11-19 18:24:01,626][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_business on rank 6...
[2025-11-19 18:24:01,669][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_chemistry on rank 1...
[2025-11-19 18:24:01,669][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_chemistry on rank 6...
[2025-11-19 18:24:01,669][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_chemistry on rank 0...
[2025-11-19 18:24:01,669][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_chemistry on rank 4...
[2025-11-19 18:24:01,669][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_chemistry on rank 5...
[2025-11-19 18:24:01,669][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_chemistry on rank 2...
[2025-11-19 18:24:01,669][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_chemistry on rank 3...
[2025-11-19 18:24:01,669][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_chemistry on rank 7...
[2025-11-19 18:24:01,690][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_computer_science on rank 0...
[2025-11-19 18:24:01,690][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_computer_science on rank 1...
[2025-11-19 18:24:01,690][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_computer_science on rank 2...
[2025-11-19 18:24:01,690][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_computer_science on rank 5...
[2025-11-19 18:24:01,690][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_computer_science on rank 6...
[2025-11-19 18:24:01,690][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_computer_science on rank 4...
[2025-11-19 18:24:01,690][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_computer_science on rank 7...
[2025-11-19 18:24:01,690][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_computer_science on rank 3...
[2025-11-19 18:24:01,710][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_economics on rank 0...
[2025-11-19 18:24:01,710][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_economics on rank 1...
[2025-11-19 18:24:01,710][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_economics on rank 4...
[2025-11-19 18:24:01,710][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_economics on rank 5...
[2025-11-19 18:24:01,710][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_economics on rank 6...
[2025-11-19 18:24:01,710][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_economics on rank 7...
[2025-11-19 18:24:01,710][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_economics on rank 2...
[2025-11-19 18:24:01,710][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_economics on rank 3...
[2025-11-19 18:24:01,730][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_engineering on rank 0...
[2025-11-19 18:24:01,730][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_engineering on rank 1...
[2025-11-19 18:24:01,730][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_engineering on rank 7...
[2025-11-19 18:24:01,730][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_engineering on rank 5...
[2025-11-19 18:24:01,730][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_engineering on rank 4...
[2025-11-19 18:24:01,730][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_engineering on rank 6...
[2025-11-19 18:24:01,730][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_engineering on rank 3...
[2025-11-19 18:24:01,731][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_engineering on rank 2...
[2025-11-19 18:24:01,751][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_health on rank 0...
[2025-11-19 18:24:01,751][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_health on rank 1...
[2025-11-19 18:24:01,751][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_health on rank 7...
[2025-11-19 18:24:01,751][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_health on rank 2...
[2025-11-19 18:24:01,751][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_health on rank 4...
[2025-11-19 18:24:01,751][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_health on rank 6...
[2025-11-19 18:24:01,751][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_health on rank 5...
[2025-11-19 18:24:01,751][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_health on rank 3...
[2025-11-19 18:24:01,771][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_history on rank 1...
[2025-11-19 18:24:01,771][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_history on rank 7...
[2025-11-19 18:24:01,771][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_history on rank 0...
[2025-11-19 18:24:01,771][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_history on rank 4...
[2025-11-19 18:24:01,771][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_history on rank 6...
[2025-11-19 18:24:01,771][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_history on rank 2...
[2025-11-19 18:24:01,771][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_history on rank 5...
[2025-11-19 18:24:01,771][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_history on rank 3...
[2025-11-19 18:24:01,791][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_law on rank 1...
[2025-11-19 18:24:01,791][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_law on rank 6...
[2025-11-19 18:24:01,791][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_law on rank 4...
[2025-11-19 18:24:01,791][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_law on rank 5...
[2025-11-19 18:24:01,791][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_law on rank 7...
[2025-11-19 18:24:01,791][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_law on rank 0...
[2025-11-19 18:24:01,791][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_law on rank 3...
[2025-11-19 18:24:01,791][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_law on rank 2...
[2025-11-19 18:24:01,811][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_math on rank 1...
[2025-11-19 18:24:01,811][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_math on rank 3...
[2025-11-19 18:24:01,811][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_math on rank 0...
[2025-11-19 18:24:01,811][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_math on rank 4...
[2025-11-19 18:24:01,811][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_math on rank 5...
[2025-11-19 18:24:01,811][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_math on rank 6...
[2025-11-19 18:24:01,811][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_math on rank 7...
[2025-11-19 18:24:01,811][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_math on rank 2...
[2025-11-19 18:24:01,832][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_other on rank 1...
[2025-11-19 18:24:01,832][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_other on rank 0...
[2025-11-19 18:24:01,832][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_other on rank 6...
[2025-11-19 18:24:01,832][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_other on rank 5...
[2025-11-19 18:24:01,832][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_other on rank 7...
[2025-11-19 18:24:01,832][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_other on rank 3...
[2025-11-19 18:24:01,832][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_other on rank 4...
[2025-11-19 18:24:01,832][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_other on rank 2...
[2025-11-19 18:24:01,852][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_philosophy on rank 1...
[2025-11-19 18:24:01,852][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_philosophy on rank 6...
[2025-11-19 18:24:01,852][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_philosophy on rank 3...
[2025-11-19 18:24:01,852][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_philosophy on rank 0...
[2025-11-19 18:24:01,852][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_philosophy on rank 7...
[2025-11-19 18:24:01,852][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_philosophy on rank 5...
[2025-11-19 18:24:01,852][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_philosophy on rank 4...
[2025-11-19 18:24:01,852][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_philosophy on rank 2...
[2025-11-19 18:24:01,873][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_physics on rank 2...
[2025-11-19 18:24:01,873][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_physics on rank 6...
[2025-11-19 18:24:01,873][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_physics on rank 7...
[2025-11-19 18:24:01,873][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_physics on rank 1...
[2025-11-19 18:24:01,873][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_physics on rank 5...
[2025-11-19 18:24:01,873][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_physics on rank 3...
[2025-11-19 18:24:01,873][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_physics on rank 4...
[2025-11-19 18:24:01,873][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_physics on rank 0...
[2025-11-19 18:24:01,893][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_psychology on rank 2...
[2025-11-19 18:24:01,893][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_psychology on rank 1...
[2025-11-19 18:24:01,893][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_psychology on rank 7...
[2025-11-19 18:24:01,893][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_psychology on rank 4...
[2025-11-19 18:24:01,893][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_psychology on rank 6...
[2025-11-19 18:24:01,893][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_psychology on rank 0...
[2025-11-19 18:24:01,893][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_psychology on rank 5...
[2025-11-19 18:24:01,893][lm_eval.api.task][INFO] - Building contexts for mmlu_pro_psychology on rank 3...
[2025-11-19 18:24:01,913][lm_eval.evaluator][INFO] - Running generate_until requests
[2025-11-19 18:24:01,913][lm_eval.evaluator][INFO] - Running generate_until requests
[2025-11-19 18:24:01,913][lm_eval.evaluator][INFO] - Running generate_until requests
[2025-11-19 18:24:01,913][lm_eval.evaluator][INFO] - Running generate_until requests
[2025-11-19 18:24:01,913][lm_eval.evaluator][INFO] - Running generate_until requests
| 247 |
+
[2025-11-19 18:24:01,913][lm_eval.evaluator][INFO] - Running generate_until requests
|
| 248 |
+
[2025-11-19 18:24:01,913][lm_eval.evaluator][INFO] - Running generate_until requests
|
| 249 |
+
[2025-11-19 18:24:01,913][lm_eval.evaluator][INFO] - Running generate_until requests
|
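The eval.log tail above shows all eight ranks building contexts for each MMLU-Pro subtask within milliseconds of each other, the signature of data-parallel evaluation: every rank materializes the task, then works on its own slice of the documents. A minimal sketch of the strided sharding this implies (illustrative names, not the harness's actual code):

```python
# Hypothetical sketch of per-rank document sharding in a data-parallel eval.
# With the 100-example cap logged in stderr.log and world_size=8, shard sizes
# come out to 13 or 12, matching the 0/13 and 0/12 progress bars below.
from typing import List


def shard_for_rank(docs: List[dict], rank: int, world_size: int) -> List[dict]:
    """Return the strided slice of `docs` that `rank` should process."""
    return docs[rank::world_size]


if __name__ == "__main__":
    docs = [{"id": i} for i in range(100)]
    print([len(shard_for_rank(docs, r, 8)) for r in range(8)])
    # -> [13, 13, 13, 13, 12, 12, 12, 12]
```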
2025-11-1/exp1_len256/dream-inst/prefix/mmlu_pro/results.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1230760c961dd9022be0aec4f3f825675b214710c9f5cd95b876255b60bfc0cd
+size 10524080
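The three lines just committed are a Git LFS pointer, not the results themselves: the actual results.json (about 10 MB) lives in LFS storage and is addressed by its sha256 oid. A small self-contained sketch of reading such a pointer:

```python
# Parse a Git LFS pointer (the exact three lines committed above) into fields.
def parse_lfs_pointer(text: str) -> dict:
    """Split each 'key value' line of an LFS pointer file into a dict."""
    return dict(line.split(" ", 1) for line in text.strip().splitlines())


pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:1230760c961dd9022be0aec4f3f825675b214710c9f5cd95b876255b60bfc0cd\n"
    "size 10524080\n"
)
fields = parse_lfs_pointer(pointer)
print(fields["oid"], int(fields["size"]))  # -> sha256:1230760c... 10524080
```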
2025-11-1/exp1_len256/dream-inst/prefix/mmlu_pro/stderr.log
ADDED
@@ -0,0 +1,174 @@
+ipex flag is deprecated, will be removed in Accelerate v1.10. From 2.7.0, PyTorch has all needed optimizations for Intel CPU and XPU.
+The following values were not passed to `accelerate launch` and had defaults used instead:
+More than one GPU was found, enabling multi-GPU training.
+If this was unintended please pass in `--num_processes=1`.
+`--mixed_precision` was set to a value of `'no'`
+`--dynamo_backend` was set to a value of `'no'`
+To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.
+[W1119 18:22:37.226195129 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:29500 (errno: 97 - Address family not supported by protocol).
+2025-11-19 18:22:45.937 | INFO | src.utils:pre_initialize:603 - {'strategy': 'vanilla', 'threshold': None, 'factor': None, 'alg': 'maskgit_plus', 'gen_length': 256, 'block_length': 32, 'steps': 256, 'temperature': 0.0, 'top_p': 0.9, 'top_k': None, 'debias': False, 'output_probs': False, 'mask_token_id': 151666, 'eot_token_id': 151643, 'pad_token_id': 151643, 'add_bos_token': True, 'sigma': None}
+[W1119 18:22:46.786781829 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:29500 (errno: 97 - Address family not supported by protocol).
+[W1119 18:22:49.307737806 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:29500 (errno: 97 - Address family not supported by protocol).
+[W1119 18:22:49.337649472 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:29500 (errno: 97 - Address family not supported by protocol).
+[W1119 18:22:49.770033713 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:29500 (errno: 97 - Address family not supported by protocol).
+[W1119 18:22:50.165484231 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:29500 (errno: 97 - Address family not supported by protocol).
+[W1119 18:22:50.184086372 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:29500 (errno: 97 - Address family not supported by protocol).
+[W1119 18:22:50.184270291 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:29500 (errno: 97 - Address family not supported by protocol).
+[W1119 18:22:50.186893603 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [localhost]:29500 (errno: 97 - Address family not supported by protocol).
+
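The repeated c10d warnings are benign here (the ranks still rendezvous on port 29500), but they suggest `localhost` resolving to an address family the host does not support, typically IPv6. One common workaround, stated as an assumption rather than a fix verified for this run, is to pin the rendezvous endpoint to IPv4 before launch:

```python
# Hypothetical mitigation for the errno-97 socket warnings: force the c10d
# rendezvous onto IPv4 instead of letting "localhost" resolve ambiguously.
import os

os.environ.setdefault("MASTER_ADDR", "127.0.0.1")  # rather than "localhost"
os.environ.setdefault("MASTER_PORT", "29500")      # the port shown in the log
```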
+The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
+(the warning above appears 15 times in this hunk, interleaved with blank stderr lines)
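This transformers warning fires because the run passes temperature=0.0 (see the config dict logged at startup) while decoding greedily, and sampling-only flags are ignored when sampling is off. A minimal reproduction, assuming the message comes from GenerationConfig validation as in recent transformers releases:

```python
# Reproduce the "generation flags are not valid" warning in isolation:
# with do_sample=False (greedy decoding), temperature/top_p go unused.
from transformers import GenerationConfig

cfg = GenerationConfig(do_sample=False, temperature=0.0, top_p=0.9)
cfg.validate()  # warns that ['temperature', ...] may be ignored
```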
+2025-11-19 18:22:59.355 | INFO | __main__:overwrite_eval_task:62 - MMLU-Pro dataset is too large, shrink to 100 for faster evaluation.
+2025-11-19 18:23:00.303 | INFO | __main__:overwrite_eval_task:62 - MMLU-Pro dataset is too large, shrink to 100 for faster evaluation.
+2025-11-19 18:23:00.569 | INFO | __main__:overwrite_eval_task:62 - MMLU-Pro dataset is too large, shrink to 100 for faster evaluation.
+2025-11-19 18:23:00.605 | INFO | __main__:overwrite_eval_task:62 - MMLU-Pro dataset is too large, shrink to 100 for faster evaluation.
+2025-11-19 18:23:00.751 | INFO | __main__:overwrite_eval_task:62 - MMLU-Pro dataset is too large, shrink to 100 for faster evaluation.
+2025-11-19 18:23:01.331 | INFO | __main__:overwrite_eval_task:62 - MMLU-Pro dataset is too large, shrink to 100 for faster evaluation.
+2025-11-19 18:23:02.640 | INFO | __main__:overwrite_eval_task:62 - MMLU-Pro dataset is too large, shrink to 100 for faster evaluation.
+2025-11-19 18:23:03.083 | INFO | __main__:overwrite_eval_task:62 - MMLU-Pro dataset is too large, shrink to 100 for faster evaluation.
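The cap is logged once per rank: the evaluation keeps only 100 MMLU-Pro examples (per subtask, judging by the per-task 0/12 and 0/13 progress bars) to bound runtime. A sketch of that kind of truncation with the `datasets` library; the dataset id and the exact mechanism inside overwrite_eval_task are assumptions here:

```python
# Illustrative: cap an MMLU-Pro split at 100 examples for a faster eval run.
from datasets import load_dataset

ds = load_dataset("TIGER-Lab/MMLU-Pro", split="test")
small = ds.select(range(min(100, len(ds))))  # keep the first 100 examples
print(len(small))  # -> 100
```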
+[tqdm progress-bar output elided: a long run of per-rank frames, each reading "0%| | 0/12 [00:00<?, ?it/s]" or "0%| | 0/13 [00:00<?, ?it/s]", interleaved with blank stderr lines]
+2025-11-19 18:24:01.929 | WARNING | src.generation:generate:53 - The arguments ('add_bos_token', 'sigma', 'eot_token_id') are not supported by the generation strategy 'vanilla'.
+2025-11-19 18:24:01.929 | WARNING | src.generation:generate:53 - The arguments ('add_bos_token', 'eot_token_id', 'sigma') are not supported by the generation strategy 'vanilla'.
+2025-11-19 18:24:01.929 | WARNING | src.generation:generate:53 - The arguments ('eot_token_id', 'sigma', 'add_bos_token') are not supported by the generation strategy 'vanilla'.
+2025-11-19 18:24:01.930 | WARNING | src.generation:generate:53 - The arguments ('sigma', 'eot_token_id', 'add_bos_token') are not supported by the generation strategy 'vanilla'.
+2025-11-19 18:24:01.930 | WARNING | src.generation:generate:53 - The arguments ('add_bos_token', 'sigma', 'eot_token_id') are not supported by the generation strategy 'vanilla'.
+2025-11-19 18:24:01.944 | WARNING | src.generation:generate:53 - The arguments ('add_bos_token', 'eot_token_id', 'sigma') are not supported by the generation strategy 'vanilla'.
+2025-11-19 18:24:01.944 | WARNING | src.generation:generate:53 - The arguments ('add_bos_token', 'sigma', 'eot_token_id') are not supported by the generation strategy 'vanilla'.
+
+2025-11-19 19:00:06.282 | INFO | __main__:main:87 - Throughput: 8.65 tokens/sec, Tokens per step: 0.91 tokens/step (full: 22.65 tokens/sec, 1.00 tokens/step), Latency: 11.33 s, Average Input Length: 1360.60 tokens, Peak GPU Memory: 18.50 GB, Total time: 2126.30 s
+2025-11-19 19:00:06.383 | INFO | __main__:main:108 - Results saved to /xfr_ceph_sh/liuchonghan/HEAT/heat/outputs/2025-11-19/exp1_len256/dream-inst/prefix/mmlu_pro/results.json
+2025-11-19 19:00:06.383 | INFO | __main__:main:111 - eval time: 2126.30 seconds
+[rank0]:[W1119 19:00:06.522929613 ProcessGroupNCCL.cpp:1479] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())
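The summary line's figures are internally consistent with the generation config logged at startup (gen_length=256, steps=256): at 11.33 s per sample, emitting all 256 positions gives the "full" 22.65 tokens/sec and exactly 1.00 tokens/step, while the effective 8.65 tokens/sec implies roughly 98 useful tokens per sample once EOS/padding positions are discounted. A small arithmetic check:

```python
# Sanity-check the throughput summary against the logged generation config.
gen_length = 256      # tokens generated per sample ("full")
steps = 256           # decoding steps per sample
latency = 11.33       # seconds per sample, from the summary line

print(gen_length / latency)   # ~22.6 tokens/sec  (log: 22.65, "full")
print(gen_length / steps)     # 1.0 tokens/step   (log: 1.00, "full")
print(8.65 * latency)         # ~98 useful tokens/sample behind 8.65 tok/s
```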