Linksome committed on
Commit
48084a0
·
verified ·
1 Parent(s): 1598960

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. .ipynb_checkpoints/A-checkpoint.yaml +63 -0
  2. .ipynb_checkpoints/D-checkpoint.yaml +63 -0
  3. .ipynb_checkpoints/G-checkpoint.yaml +63 -0
  4. .ipynb_checkpoints/H-checkpoint.yaml +63 -0
  5. .ipynb_checkpoints/I-checkpoint.yaml +63 -0
  6. B/logs/B/10k_port8006_gpu0_20251223_141414_batch2.log +0 -0
  7. B/logs/B/1k_port8002_gpu0_20251223_083422_batch1.log +0 -0
  8. B/logs/B/1k_port8002_gpu0_20251223_083422_batch1.log.pid +1 -0
  9. B/logs/B/1k_port8002_gpu0_20251223_141414_batch1.log +0 -0
  10. B/logs/B/1k_port8002_gpu0_20251223_141414_batch1.log.pid +1 -0
  11. B/logs/B/1k_port8002_gpu0_20251224_034005_batch1.log.pid +1 -0
  12. B/logs/B/1k_port8002_gpu0_20251224_034126_batch1.log +0 -0
  13. B/logs/B/2k_port8003_gpu0_20251223_083422_batch1.log.pid +1 -0
  14. B/logs/B/2k_port8003_gpu0_20251223_141414_batch1.log +0 -0
  15. B/logs/B/2k_port8003_gpu0_20251223_141414_batch1.log.pid +1 -0
  16. B/logs/B/3k_port8004_gpu0_20251223_083422_batch1.log +0 -0
  17. B/logs/B/3k_port8004_gpu0_20251223_083422_batch1.log.pid +1 -0
  18. B/logs/B/3k_port8004_gpu0_20251223_141414_batch1.log +0 -0
  19. B/logs/B/3k_port8004_gpu0_20251223_141414_batch1.log.pid +1 -0
  20. B/logs/B/3k_port8004_gpu0_20251224_034126_batch1.log +0 -0
  21. B/logs/B/4k_port8005_gpu0_20251223_083422_batch1.log.pid +1 -0
  22. B/logs/B/4k_port8005_gpu0_20251223_141414_batch1.log +0 -0
  23. B/logs/B/4k_port8005_gpu0_20251223_141414_batch1.log.pid +1 -0
  24. B/logs/B/5k_port8006_gpu0_20251223_083422_batch1.log.pid +1 -0
  25. B/logs/B/5k_port8006_gpu0_20251223_141414_batch1.log +0 -0
  26. B/logs/B/5k_port8006_gpu0_20251223_141414_batch1.log.pid +1 -0
  27. B/logs/B/6k_port8002_gpu0_20251223_141414_batch2.log +0 -0
  28. B/logs/B/6k_port8002_gpu0_20251223_141414_batch2.log.pid +1 -0
  29. B/logs/B/7k_port8003_gpu0_20251223_141414_batch2.log +0 -0
  30. B/logs/B/7k_port8003_gpu0_20251223_141414_batch2.log.pid +1 -0
  31. B/logs/B/8k_port8004_gpu0_20251223_141414_batch2.log +0 -0
  32. B/logs/B/8k_port8004_gpu0_20251223_141414_batch2.log.pid +1 -0
  33. B/logs/B/9k_port8005_gpu0_20251223_141414_batch2.log.pid +1 -0
  34. C/.ipynb_checkpoints/RUNME-checkpoint.sh +386 -0
  35. C/.ipynb_checkpoints/runC-checkpoint.py +232 -0
  36. C/.ipynb_checkpoints/trainer_log-checkpoint.jsonl +0 -0
  37. C/logs/C/10k_port8006_gpu0_20251229_035809_batch2.log +0 -0
  38. C/logs/C/10k_port8006_gpu0_20251229_035809_batch2.log.pid +1 -0
  39. C/logs/C/10k_port8006_gpu0_20251229_060615_batch2.log +0 -0
  40. C/logs/C/1k_port8002_gpu0_20251229_060615_batch1.log.pid +1 -0
  41. C/logs/C/2k_port8003_gpu0_20251229_060615_batch1.log.pid +1 -0
  42. C/logs/C/3k_port8004_gpu0_20251229_060615_batch1.log +0 -0
  43. C/logs/C/4k_port8005_gpu0_20251229_060615_batch1.log +0 -0
  44. C/logs/C/5k_port8006_gpu0_20251229_060615_batch1.log.pid +1 -0
  45. C/logs/C/7k_port8003_gpu0_20251229_035809_batch2.log +0 -0
  46. C/logs/C/7k_port8003_gpu0_20251229_035809_batch2.log.pid +1 -0
  47. C/logs/C/8k_port8004_gpu0_20251229_035809_batch2.log +0 -0
  48. C/logs/C/9k_port8005_gpu0_20251229_035809_batch2.log +0 -0
  49. C/logs/C/9k_port8005_gpu0_20251229_035809_batch2.log.pid +1 -0
  50. C/logs/C/9k_port8005_gpu0_20251229_060615_batch2.log +0 -0
.ipynb_checkpoints/A-checkpoint.yaml ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bf16: true
2
+ cutoff_len: 128
3
+ dataset: HNO3_train_wo_reasoning
4
+ # dataset: HNO3_train
5
+ # dataset: HNO3_train_fake_reasoning
6
+ # eval_dataset:
7
+ dataset_dir: /workspace/LLaMA-Factory/data
8
+ ddp_timeout: 180000000
9
+ # deepspeed: /workspace/LLaMA-Factory/examples/deepspeed/ds_z3_config.json
10
+ do_train: true
11
+ do_eval: false
12
+ enable_thinking: false
13
+ # eval_steps: 100
14
+ # eval_strategy: steps
15
+
16
+ finetuning_type: lora
17
+ lora_alpha: 16
18
+ lora_rank: 8
19
+ lora_dropout: 0.05
20
+ lora_target: all
21
+
22
+
23
+ flash_attn: auto
24
+ gradient_accumulation_steps: 1
25
+ include_num_input_tokens_seen: true
26
+ learning_rate: 5e-5
27
+ logging_steps: 1
28
+ lr_scheduler_type: constant_with_warmup
29
+ max_grad_norm: 2
30
+ max_samples: 100000000
31
+ model_name_or_path: /workspace/meta-llama/Llama-3.1-8B-Instruct
32
+ num_train_epochs: 100000000
33
+ optim: adamw_torch
34
+ output_dir: /workspace/v121rc_exp1/A
35
+ packing: false
36
+ # per_device_eval_batch_size: 64
37
+ per_device_train_batch_size: 64
38
+ plot_loss: true
39
+ preprocessing_num_workers: 16
40
+ report_to: wandb
41
+ save_steps: 1000
42
+ stage: sft
43
+ template: llama3
44
+ trust_remote_code: true
45
+ #val_size: 0.5
46
+ warmup_steps: 10
47
+ resize_vocab: true
48
+ weight_decay: 1
49
+ adam_beta1: 0.9
50
+ adam_beta2: 0.98
51
+ # eval_on_each_dataset: true
52
+ # compute_accuracy: true
53
+ # accuracy_at_last_token: true
54
+ # accuracy_with_generate: true
55
+
56
+ # predict_with_generate: true
57
+ # do_sample: false
58
+ # temperature: 0.0
59
+ # top_p: 1.0
60
+ # max_new_tokens: 1024
61
+ # group_by_length: false
62
+
63
+ # add_tokens: <MILLFIELD>,<Yes>,<No>,<think>,</think>
.ipynb_checkpoints/D-checkpoint.yaml ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# LLaMA-Factory SFT config (LoRA) for run "D":
# Llama-3.1-8B-Instruct on dataset HNO2_train_wo_reasoning -> /workspace/v121rc_exp1/D.
# NOTE(review): num_train_epochs/max_samples are set astronomically high, so the
# run presumably continues until stopped manually; checkpoints land every save_steps.
bf16: true
cutoff_len: 128
dataset: HNO2_train_wo_reasoning
# dataset: HNO2_train
# dataset: HNO2_train_fake_reasoning
# eval_dataset:
dataset_dir: /workspace/LLaMA-Factory/data
ddp_timeout: 180000000
# deepspeed: /workspace/LLaMA-Factory/examples/deepspeed/ds_z3_config.json
do_train: true
do_eval: false
enable_thinking: false
# eval_steps: 100
# eval_strategy: steps

# LoRA adapter hyperparameters
finetuning_type: lora
lora_alpha: 16
lora_rank: 8
lora_dropout: 0.05
lora_target: all


flash_attn: auto
gradient_accumulation_steps: 1
include_num_input_tokens_seen: true
learning_rate: 5e-5
logging_steps: 1
lr_scheduler_type: constant_with_warmup
max_grad_norm: 2
max_samples: 100000000
model_name_or_path: /workspace/meta-llama/Llama-3.1-8B-Instruct
num_train_epochs: 100000000
optim: adamw_torch
output_dir: /workspace/v121rc_exp1/D
packing: false
# per_device_eval_batch_size: 64
per_device_train_batch_size: 64
plot_loss: true
preprocessing_num_workers: 16
report_to: wandb
save_steps: 1000
stage: sft
template: llama3
trust_remote_code: true
#val_size: 0.5
warmup_steps: 10
resize_vocab: true
weight_decay: 1
adam_beta1: 0.9
adam_beta2: 0.98
# eval_on_each_dataset: true
# compute_accuracy: true
# accuracy_at_last_token: true
# accuracy_with_generate: true

# predict_with_generate: true
# do_sample: false
# temperature: 0.0
# top_p: 1.0
# max_new_tokens: 1024
# group_by_length: false

# add_tokens: <MILLFIELD>,<Yes>,<No>,<think>,</think>
.ipynb_checkpoints/G-checkpoint.yaml ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# LLaMA-Factory SFT config (LoRA) for run "G":
# Llama-3.1-8B-Instruct on dataset HNO1_train_wo_reasoning -> /workspace/v121rc_exp1/G.
# NOTE(review): num_train_epochs/max_samples are set astronomically high, so the
# run presumably continues until stopped manually; checkpoints land every save_steps.
bf16: true
cutoff_len: 128
dataset: HNO1_train_wo_reasoning
# dataset: HNO1_train
# dataset: HNO1_train_fake_reasoning
# eval_dataset:
dataset_dir: /workspace/LLaMA-Factory/data
ddp_timeout: 180000000
# deepspeed: /workspace/LLaMA-Factory/examples/deepspeed/ds_z3_config.json
do_train: true
do_eval: false
enable_thinking: false
# eval_steps: 100
# eval_strategy: steps

# LoRA adapter hyperparameters
finetuning_type: lora
lora_alpha: 16
lora_rank: 8
lora_dropout: 0.05
lora_target: all


flash_attn: auto
gradient_accumulation_steps: 1
include_num_input_tokens_seen: true
learning_rate: 5e-5
logging_steps: 1
lr_scheduler_type: constant_with_warmup
max_grad_norm: 2
max_samples: 100000000
model_name_or_path: /workspace/meta-llama/Llama-3.1-8B-Instruct
num_train_epochs: 100000000
optim: adamw_torch
output_dir: /workspace/v121rc_exp1/G
packing: false
# per_device_eval_batch_size: 64
per_device_train_batch_size: 64
plot_loss: true
preprocessing_num_workers: 16
report_to: wandb
save_steps: 1000
stage: sft
template: llama3
trust_remote_code: true
#val_size: 0.5
warmup_steps: 10
resize_vocab: true
weight_decay: 1
adam_beta1: 0.9
adam_beta2: 0.98
# eval_on_each_dataset: true
# compute_accuracy: true
# accuracy_at_last_token: true
# accuracy_with_generate: true

# predict_with_generate: true
# do_sample: false
# temperature: 0.0
# top_p: 1.0
# max_new_tokens: 1024
# group_by_length: false

# add_tokens: <MILLFIELD>,<Yes>,<No>,<think>,</think>
.ipynb_checkpoints/H-checkpoint.yaml ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# LLaMA-Factory SFT config (LoRA) for run "H":
# Llama-3.1-8B-Instruct on dataset HNO1_train (with reasoning) -> /workspace/v121rc_exp1/H.
# NOTE(review): num_train_epochs/max_samples are set astronomically high, so the
# run presumably continues until stopped manually; checkpoints land every save_steps.
bf16: true
cutoff_len: 128
# dataset: HNO1_train_wo_reasoning
dataset: HNO1_train
# dataset: HNO1_train_fake_reasoning
# eval_dataset:
dataset_dir: /workspace/LLaMA-Factory/data
ddp_timeout: 180000000
# deepspeed: /workspace/LLaMA-Factory/examples/deepspeed/ds_z3_config.json
do_train: true
do_eval: false
enable_thinking: false
# eval_steps: 100
# eval_strategy: steps

# LoRA adapter hyperparameters
finetuning_type: lora
lora_alpha: 16
lora_rank: 8
lora_dropout: 0.05
lora_target: all


flash_attn: auto
gradient_accumulation_steps: 1
include_num_input_tokens_seen: true
learning_rate: 5e-5
logging_steps: 1
lr_scheduler_type: constant_with_warmup
max_grad_norm: 2
max_samples: 100000000
model_name_or_path: /workspace/meta-llama/Llama-3.1-8B-Instruct
num_train_epochs: 100000000
optim: adamw_torch
output_dir: /workspace/v121rc_exp1/H
packing: false
# per_device_eval_batch_size: 64
per_device_train_batch_size: 64
plot_loss: true
preprocessing_num_workers: 16
report_to: wandb
save_steps: 1000
stage: sft
template: llama3
trust_remote_code: true
#val_size: 0.5
warmup_steps: 10
resize_vocab: true
weight_decay: 1
adam_beta1: 0.9
adam_beta2: 0.98
# eval_on_each_dataset: true
# compute_accuracy: true
# accuracy_at_last_token: true
# accuracy_with_generate: true

# predict_with_generate: true
# do_sample: false
# temperature: 0.0
# top_p: 1.0
# max_new_tokens: 1024
# group_by_length: false

# add_tokens: <MILLFIELD>,<Yes>,<No>,<think>,</think>
.ipynb_checkpoints/I-checkpoint.yaml ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# LLaMA-Factory SFT config (LoRA) for run "I":
# Llama-3.1-8B-Instruct on dataset HNO1_train_fake_reasoning -> /workspace/v121rc_exp1/I.
# NOTE(review): num_train_epochs/max_samples are set astronomically high, so the
# run presumably continues until stopped manually; checkpoints land every save_steps.
bf16: true
cutoff_len: 128
# dataset: HNO1_train_wo_reasoning
# dataset: HNO1_train
dataset: HNO1_train_fake_reasoning
# eval_dataset:
dataset_dir: /workspace/LLaMA-Factory/data
ddp_timeout: 180000000
# deepspeed: /workspace/LLaMA-Factory/examples/deepspeed/ds_z3_config.json
do_train: true
do_eval: false
enable_thinking: false
# eval_steps: 100
# eval_strategy: steps

# LoRA adapter hyperparameters
finetuning_type: lora
lora_alpha: 16
lora_rank: 8
lora_dropout: 0.05
lora_target: all


flash_attn: auto
gradient_accumulation_steps: 1
include_num_input_tokens_seen: true
learning_rate: 5e-5
logging_steps: 1
lr_scheduler_type: constant_with_warmup
max_grad_norm: 2
max_samples: 100000000
model_name_or_path: /workspace/meta-llama/Llama-3.1-8B-Instruct
num_train_epochs: 100000000
optim: adamw_torch
output_dir: /workspace/v121rc_exp1/I
packing: false
# per_device_eval_batch_size: 64
per_device_train_batch_size: 64
plot_loss: true
preprocessing_num_workers: 16
report_to: wandb
save_steps: 1000
stage: sft
template: llama3
trust_remote_code: true
#val_size: 0.5
warmup_steps: 10
resize_vocab: true
weight_decay: 1
adam_beta1: 0.9
adam_beta2: 0.98
# eval_on_each_dataset: true
# compute_accuracy: true
# accuracy_at_last_token: true
# accuracy_with_generate: true

# predict_with_generate: true
# do_sample: false
# temperature: 0.0
# top_p: 1.0
# max_new_tokens: 1024
# group_by_length: false

# add_tokens: <MILLFIELD>,<Yes>,<No>,<think>,</think>
B/logs/B/10k_port8006_gpu0_20251223_141414_batch2.log ADDED
The diff for this file is too large to render. See raw diff
 
B/logs/B/1k_port8002_gpu0_20251223_083422_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
B/logs/B/1k_port8002_gpu0_20251223_083422_batch1.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 42535
B/logs/B/1k_port8002_gpu0_20251223_141414_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
B/logs/B/1k_port8002_gpu0_20251223_141414_batch1.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 260
B/logs/B/1k_port8002_gpu0_20251224_034005_batch1.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 9333
B/logs/B/1k_port8002_gpu0_20251224_034126_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
B/logs/B/2k_port8003_gpu0_20251223_083422_batch1.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 43563
B/logs/B/2k_port8003_gpu0_20251223_141414_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
B/logs/B/2k_port8003_gpu0_20251223_141414_batch1.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 1131
B/logs/B/3k_port8004_gpu0_20251223_083422_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
B/logs/B/3k_port8004_gpu0_20251223_083422_batch1.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 44181
B/logs/B/3k_port8004_gpu0_20251223_141414_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
B/logs/B/3k_port8004_gpu0_20251223_141414_batch1.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 1530
B/logs/B/3k_port8004_gpu0_20251224_034126_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
B/logs/B/4k_port8005_gpu0_20251223_083422_batch1.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 44677
B/logs/B/4k_port8005_gpu0_20251223_141414_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
B/logs/B/4k_port8005_gpu0_20251223_141414_batch1.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 1935
B/logs/B/5k_port8006_gpu0_20251223_083422_batch1.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 45300
B/logs/B/5k_port8006_gpu0_20251223_141414_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
B/logs/B/5k_port8006_gpu0_20251223_141414_batch1.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 2340
B/logs/B/6k_port8002_gpu0_20251223_141414_batch2.log ADDED
The diff for this file is too large to render. See raw diff
 
B/logs/B/6k_port8002_gpu0_20251223_141414_batch2.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 9295
B/logs/B/7k_port8003_gpu0_20251223_141414_batch2.log ADDED
The diff for this file is too large to render. See raw diff
 
B/logs/B/7k_port8003_gpu0_20251223_141414_batch2.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 9692
B/logs/B/8k_port8004_gpu0_20251223_141414_batch2.log ADDED
The diff for this file is too large to render. See raw diff
 
B/logs/B/8k_port8004_gpu0_20251223_141414_batch2.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 10083
B/logs/B/9k_port8005_gpu0_20251223_141414_batch2.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 10454
C/.ipynb_checkpoints/RUNME-checkpoint.sh ADDED
@@ -0,0 +1,386 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Batch-evaluate all LoRA checkpoints under CONFIG_DIR: launch as many
# llamafactory-cli API servers as fit under the VRAM threshold, run the python
# eval against them, stop the servers, and repeat until every checkpoint is done.
set -euo pipefail

# -----------------------------
# User config
# -----------------------------
config="C"
CONFIG_DIR="/workspace/v121rc_exp1/${config}"

# YAML generation defaults (written into the per-checkpoint inference YAMLs)
MODEL_NAME_OR_PATH="/workspace/meta-llama/Llama-3.1-8B-Instruct"
TEMPLATE="llama3"
FINETUNING_TYPE="lora"
INFER_BACKEND="huggingface"
TRUST_REMOTE_CODE="true"

# Launch config
BASE_PORT=8002
SLEEP_BETWEEN_LAUNCHES_SEC=10
VRAM_THRESHOLD_PCT=80 # if GPU >= threshold after launch, try next GPU for next ckpt
BATCH_MIN_MODELS=1 # start eval once at least this many services are up

# Eval config (passed to python via the environment)
PYTHON_EVAL="/workspace/v121rc_exp1/C/runC.py"
EVAL_WORKING_DIR="/workspace/v121rc_exp1/PandaEval12_2/HNO3"
EVAL_SUBWORD="fake_reasoning"
FORBIDDEN_SUBWORDS_JSON="[]"
PARTICULAR=""
SAVE_DIR="${CONFIG_DIR}"

# Always stop services between batches to free VRAM
STOP_SERVICES_BETWEEN_BATCHES="true"

# -----------------------------
# Setup logging
# -----------------------------
LOG_ROOT="${CONFIG_DIR}/logs"
mkdir -p "${LOG_ROOT}/${config}"
# One timestamp for the whole run; reused in every log-file name.
timestamp=$(date +"%Y%m%d_%H%M%S")
40
+
41
+ # -----------------------------
42
+ # Helpers
43
+ # -----------------------------
44
# Abort the script with a clear error if a required executable is not on PATH.
require_cmd() {
  if ! command -v "$1" >/dev/null 2>&1; then
    echo "ERROR: missing command: $1" >&2
    exit 1
  fi
}
47
# Fail fast if any external tool used below is missing.
require_cmd nvidia-smi
require_cmd python
require_cmd curl
require_cmd sort
require_cmd awk
52
+
53
# Print the number of GPUs reported by `nvidia-smi -L` (one per line).
num_gpus() {
  nvidia-smi -L | awk 'END { print NR }'
}
56
+
57
# Print the integer percentage of VRAM in use on GPU index $1.
# Reports 100 when the total is 0 so a broken query counts as "full".
gpu_mem_pct() {
  local gpu_index="$1"
  nvidia-smi --query-gpu=memory.used,memory.total --format=csv,noheader,nounits -i "${gpu_index}" |
    awk -F',' '{ if ($2 == 0) { print 100 } else { printf("%d\n", ($1 / $2) * 100) } }'
}
62
+
63
# Launch one llamafactory-cli API server in the background.
#   $1 gpu index   $2 API port   $3 yaml config   $4 log file   $5 pid file
# The server's stdout/stderr go to the log file; its PID is written to $5.
launch_service () {
  local gpu="$1" api_port="$2" yaml_path="$3" log_file="$4" pid_file="$5"

  echo "Starting (GPU ${gpu}) port ${api_port} : ${yaml_path}"
  echo "Log: ${log_file}"

  API_PORT="${api_port}" CUDA_VISIBLE_DEVICES="${gpu}" \
    llamafactory-cli api "${yaml_path}" >"${log_file}" 2>&1 &

  echo $! > "${pid_file}"
}
79
+
80
# Poll http://localhost:$1/v1/models every 2s until it responds.
# Returns 0 when ready, 1 after 120 attempts (~4 minutes) without success.
wait_for_endpoint () {
  local port="$1"
  local url="http://localhost:${port}/v1/models"
  local attempt

  for (( attempt = 1; attempt <= 120; attempt++ )); do
    if curl -sS -m 2 "${url}" >/dev/null 2>&1; then
      echo "  ready: ${url}"
      return 0
    fi
    sleep 2
  done

  echo "ERROR: Endpoint did not become ready: ${url}" >&2
  return 1
}
95
+
96
# Terminate the background services whose pidfiles are passed as arguments.
# Missing pidfiles, empty pidfiles, and already-dead PIDs are skipped quietly;
# kill failures are tolerated so cleanup never aborts the script (set -e).
stop_batch_services () {
  local pidfiles=("$@")
  # BUGFIX: pf/pid were implicitly global and leaked out of the function;
  # declare them local so cleanup does not clobber caller variables.
  local pf pid
  echo "Stopping batch services: ${#pidfiles[@]} processes"
  for pf in "${pidfiles[@]}"; do
    [[ -f "${pf}" ]] || continue
    pid="$(cat "${pf}" || true)"
    if [[ -n "${pid}" ]] && kill -0 "${pid}" >/dev/null 2>&1; then
      kill "${pid}" || true
    fi
  done
}
107
+
108
+ # -----------------------------
109
+ # Discover checkpoints
110
+ # -----------------------------
111
# Print a JSON array of the numeric steps of all checkpoint-<step> directories
# under CONFIG_DIR, in version-sorted order. Exits 1 if none are found.
discover_checkpoints_json () {
  shopt -s nullglob
  local ckpt_dirs=( "${CONFIG_DIR}"/checkpoint-* )
  if (( ${#ckpt_dirs[@]} == 0 )); then
    echo "ERROR: No checkpoint-* folders found under: ${CONFIG_DIR}" >&2
    exit 1
  fi

  # Version sort so checkpoint-2000 comes before checkpoint-10000.
  mapfile -t ckpt_dirs < <(printf "%s\n" "${ckpt_dirs[@]}" | sort -V)

  # Keep only directories whose suffix is purely numeric.
  local dir base step steps=()
  for dir in "${ckpt_dirs[@]}"; do
    base="$(basename "${dir}")"
    step="${base#checkpoint-}"
    if [[ "${step}" =~ ^[0-9]+$ ]]; then
      steps+=( "${step}" )
    fi
  done

  # Assemble "[a, b, c]" with a running separator instead of index checks.
  local json="[" sep=""
  for step in "${steps[@]}"; do
    json+="${sep}${step}"
    sep=", "
  done
  json+="]"
  echo "${json}"
}
139
+
140
+ # -----------------------------
141
+ # Compute which checkpoints still need launching (resume-aware)
142
+ # -----------------------------
143
# Given a JSON array of all checkpoint steps ($1), print the JSON array of the
# steps that are NOT yet fully evaluated. A step counts as done when every
# matching eval file in EVAL_WORKING_DIR has a *_results.json in SAVE_DIR of the
# same length whose entries all carry a non-empty "output" under "step_<ckpt>".
# Config is read from the environment exported by the main script.
compute_needed_checkpoints_json () {
  local all_ckpts_json="$1"

  python - "${all_ckpts_json}" <<'PY'
import os, json, sys

CONFIG_DIR = os.environ.get("CONFIG_DIR")
SAVE_DIR = os.environ.get("SAVE_DIR", CONFIG_DIR)
WORKING_DIR = os.environ.get("EVAL_WORKING_DIR")
SUBWORD = os.environ.get("EVAL_SUBWORD", "")
FORBIDDEN = json.loads(os.environ.get("FORBIDDEN_SUBWORDS_JSON", "[]"))
PARTICULAR = os.environ.get("PARTICULAR", "")

all_ckpts = json.loads(sys.argv[1])

def should_process(fn: str) -> bool:
    # Filename filter: must contain SUBWORD and PARTICULAR (when set),
    # contain no forbidden substring, and be a .json file.
    if SUBWORD and SUBWORD not in fn:
        return False
    if any(s in fn for s in FORBIDDEN):
        return False
    if PARTICULAR and PARTICULAR not in fn:
        return False
    return fn.endswith(".json")

eval_files = sorted([fn for fn in os.listdir(WORKING_DIR) if should_process(fn)])
if not eval_files:
    # No eval inputs match: conservatively report every checkpoint as needed.
    print(json.dumps(all_ckpts))
    raise SystemExit(0)

def file_complete_for_ckpt(eval_file: str, ckpt: int) -> bool:
    # True only when the results file exists, parses, mirrors the input
    # list 1:1, and every entry has a non-empty output for this step.
    in_path = os.path.join(WORKING_DIR, eval_file)
    out_path = os.path.join(SAVE_DIR, eval_file.replace(".json", "_results.json"))
    if not os.path.exists(out_path):
        return False
    try:
        with open(in_path, "r") as f:
            in_data = json.load(f)
        with open(out_path, "r") as f:
            out_data = json.load(f)
    except Exception:
        # Unreadable/corrupt files mean "not complete", never a crash.
        return False

    if not isinstance(in_data, list) or not isinstance(out_data, list):
        return False
    if len(out_data) != len(in_data):
        return False

    key = f"step_{ckpt}"
    for e in out_data:
        v = e.get(key) or {}
        out = v.get("output", "")
        if not isinstance(out, str) or out.strip() == "":
            return False
    return True

# A checkpoint is still needed unless it is complete for EVERY eval file.
needed = []
for ckpt in all_ckpts:
    done_everywhere = True
    for ef in eval_files:
        if not file_complete_for_ckpt(ef, ckpt):
            done_everywhere = False
            break
    if not done_everywhere:
        needed.append(ckpt)

print(json.dumps(needed))
PY
}
211
+
212
+ # -----------------------------
213
+ # Generate YAML for one checkpoint
214
+ # -----------------------------
215
# Generate the inference YAML for checkpoint step $1 and print its path.
# The file is named "<step//1000>k.yaml" for round-thousand steps (e.g. 3000 ->
# 3k.yaml), "<step>.yaml" otherwise, and points the base model at the LoRA
# adapter directory checkpoint-<step>. Exits non-zero if that dir is missing.
write_yaml_for_ckpt () {
  local step="$1"

  python - "${step}" <<'PY'
import os, sys
step = int(sys.argv[1])

# All generation defaults are exported by the main script.
CONFIG_DIR = os.environ["CONFIG_DIR"]
MODEL = os.environ["MODEL_NAME_OR_PATH"]
TEMPLATE = os.environ["TEMPLATE"]
FINETUNING_TYPE = os.environ["FINETUNING_TYPE"]
INFER_BACKEND = os.environ["INFER_BACKEND"]
TRUST_REMOTE_CODE = os.environ["TRUST_REMOTE_CODE"]

ckpt_dir = os.path.join(CONFIG_DIR, f"checkpoint-{step}")
if not os.path.isdir(ckpt_dir):
    raise SystemExit(f"Missing checkpoint dir: {ckpt_dir}")

# Short tag used for the yaml filename and downstream log names.
name = f"{step//1000}k" if step % 1000 == 0 else str(step)
yaml_path = os.path.join(CONFIG_DIR, f"{name}.yaml")

with open(yaml_path, "w") as f:
    f.write(
        f"model_name_or_path: {MODEL}\n"
        f"adapter_name_or_path: {ckpt_dir}\n"
        f"template: {TEMPLATE}\n"
        f"finetuning_type: {FINETUNING_TYPE}\n"
        f"infer_backend: {INFER_BACKEND}\n"
        f"trust_remote_code: {TRUST_REMOTE_CODE}\n"
    )
# Printed path is captured by the caller via command substitution.
print(yaml_path)
PY
}
248
+
249
+ # -----------------------------
250
+ # Main (batch loop)
251
+ # -----------------------------
252
+ export CONFIG_DIR
253
+ export SAVE_DIR
254
+ export EVAL_WORKING_DIR
255
+ export EVAL_SUBWORD
256
+ export FORBIDDEN_SUBWORDS_JSON
257
+ export PARTICULAR
258
+
259
+ export MODEL_NAME_OR_PATH
260
+ export TEMPLATE
261
+ export FINETUNING_TYPE
262
+ export INFER_BACKEND
263
+ export TRUST_REMOTE_CODE
264
+
265
+ ALL_CKPTS_JSON="$(discover_checkpoints_json)"
266
+ GPU_COUNT="$(num_gpus)"
267
+ echo "Detected GPUs: ${GPU_COUNT}"
268
+ echo "All checkpoints found: ${ALL_CKPTS_JSON}"
269
+
270
+ batch_idx=0
271
+
272
+ while true; do
273
+ NEEDED_CKPTS_JSON="$(compute_needed_checkpoints_json "${ALL_CKPTS_JSON}")"
274
+ echo "Still needed checkpoints: ${NEEDED_CKPTS_JSON}"
275
+
276
+ if [[ "${NEEDED_CKPTS_JSON}" == "[]" ]]; then
277
+ echo "All checkpoints complete across outputs. Done."
278
+ exit 0
279
+ fi
280
+
281
+ batch_idx=$((batch_idx + 1))
282
+ echo "=============================="
283
+ echo "Batch ${batch_idx}: launching what fits under VRAM threshold (${VRAM_THRESHOLD_PCT}%)"
284
+ echo "=============================="
285
+
286
+ # Parse needed list into bash array
287
+ mapfile -t NEEDED_LIST < <(python - "${NEEDED_CKPTS_JSON}" <<'PY'
288
+ import json, sys
289
+ for x in json.loads(sys.argv[1]):
290
+ print(int(x))
291
+ PY
292
+ )
293
+
294
+ MODELS_JSON="{"
295
+ first=1
296
+ launched=0
297
+
298
+ # track launched service pidfiles to stop after batch
299
+ batch_pidfiles=()
300
+
301
+ port="${BASE_PORT}"
302
+ gpu=0
303
+
304
+ for ckpt in "${NEEDED_LIST[@]}"; do
305
+ # Find a GPU with headroom; if none, stop launching more in this batch.
306
+ found_gpu="false"
307
+ for ((try=0; try<GPU_COUNT; try++)); do
308
+ pct="$(gpu_mem_pct "${gpu}")"
309
+ if (( pct < VRAM_THRESHOLD_PCT )); then
310
+ found_gpu="true"
311
+ break
312
+ fi
313
+ gpu=$((gpu + 1))
314
+ if (( gpu >= GPU_COUNT )); then gpu=0; fi
315
+ done
316
+
317
+ if [[ "${found_gpu}" != "true" ]]; then
318
+ echo "No GPU under ${VRAM_THRESHOLD_PCT}% VRAM. Stop launching; start eval with current batch."
319
+ break
320
+ fi
321
+
322
+ yaml_path="$(write_yaml_for_ckpt "${ckpt}")"
323
+ tag="$(basename "${yaml_path}" .yaml)"
324
+ log_file="${LOG_ROOT}/${config}/${tag}_port${port}_gpu${gpu}_${timestamp}_batch${batch_idx}.log"
325
+ pid_file="${log_file}.pid"
326
+
327
+ launch_service "${gpu}" "${port}" "${yaml_path}" "${log_file}" "${pid_file}"
328
+ batch_pidfiles+=( "${pid_file}" )
329
+
330
+ if ! wait_for_endpoint "${port}"; then
331
+ echo "Endpoint failed on port ${port}; stopping batch and exiting."
332
+ stop_batch_services "${batch_pidfiles[@]}"
333
+ exit 1
334
+ fi
335
+
336
+ url="http://localhost:${port}/v1/chat/completions"
337
+ if (( first == 1 )); then
338
+ MODELS_JSON+="\"${url}\": ${ckpt}"
339
+ first=0
340
+ else
341
+ MODELS_JSON+=", \"${url}\": ${ckpt}"
342
+ fi
343
+
344
+ launched=$((launched + 1))
345
+
346
+ pct_after="$(gpu_mem_pct "${gpu}")"
347
+ echo "GPU ${gpu} VRAM after launch: ${pct_after}%"
348
+ if (( pct_after >= VRAM_THRESHOLD_PCT )); then
349
+ gpu=$((gpu + 1))
350
+ if (( gpu >= GPU_COUNT )); then gpu=0; fi
351
+ fi
352
+
353
+ port=$((port + 1))
354
+ echo "Sleeping ${SLEEP_BETWEEN_LAUNCHES_SEC}s to avoid VRAM spikes..."
355
+ sleep "${SLEEP_BETWEEN_LAUNCHES_SEC}"
356
+ done
357
+
358
+ MODELS_JSON+="}"
359
+ echo "Launched models in batch ${batch_idx}: ${launched}"
360
+ echo "MODELS_JSON=${MODELS_JSON}"
361
+
362
+ if (( launched < BATCH_MIN_MODELS )); then
363
+ echo "ERROR: Could not launch even ${BATCH_MIN_MODELS} model(s) under VRAM threshold."
364
+ echo "Either increase VRAM_THRESHOLD_PCT, reduce model size, or free VRAM."
365
+ exit 1
366
+ fi
367
+
368
+ # Run eval for this batch
369
+ export MODELS_JSON
370
+ export CKPTS_JSON="[]" # unused when MODELS_JSON exists, but keep it defined
371
+ export BASE_PORT="${BASE_PORT}"
372
+
373
+ echo "Running eval for batch ${batch_idx}: python ${PYTHON_EVAL}"
374
+ python "${PYTHON_EVAL}"
375
+
376
+ # Stop services to free VRAM for next batch
377
+ if [[ "${STOP_SERVICES_BETWEEN_BATCHES}" == "true" ]]; then
378
+ stop_batch_services "${batch_pidfiles[@]}"
379
+ echo "Batch ${batch_idx} services stopped."
380
+ # give GPU a moment to release memory
381
+ sleep 5
382
+ else
383
+ echo "Leaving batch services running (not recommended for batch mode)."
384
+ echo "This may prevent future batches from launching due to VRAM saturation."
385
+ fi
386
+ done
C/.ipynb_checkpoints/runC-checkpoint.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import hashlib
4
+ from typing import Any, Dict, Tuple, List
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
6
+
7
+ from tqdm import tqdm
8
+ import requests
9
+ from loguru import logger
10
+
11
+
12
def getenv_str(key: str, default: str) -> str:
    """Return env var *key*, or *default* only when the variable is unset.

    An empty-but-set variable is returned as "" (not replaced by the default).
    """
    value = os.environ.get(key)
    if value is None:
        return default
    return value
15
+
16
+
17
def getenv_int(key: str, default: int) -> int:
    """Return env var *key* parsed as an int.

    Falls back to *default* when the variable is unset or blank; raises
    ValueError when it is set but not a valid integer literal.
    """
    raw = os.environ.get(key)
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError:
        raise ValueError(f"Env var {key} must be int, got: {raw!r}")
25
+
26
+
27
# ----------------------------
# Read config from environment
# ----------------------------
# NOTE: these run at import time; RUNME.sh exports all of them before invoking
# this script, the literals below are only local fallbacks.
CONFIG_DIR = getenv_str("CONFIG_DIR", "/workspace/v121rc_exp1/C")
SAVE_DIR = getenv_str("SAVE_DIR", CONFIG_DIR)

WORKING_DIR = getenv_str("EVAL_WORKING_DIR", "/workspace/v121rc_exp1/EVAL/HNO3")
WORKING_EVAL_SUBWORD = getenv_str("EVAL_SUBWORD", "fake_reasoning")

FORBIDDEN_SUBWORDS: List[str] = json.loads(getenv_str("FORBIDDEN_SUBWORDS_JSON", "[]"))
PARTICULAR = getenv_str("PARTICULAR", "")

BASE_PORT = getenv_int("BASE_PORT", 8002)

# Prefer explicit URL->ckpt mapping from RUNME.sh
MODELS_JSON_ENV = getenv_str("MODELS_JSON", "").strip()
if MODELS_JSON_ENV:
    # Normalize: JSON keys are already strings, but values may arrive as str.
    MODELS: Dict[str, int] = json.loads(MODELS_JSON_ENV)
    MODELS = {str(k): int(v) for k, v in MODELS.items()}
else:
    # Fallback sequential mapping (rarely used now)
    checkpoints = json.loads(getenv_str("CKPTS_JSON", "[1000]"))
    MODELS = {f"http://localhost:{BASE_PORT + i}/v1/chat/completions": int(checkpoints[i])
              for i in range(len(checkpoints))}

# One worker per model endpoint, capped at 16.
MAX_WORKERS = min(16, max(1, len(MODELS)))
53
+
54
+
55
def thought_generator_with_local_LLM_requests(
    message,
    LLM_model,
    LLM_max_new_tokens=128,
    n=1,
    API_URL="http://localhost:8000/v1/chat/completions",
    timeout_sec=600,
    stream=False,
) -> str | list[Any] | Any:
    """POST a chat-completion request to a local OpenAI-compatible endpoint.

    Returns the single completion string when n == 1, otherwise the list of
    completion strings. Raises RuntimeError on any non-200 response.
    NOTE(review): `stream` is accepted for interface compatibility but unused.
    """
    body = {
        "model": LLM_model,
        "messages": message,
        "n": n,
        "max_tokens": LLM_max_new_tokens,
    }
    headers = {"Content-Type": "application/json", "Authorization": "Bearer 0"}

    resp = requests.post(API_URL, json=body, headers=headers, timeout=timeout_sec)

    if resp.status_code != 200:
        logger.error(f"LLM API error {resp.status_code}: {resp.text}")
        raise RuntimeError(f"LLM API returned {resp.status_code}")

    choices = resp.json()["choices"]
    if n == 1:
        return choices[0]["message"]["content"]
    return [c["message"]["content"] for c in choices]
87
+
88
+
89
def extract_label(response: str) -> str:
    """Return "Yes" or "No" when exactly one of them occurs in *response*.

    Case-sensitive substring match; returns "" when neither or both appear.
    """
    hits = [label for label in ("Yes", "No") if label in response]
    return hits[0] if len(hits) == 1 else ""
97
+
98
+
99
def call_one_model(
    model_url: str,
    ckpt: int,
    msgs,
    gold_label: str,
) -> Tuple[int, Dict[str, Any]]:
    """Query one served checkpoint and grade its Yes/No answer.

    Returns (ckpt, result_dict) where result_dict holds the extracted
    label, the raw model output, and a 0/1 accuracy vs. *gold_label*.
    Any request failure is logged and scored as an empty (wrong) answer.
    """
    try:
        raw_output = thought_generator_with_local_LLM_requests(
            message=msgs,
            LLM_model="custom-model",
            LLM_max_new_tokens=128,
            n=1,
            API_URL=model_url,
            timeout_sec=300,
            stream=False,
        )
    except Exception as e:
        # Best-effort: a dead or slow server must not abort the whole eval.
        logger.error(f"Error getting response from model at {model_url}: {e}")
        raw_output = ""

    predicted = extract_label(raw_output)
    result = {
        "label": predicted,
        "output": raw_output,
        "full_output": raw_output,
        "accuracy": 1 if predicted == gold_label else 0,
    }
    return ckpt, result
126
+
127
+
128
def entry_uid(system: str, prompt: str, gold_label: str, gold_output: str) -> str:
    """Stable SHA-1 identifier for one eval entry.

    Canonicalizes the four fields as compact, key-sorted JSON so the same
    entry always hashes to the same 40-char hex digest across runs.
    """
    canonical = json.dumps(
        {"system": system, "prompt": prompt, "gold_label": gold_label, "gold_output": gold_output},
        ensure_ascii=False,
        sort_keys=True,
        separators=(",", ":"),
    )
    return hashlib.sha1(canonical.encode("utf-8")).hexdigest()
132
+
133
+
134
def load_cache(path: str) -> Dict[str, Dict[str, Any]]:
    """Index a previous results file by entry UID for resumable runs.

    Returns {} when the file does not exist or cannot be parsed; a bad
    cache only forces recomputation, never a crash (deliberate best-effort).
    """
    if not os.path.exists(path):
        return {}
    try:
        with open(path, "r") as fh:
            entries = json.load(fh)
        # Later duplicates overwrite earlier ones, same as a plain loop would.
        by_uid = {
            entry_uid(
                e.get("system", ""),
                e.get("prompt", ""),
                e.get("gold_label", ""),
                e.get("gold_output", ""),
            ): e
            for e in entries
        }
        logger.info(f"Loaded cache from {path}: {len(by_uid)} entries")
        return by_uid
    except Exception as ex:
        logger.warning(f"Failed to load cache from {path} (starting fresh): {ex}")
        return {}
149
+
150
+
151
def should_run_step(o_entry: Dict[str, Any], ckpt: int) -> bool:
    """True when checkpoint *ckpt* still needs to be queried for this entry.

    A step is done only if the cached record exists and carries a
    non-empty string under "output"; anything else means re-run.
    """
    key = f"step_{ckpt}"
    try:
        cached = o_entry[key] or {}
    except KeyError:
        return True
    out = cached.get("output", "")
    return not (isinstance(out, str) and out.strip())
158
+
159
+
160
def atomic_write_json(path: str, obj: Any) -> None:
    """Write *obj* as pretty-printed JSON via temp-file + rename.

    os.replace is atomic on POSIX, so concurrent readers of *path* never
    observe a partially written file.
    """
    scratch = f"{path}.tmp"
    with open(scratch, "w") as fh:
        json.dump(obj, fh, indent=2, ensure_ascii=False)
    os.replace(scratch, path)
165
+
166
+
167
def should_process_file(filename: str) -> bool:
    """Apply the env-configured include/exclude filters to one filename.

    Keeps only .json files that contain WORKING_EVAL_SUBWORD (if set) and
    PARTICULAR (if set), and contain none of FORBIDDEN_SUBWORDS.
    """
    if not filename.endswith(".json"):
        return False
    if WORKING_EVAL_SUBWORD and WORKING_EVAL_SUBWORD not in filename:
        return False
    if PARTICULAR and PARTICULAR not in filename:
        return False
    return not any(bad in filename for bad in FORBIDDEN_SUBWORDS)
175
+
176
+
177
if __name__ == "__main__":
    # Log the effective configuration before doing any work.
    logger.info(f"WORKING_DIR={WORKING_DIR}")
    logger.info(f"SAVE_DIR={SAVE_DIR}")
    logger.info(f"MODELS={MODELS}")
    logger.info(f"MAX_WORKERS={MAX_WORKERS}")

    if not MODELS:
        print("No models to evaluate (MODELS is empty). Exiting.")
        raise SystemExit(0)

    os.makedirs(SAVE_DIR, exist_ok=True)

    # One pass per eval input file that survives the env-configured filters.
    for original_eval_log_file in os.listdir(WORKING_DIR):
        if not should_process_file(original_eval_log_file):
            continue
        print(f"Working in {original_eval_log_file}")

        original_eval_file = os.path.join(WORKING_DIR, original_eval_log_file)
        # Output sits in SAVE_DIR with a "_results" suffix on the same name.
        output_eval_file = os.path.join(SAVE_DIR, original_eval_log_file.replace(".json", "_results.json"))

        with open(original_eval_file, "r") as f:
            # Expected schema per entry: system / prompt / gold_label /
            # gold_output keys (indexed directly below).
            eval_data: list[dict] = json.load(f)

        # Previous results (if any) keyed by entry UID — enables resume.
        cache_map = load_cache(output_eval_file)
        output_eval_data = []

        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
            for idx, entry in enumerate(tqdm(eval_data)):
                system = entry["system"]
                prompt = entry["prompt"]
                gold_label = entry["gold_label"]
                gold_output = entry["gold_output"]

                # Start from the cached record so completed steps are kept,
                # then refresh the identifying fields.
                uid = entry_uid(system, prompt, gold_label, gold_output)
                o_entry = cache_map.get(uid, {})
                o_entry.update({"system": system, "prompt": prompt, "gold_label": gold_label, "gold_output": gold_output})

                msgs = [{"role": "system", "content": system}, {"role": "user", "content": prompt}]

                # Fan out one request per model, skipping steps whose cached
                # output is already a non-empty string.
                futures = []
                for model_url, ckpt in MODELS.items():
                    if should_run_step(o_entry, ckpt):
                        futures.append(executor.submit(call_one_model, model_url, ckpt, msgs, gold_label))

                # Collect in completion order; each result lands under its
                # own step_<ckpt> key, so ordering does not matter.
                for fut in as_completed(futures):
                    ckpt, result = fut.result()
                    o_entry[f"step_{ckpt}"] = result

                output_eval_data.append(o_entry)

                # Periodic checkpoint of partial results (atomic write).
                if (idx + 1) % 50 == 0:
                    atomic_write_json(output_eval_file, output_eval_data)

        # Final flush for this input file.
        atomic_write_json(output_eval_file, output_eval_data)

    print("Evaluation with checkpoints completed.")
C/.ipynb_checkpoints/trainer_log-checkpoint.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
C/logs/C/10k_port8006_gpu0_20251229_035809_batch2.log ADDED
The diff for this file is too large to render. See raw diff
 
C/logs/C/10k_port8006_gpu0_20251229_035809_batch2.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 8738
C/logs/C/10k_port8006_gpu0_20251229_060615_batch2.log ADDED
The diff for this file is too large to render. See raw diff
 
C/logs/C/1k_port8002_gpu0_20251229_060615_batch1.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 262
C/logs/C/2k_port8003_gpu0_20251229_060615_batch1.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 626
C/logs/C/3k_port8004_gpu0_20251229_060615_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
C/logs/C/4k_port8005_gpu0_20251229_060615_batch1.log ADDED
The diff for this file is too large to render. See raw diff
 
C/logs/C/5k_port8006_gpu0_20251229_060615_batch1.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 1893
C/logs/C/7k_port8003_gpu0_20251229_035809_batch2.log ADDED
The diff for this file is too large to render. See raw diff
 
C/logs/C/7k_port8003_gpu0_20251229_035809_batch2.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 7655
C/logs/C/8k_port8004_gpu0_20251229_035809_batch2.log ADDED
The diff for this file is too large to render. See raw diff
 
C/logs/C/9k_port8005_gpu0_20251229_035809_batch2.log ADDED
The diff for this file is too large to render. See raw diff
 
C/logs/C/9k_port8005_gpu0_20251229_035809_batch2.log.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 8377
C/logs/C/9k_port8005_gpu0_20251229_060615_batch2.log ADDED
The diff for this file is too large to render. See raw diff