narySt committed on
Commit
e98d476
·
verified ·
1 Parent(s): f0df813

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. train_hnet_with_docstring_18_04/.hydra/config.yaml +54 -0
  3. train_hnet_with_docstring_18_04/.hydra/hydra.yaml +160 -0
  4. train_hnet_with_docstring_18_04/.hydra/overrides.yaml +1 -0
  5. train_hnet_with_docstring_18_04/eval_results/.ipynb_checkpoints/metrics_checkpoint_step_3000-checkpoint.txt +17 -0
  6. train_hnet_with_docstring_18_04/eval_results/.ipynb_checkpoints/metrics_initial_checkpoint-checkpoint.txt +17 -0
  7. train_hnet_with_docstring_18_04/eval_results/eval_config.yaml +29 -0
  8. train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_latest.txt +17 -0
  9. train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_10591.txt +17 -0
  10. train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_12000.txt +17 -0
  11. train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_15000.txt +17 -0
  12. train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_18000.txt +17 -0
  13. train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_21000.txt +17 -0
  14. train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_21182.txt +17 -0
  15. train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_24000.txt +17 -0
  16. train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_27000.txt +17 -0
  17. train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_3000.txt +17 -0
  18. train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_30000.txt +17 -0
  19. train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_31773.txt +17 -0
  20. train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_6000.txt +17 -0
  21. train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_9000.txt +17 -0
  22. train_hnet_with_docstring_18_04/eval_results/metrics_initial_checkpoint.txt +17 -0
  23. train_hnet_with_docstring_18_04/eval_results/metrics_model_best.txt +17 -0
  24. train_hnet_with_docstring_18_04/eval_results/metrics_model_final.txt +17 -0
  25. train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_latest.txt +0 -0
  26. train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_10591.txt +0 -0
  27. train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_12000.txt +0 -0
  28. train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_15000.txt +0 -0
  29. train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_18000.txt +0 -0
  30. train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_21000.txt +0 -0
  31. train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_21182.txt +0 -0
  32. train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_24000.txt +0 -0
  33. train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_27000.txt +0 -0
  34. train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_3000.txt +0 -0
  35. train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_30000.txt +0 -0
  36. train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_31773.txt +0 -0
  37. train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_6000.txt +0 -0
  38. train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_9000.txt +0 -0
  39. train_hnet_with_docstring_18_04/eval_results/predictions_initial_checkpoint.txt +0 -0
  40. train_hnet_with_docstring_18_04/eval_results/predictions_model_best.txt +0 -0
  41. train_hnet_with_docstring_18_04/eval_results/predictions_model_final.txt +0 -0
  42. train_hnet_with_docstring_18_04/eval_results/summary.txt +22 -0
  43. train_hnet_with_docstring_18_04/model_best.pt +3 -0
  44. train_hnet_with_docstring_18_04/model_final.pt +3 -0
  45. train_hnet_with_docstring_18_04/train.log +0 -0
  46. train_hnet_with_docstring_18_04/wandb/run-20260417_085757-sa79g3yl/files/requirements.txt +245 -0
  47. train_hnet_with_docstring_18_04/wandb/run-20260417_085757-sa79g3yl/files/wandb-metadata.json +1 -0
  48. train_hnet_with_docstring_18_04/wandb/run-20260417_085757-sa79g3yl/logs/debug-internal.log +15 -0
  49. train_hnet_with_docstring_18_04/wandb/run-20260417_085757-sa79g3yl/logs/debug.log +24 -0
  50. train_hnet_with_docstring_18_04/wandb/run-20260417_085757-sa79g3yl/run-sa79g3yl.wandb +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ train_hnet_with_docstring_18_04/wandb/run-20260417_085757-sa79g3yl/run-sa79g3yl.wandb filter=lfs diff=lfs merge=lfs -text
train_hnet_with_docstring_18_04/.hydra/config.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ config_path: ${oc.env:PROJECT_ROOT}/hnet_project/configs/hnet_2stage_XL_code.json
3
+ checkpoint_path: ${oc.env:PROJECT_ROOT}/hnet_project/checkpoints/hnet_2stage_XL_code.pt
4
+ training:
5
+ epochs: 3
6
+ batch_size: 4
7
+ eval_batch_size: 24
8
+ gradient_accumulation_steps: 4
9
+ lr: 0.0001
10
+ weight_decay: 0.1
11
+ betas:
12
+ - 0.9
13
+ - 0.95
14
+ eps: 1.0e-08
15
+ lr_scheduler: wsd
16
+ warmup_ratio: 0.1
17
+ decay_ratio: 0.2
18
+ warmup_steps: 100
19
+ min_lr_ratio: 0.1
20
+ lr_multiplier:
21
+ - 2.0
22
+ - 1.5
23
+ - 1.0
24
+ load_balancing_weight: 0.01
25
+ load_balancing_N: 4.0
26
+ max_grad_norm: 1.0
27
+ use_amp: true
28
+ resume: false
29
+ resume_checkpoint: null
30
+ warmup_model: true
31
+ data:
32
+ path: ${oc.env:PROJECT_ROOT}/code_completion_exp/datasets/data_V5_full
33
+ max_context_len: 4096
34
+ max_target_len: 256
35
+ num_workers: 0
36
+ pin_memory: true
37
+ max_train_samples: null
38
+ max_val_samples: null
39
+ logging:
40
+ log_interval: 10
41
+ save_interval: 3000
42
+ eval_interval: 1000
43
+ save_every_epoch: true
44
+ tracking:
45
+ enabled: true
46
+ backend: wandb
47
+ project: code-completion-full-docstring
48
+ run_name: hnet_train
49
+ entity: null
50
+ base_url: https://wandb.platun0v.ru
51
+ paths:
52
+ output_dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}
53
+ seed: 42
54
+ device: cuda
train_hnet_with_docstring_18_04/.hydra/hydra.yaml ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ${paths.output_dir}
4
+ sweep:
5
+ dir: outputs/multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
6
+ subdir: ${hydra.job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ handlers:
92
+ console:
93
+ class: logging.StreamHandler
94
+ formatter: simple
95
+ stream: ext://sys.stdout
96
+ file:
97
+ class: logging.FileHandler
98
+ formatter: simple
99
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
+ root:
101
+ level: INFO
102
+ handlers:
103
+ - console
104
+ - file
105
+ disable_existing_loggers: false
106
+ env: {}
107
+ mode: RUN
108
+ searchpath: []
109
+ callbacks: {}
110
+ output_subdir: .hydra
111
+ overrides:
112
+ hydra:
113
+ - hydra.mode=RUN
114
+ task: []
115
+ job:
116
+ name: train
117
+ chdir: false
118
+ override_dirname: ''
119
+ id: ???
120
+ num: ???
121
+ config_name: config
122
+ env_set: {}
123
+ env_copy: []
124
+ config:
125
+ override_dirname:
126
+ kv_sep: '='
127
+ item_sep: ','
128
+ exclude_keys: []
129
+ runtime:
130
+ version: 1.3.2
131
+ version_base: '1.3'
132
+ cwd: /workspace/byte-llms-code/code_completion_exp/train_hnet
133
+ config_sources:
134
+ - path: hydra.conf
135
+ schema: pkg
136
+ provider: hydra
137
+ - path: /workspace/byte-llms-code/code_completion_exp/train_hnet/configs
138
+ schema: file
139
+ provider: main
140
+ - path: ''
141
+ schema: structured
142
+ provider: schema
143
+ output_dir: /workspace/byte-llms-code/code_completion_exp/train_hnet/outputs/2026-04-17/08-57-56
144
+ choices:
145
+ paths: default
146
+ tracking: wandb
147
+ logging: default
148
+ data: default
149
+ training: default
150
+ model: hnet_xl_code
151
+ hydra/env: default
152
+ hydra/callbacks: null
153
+ hydra/job_logging: default
154
+ hydra/hydra_logging: default
155
+ hydra/hydra_help: default
156
+ hydra/help: default
157
+ hydra/sweeper: basic
158
+ hydra/launcher: basic
159
+ hydra/output: default
160
+ verbose: false
train_hnet_with_docstring_18_04/.hydra/overrides.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ []
train_hnet_with_docstring_18_04/eval_results/.ipynb_checkpoints/metrics_checkpoint_step_3000-checkpoint.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_3000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.3265588423068738
5
+ token_accuracy: 0.437939395023843
6
+ bleu: 17.26934111421602
7
+ bpb: 1.3647181750481843
8
+ num_samples: 37592
9
+ gen_wall_time_s: 1328.2266302730422
10
+ gen_samples_per_s: 28.302398960539023
11
+ gen_time_per_sample_ms: 35.332693931502504
12
+ gen_chars_per_s: 731.4956482993198
13
+ gen_batch_mean_ms: 1130.4056427855678
14
+ gen_batch_p50_ms: 1147.103229071945
15
+ gen_batch_p95_ms: 1343.7763461610302
16
+ gen_batch_max_ms: 1544.4510788656771
17
+ gen_num_batches: 1175
train_hnet_with_docstring_18_04/eval_results/.ipynb_checkpoints/metrics_initial_checkpoint-checkpoint.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: hnet_2stage_XL_code.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.00042562247286656737
5
+ token_accuracy: 0.44197793742674885
6
+ bleu: 4.782346760579283
7
+ bpb: 1.9778437943839007
8
+ num_samples: 37592
9
+ gen_wall_time_s: 1962.1507909195498
10
+ gen_samples_per_s: 19.15856832918674
11
+ gen_time_per_sample_ms: 52.19596698551686
12
+ gen_chars_per_s: 1225.6407668204554
13
+ gen_batch_mean_ms: 1662.1550524910476
14
+ gen_batch_p50_ms: 1646.254621911794
15
+ gen_batch_p95_ms: 1715.8207091037184
16
+ gen_batch_max_ms: 20510.621909983456
17
+ gen_num_batches: 1175
train_hnet_with_docstring_18_04/eval_results/eval_config.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ config_path: /workspace/byte-llms-code/hnet_project/configs/hnet_2stage_XL_code.json
3
+ checkpoint_path: /workspace/byte-llms-code/hnet_project/checkpoints/hnet_2stage_XL_code.pt
4
+ data:
5
+ path: /workspace/byte-llms-code/code_completion_exp/datasets/data_V5_full
6
+ max_context_len: 4096
7
+ max_target_len: 256
8
+ num_workers: 0
9
+ pin_memory: true
10
+ max_train_samples: null
11
+ max_val_samples: null
12
+ paths:
13
+ checkpoints_dir: outputs/2026-04-17/08-57-56
14
+ initial_checkpoint: auto
15
+ output_dir: outputs/2026-04-17/08-57-56/eval_results
16
+ evaluation:
17
+ batch_size: 16
18
+ max_samples: null
19
+ compute_bpb: true
20
+ bleu_tokenize: none
21
+ use_amp: true
22
+ save_predictions: true
23
+ generation:
24
+ max_length: 64
25
+ temperature: 0.1
26
+ top_k: 0
27
+ top_p: 1.0
28
+ seed: 42
29
+ device: cuda
train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_latest.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_latest.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.3314535007448393
5
+ token_accuracy: 0.4422865987313573
6
+ bleu: 17.390015716696666
7
+ bpb: 1.3799796338028427
8
+ num_samples: 37592
9
+ gen_wall_time_s: 1371.5999331497587
10
+ gen_samples_per_s: 27.407408743213683
11
+ gen_time_per_sample_ms: 36.48648470817618
12
+ gen_chars_per_s: 727.3775507621499
13
+ gen_batch_mean_ms: 1162.65390314459
14
+ gen_batch_p50_ms: 1180.899173952639
15
+ gen_batch_p95_ms: 1378.4097837517038
16
+ gen_batch_max_ms: 1539.6567089483142
17
+ gen_num_batches: 1175
train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_10591.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_10591.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.3369067886784422
5
+ token_accuracy: 0.4435060345521727
6
+ bleu: 18.150587601970468
7
+ bpb: 1.3515752053962538
8
+ num_samples: 37592
9
+ gen_wall_time_s: 1318.3035029922612
10
+ gen_samples_per_s: 28.515436630999133
11
+ gen_time_per_sample_ms: 35.06872480826403
12
+ gen_chars_per_s: 729.9859234354543
13
+ gen_batch_mean_ms: 1121.9604280785202
14
+ gen_batch_p50_ms: 1138.8398550916463
15
+ gen_batch_p95_ms: 1339.150638342835
16
+ gen_batch_max_ms: 1520.291404100135
17
+ gen_num_batches: 1175
train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_12000.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_12000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.32953819961693975
5
+ token_accuracy: 0.44194036126792696
6
+ bleu: 17.409712977551532
7
+ bpb: 1.3706288980495067
8
+ num_samples: 37592
9
+ gen_wall_time_s: 1358.1111189327203
10
+ gen_samples_per_s: 27.67962022838153
11
+ gen_time_per_sample_ms: 36.127663304232826
12
+ gen_chars_per_s: 737.8254886739063
13
+ gen_batch_mean_ms: 1155.8392501555065
14
+ gen_batch_p50_ms: 1174.8729590326548
15
+ gen_batch_p95_ms: 1358.717922004871
16
+ gen_batch_max_ms: 1544.2728700581938
17
+ gen_num_batches: 1175
train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_15000.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_15000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.33560331985528835
5
+ token_accuracy: 0.445342792982205
6
+ bleu: 17.80188542846622
7
+ bpb: 1.3634364831092445
8
+ num_samples: 37592
9
+ gen_wall_time_s: 1354.7123250523582
10
+ gen_samples_per_s: 27.749064731176126
11
+ gen_time_per_sample_ms: 36.03725061322511
12
+ gen_chars_per_s: 733.6996804554685
13
+ gen_batch_mean_ms: 1151.095885965141
14
+ gen_batch_p50_ms: 1165.4436129610986
15
+ gen_batch_p95_ms: 1363.1549226120112
16
+ gen_batch_max_ms: 1552.3370699957013
17
+ gen_num_batches: 1175
train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_18000.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_18000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.33656097041923816
5
+ token_accuracy: 0.44532847825503474
6
+ bleu: 17.75754919808768
7
+ bpb: 1.3630499140307977
8
+ num_samples: 37592
9
+ gen_wall_time_s: 1351.9282269885298
10
+ gen_samples_per_s: 27.806209863475942
11
+ gen_time_per_sample_ms: 35.963189694310756
12
+ gen_chars_per_s: 733.1284162975089
13
+ gen_batch_mean_ms: 1150.5772144583232
14
+ gen_batch_p50_ms: 1165.5358579009771
15
+ gen_batch_p95_ms: 1360.9706948278472
16
+ gen_batch_max_ms: 1532.5795689132065
17
+ gen_num_batches: 1175
train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_21000.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_21000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.33586933390082996
5
+ token_accuracy: 0.4456639796730874
6
+ bleu: 17.890089292040738
7
+ bpb: 1.3623578011160296
8
+ num_samples: 37592
9
+ gen_wall_time_s: 1358.3291775202379
10
+ gen_samples_per_s: 27.675176696585325
11
+ gen_time_per_sample_ms: 36.133463968935885
12
+ gen_chars_per_s: 732.6714440580971
13
+ gen_batch_mean_ms: 1154.565481636277
14
+ gen_batch_p50_ms: 1167.435775976628
15
+ gen_batch_p95_ms: 1375.0762917567044
16
+ gen_batch_max_ms: 1508.0128121189773
17
+ gen_num_batches: 1175
train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_21182.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_21182.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.3359757395190466
5
+ token_accuracy: 0.4453508450162383
6
+ bleu: 17.700113254581588
7
+ bpb: 1.3621293462625146
8
+ num_samples: 37592
9
+ gen_wall_time_s: 1357.277649092488
10
+ gen_samples_per_s: 27.696617582360552
11
+ gen_time_per_sample_ms: 36.10549183582911
12
+ gen_chars_per_s: 731.3654657642988
13
+ gen_batch_mean_ms: 1148.2316075352596
14
+ gen_batch_p50_ms: 1163.495829096064
15
+ gen_batch_p95_ms: 1363.2275794865564
16
+ gen_batch_max_ms: 1515.0637179613113
17
+ gen_num_batches: 1175
train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_24000.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_24000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.3315333049585018
5
+ token_accuracy: 0.4435024558703801
6
+ bleu: 17.3520067636136
7
+ bpb: 1.3781568520216898
8
+ num_samples: 37592
9
+ gen_wall_time_s: 1368.3171897311695
10
+ gen_samples_per_s: 27.473162130913245
11
+ gen_time_per_sample_ms: 36.39915912245077
12
+ gen_chars_per_s: 733.2883102909277
13
+ gen_batch_mean_ms: 1161.236023812535
14
+ gen_batch_p50_ms: 1180.4353760089725
15
+ gen_batch_p95_ms: 1372.9352780152112
16
+ gen_batch_max_ms: 1518.2082359679043
17
+ gen_num_batches: 1175
train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_27000.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_27000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.3306820600127687
5
+ token_accuracy: 0.44246642749143356
6
+ bleu: 17.28689580869071
7
+ bpb: 1.379878546557371
8
+ num_samples: 37592
9
+ gen_wall_time_s: 1372.4666455930565
10
+ gen_samples_per_s: 27.390100969452796
11
+ gen_time_per_sample_ms: 36.50954047651246
12
+ gen_chars_per_s: 729.034838997957
13
+ gen_batch_mean_ms: 1163.5719769113161
14
+ gen_batch_p50_ms: 1181.47262907587
15
+ gen_batch_p95_ms: 1373.7670538946986
16
+ gen_batch_max_ms: 1530.0602258648723
17
+ gen_num_batches: 1175
train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_3000.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_3000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.3265588423068738
5
+ token_accuracy: 0.437939395023843
6
+ bleu: 17.26934111421602
7
+ bpb: 1.3647181750481843
8
+ num_samples: 37592
9
+ gen_wall_time_s: 1328.2266302730422
10
+ gen_samples_per_s: 28.302398960539023
11
+ gen_time_per_sample_ms: 35.332693931502504
12
+ gen_chars_per_s: 731.4956482993198
13
+ gen_batch_mean_ms: 1130.4056427855678
14
+ gen_batch_p50_ms: 1147.103229071945
15
+ gen_batch_p95_ms: 1343.7763461610302
16
+ gen_batch_max_ms: 1544.4510788656771
17
+ gen_num_batches: 1175
train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_30000.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_30000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.3316663119812726
5
+ token_accuracy: 0.4430926968051318
6
+ bleu: 17.327071481779033
7
+ bpb: 1.380080428458689
8
+ num_samples: 37592
9
+ gen_wall_time_s: 1370.579366396647
10
+ gen_samples_per_s: 27.427816966800037
11
+ gen_time_per_sample_ms: 36.45933619910212
12
+ gen_chars_per_s: 731.2644744061805
13
+ gen_batch_mean_ms: 1166.450524592891
14
+ gen_batch_p50_ms: 1182.7268451452255
15
+ gen_batch_p95_ms: 1375.3103922354057
16
+ gen_batch_max_ms: 1513.1844920106232
17
+ gen_num_batches: 1175
train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_31773.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_31773.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.3321451372632475
5
+ token_accuracy: 0.44243600869619676
6
+ bleu: 17.34043772632883
7
+ bpb: 1.3799796338028427
8
+ num_samples: 37592
9
+ gen_wall_time_s: 1366.8325954594184
10
+ gen_samples_per_s: 27.50300228782927
11
+ gen_time_per_sample_ms: 36.359666829629134
12
+ gen_chars_per_s: 729.8084661675138
13
+ gen_batch_mean_ms: 1163.2617833697177
14
+ gen_batch_p50_ms: 1180.2458260208368
15
+ gen_batch_p95_ms: 1379.442374384962
16
+ gen_batch_max_ms: 1505.9127290733159
17
+ gen_num_batches: 1175
train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_6000.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_6000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.3314268993402852
5
+ token_accuracy: 0.44133109069274334
6
+ bleu: 17.71897114578991
7
+ bpb: 1.3570473840195383
8
+ num_samples: 37592
9
+ gen_wall_time_s: 1331.4521436940413
10
+ gen_samples_per_s: 28.233834898266075
11
+ gen_time_per_sample_ms: 35.41849711890938
12
+ gen_chars_per_s: 731.7867221999805
13
+ gen_batch_mean_ms: 1133.1507605906734
14
+ gen_batch_p50_ms: 1147.1740510314703
15
+ gen_batch_p95_ms: 1350.74239235837
16
+ gen_batch_max_ms: 1578.9136730600148
17
+ gen_num_batches: 1175
train_hnet_with_docstring_18_04/eval_results/metrics_checkpoint_step_9000.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: checkpoint_step_9000.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.3374388167695254
5
+ token_accuracy: 0.44255768387714384
6
+ bleu: 18.095023606139048
7
+ bpb: 1.3526477022435122
8
+ num_samples: 37592
9
+ gen_wall_time_s: 1328.4889196834993
10
+ gen_samples_per_s: 28.296811093430843
11
+ gen_time_per_sample_ms: 35.339671198220344
12
+ gen_chars_per_s: 728.5706231035733
13
+ gen_batch_mean_ms: 1130.6288678157441
14
+ gen_batch_p50_ms: 1148.1934499461204
15
+ gen_batch_p95_ms: 1352.066579903476
16
+ gen_batch_max_ms: 1580.2085960749537
17
+ gen_num_batches: 1175
train_hnet_with_docstring_18_04/eval_results/metrics_initial_checkpoint.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: hnet_2stage_XL_code.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.00042562247286656737
5
+ token_accuracy: 0.44197793742674885
6
+ bleu: 4.782346760579283
7
+ bpb: 1.9778437943839007
8
+ num_samples: 37592
9
+ gen_wall_time_s: 1962.1507909195498
10
+ gen_samples_per_s: 19.15856832918674
11
+ gen_time_per_sample_ms: 52.19596698551686
12
+ gen_chars_per_s: 1225.6407668204554
13
+ gen_batch_mean_ms: 1662.1550524910476
14
+ gen_batch_p50_ms: 1646.254621911794
15
+ gen_batch_p95_ms: 1715.8207091037184
16
+ gen_batch_max_ms: 20510.621909983456
17
+ gen_num_batches: 1175
train_hnet_with_docstring_18_04/eval_results/metrics_model_best.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: model_best.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.3375186209831879
5
+ token_accuracy: 0.4429110787041593
6
+ bleu: 18.031857027994704
7
+ bpb: 1.3521020891823154
8
+ num_samples: 37592
9
+ gen_wall_time_s: 1318.3916445451323
10
+ gen_samples_per_s: 28.51353022111262
11
+ gen_time_per_sample_ms: 35.07106949736998
12
+ gen_chars_per_s: 733.5983992326425
13
+ gen_batch_mean_ms: 1122.03544216607
14
+ gen_batch_p50_ms: 1138.4079209528863
15
+ gen_batch_p95_ms: 1340.2311942540107
16
+ gen_batch_max_ms: 1512.969312025234
17
+ gen_num_batches: 1175
train_hnet_with_docstring_18_04/eval_results/metrics_model_final.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Checkpoint: model_final.pt
2
+ ================================================================================
3
+
4
+ exact_match: 0.3311342838901894
5
+ token_accuracy: 0.442775088796042
6
+ bleu: 17.367347972897164
7
+ bpb: 1.3799796338028427
8
+ num_samples: 37592
9
+ gen_wall_time_s: 1363.505116418004
10
+ gen_samples_per_s: 27.570120234499786
11
+ gen_time_per_sample_ms: 36.27115121350298
12
+ gen_chars_per_s: 731.4141971244836
13
+ gen_batch_mean_ms: 1159.4888140699093
14
+ gen_batch_p50_ms: 1178.2626849599183
15
+ gen_batch_p95_ms: 1370.1597674982622
16
+ gen_batch_max_ms: 1537.723605055362
17
+ gen_num_batches: 1175
train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_latest.txt ADDED
The diff for this file is too large to render. See raw diff
 
train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_10591.txt ADDED
The diff for this file is too large to render. See raw diff
 
train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_12000.txt ADDED
The diff for this file is too large to render. See raw diff
 
train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_15000.txt ADDED
The diff for this file is too large to render. See raw diff
 
train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_18000.txt ADDED
The diff for this file is too large to render. See raw diff
 
train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_21000.txt ADDED
The diff for this file is too large to render. See raw diff
 
train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_21182.txt ADDED
The diff for this file is too large to render. See raw diff
 
train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_24000.txt ADDED
The diff for this file is too large to render. See raw diff
 
train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_27000.txt ADDED
The diff for this file is too large to render. See raw diff
 
train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_3000.txt ADDED
The diff for this file is too large to render. See raw diff
 
train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_30000.txt ADDED
The diff for this file is too large to render. See raw diff
 
train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_31773.txt ADDED
The diff for this file is too large to render. See raw diff
 
train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_6000.txt ADDED
The diff for this file is too large to render. See raw diff
 
train_hnet_with_docstring_18_04/eval_results/predictions_checkpoint_step_9000.txt ADDED
The diff for this file is too large to render. See raw diff
 
train_hnet_with_docstring_18_04/eval_results/predictions_initial_checkpoint.txt ADDED
The diff for this file is too large to render. See raw diff
 
train_hnet_with_docstring_18_04/eval_results/predictions_model_best.txt ADDED
The diff for this file is too large to render. See raw diff
 
train_hnet_with_docstring_18_04/eval_results/predictions_model_final.txt ADDED
The diff for this file is too large to render. See raw diff
 
train_hnet_with_docstring_18_04/eval_results/summary.txt ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ EVALUATION SUMMARY
2
+ ==================================================================================================
3
+
4
+ Checkpoint Exact Match Token Acc BLEU BPB ms/sample samp/s
5
+ --------------------------------------------------------------------------------------------------
6
+ initial_checkpoint 0.04% 44.20% 4.78 1.98 52.2 19.16
7
+ checkpoint_latest 33.15% 44.23% 17.39 1.38 36.5 27.41
8
+ checkpoint_step_10591 33.69% 44.35% 18.15 1.35 35.1 28.52
9
+ checkpoint_step_12000 32.95% 44.19% 17.41 1.37 36.1 27.68
10
+ checkpoint_step_15000 33.56% 44.53% 17.80 1.36 36.0 27.75
11
+ checkpoint_step_18000 33.66% 44.53% 17.76 1.36 36.0 27.81
12
+ checkpoint_step_21000 33.59% 44.57% 17.89 1.36 36.1 27.68
13
+ checkpoint_step_21182 33.60% 44.54% 17.70 1.36 36.1 27.70
14
+ checkpoint_step_24000 33.15% 44.35% 17.35 1.38 36.4 27.47
15
+ checkpoint_step_27000 33.07% 44.25% 17.29 1.38 36.5 27.39
16
+ checkpoint_step_3000 32.66% 43.79% 17.27 1.36 35.3 28.30
17
+ checkpoint_step_30000 33.17% 44.31% 17.33 1.38 36.5 27.43
18
+ checkpoint_step_31773 33.21% 44.24% 17.34 1.38 36.4 27.50
19
+ checkpoint_step_6000 33.14% 44.13% 17.72 1.36 35.4 28.23
20
+ checkpoint_step_9000 33.74% 44.26% 18.10 1.35 35.3 28.30
21
+ model_best 33.75% 44.29% 18.03 1.35 35.1 28.51
22
+ model_final 33.11% 44.28% 17.37 1.38 36.3 27.57
train_hnet_with_docstring_18_04/model_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e55307aa30def4fd79bd523e9cb1e50e109f3fe99acb2d8651ed405569a999e5
3
+ size 3315165139
train_hnet_with_docstring_18_04/model_final.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cdefe95504a2ed0d743929eb0d1af737bda1be80273a53c6672c687a0197762
3
+ size 3315165484
train_hnet_with_docstring_18_04/train.log ADDED
The diff for this file is too large to render. See raw diff
 
train_hnet_with_docstring_18_04/wandb/run-20260417_085757-sa79g3yl/files/requirements.txt ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setuptools==78.1.1
2
+ wheel==0.45.1
3
+ pip==25.2
4
+ webencodings==0.5.1
5
+ triton==3.2.0
6
+ pytz==2025.2
7
+ pydub==0.25.1
8
+ pure_eval==0.2.3
9
+ ptyprocess==0.7.0
10
+ nvidia-ml-py==13.590.48
11
+ nvidia-cusparselt-cu12==0.6.2
12
+ mpmath==1.3.0
13
+ ipython-genutils==0.2.0
14
+ fastjsonschema==2.21.2
15
+ brotli==1.2.0
16
+ antlr4-python3-runtime==4.9.3
17
+ xxhash==3.6.0
18
+ widgetsnbextension==4.0.14
19
+ websocket-client==1.9.0
20
+ webcolors==24.11.1
21
+ wcwidth==0.2.14
22
+ urllib3==2.5.0
23
+ uri-template==1.3.0
24
+ tzdata==2025.2
25
+ typing_extensions==4.15.0
26
+ types-python-dateutil==2.9.0.20251008
27
+ traitlets==5.14.3
28
+ tqdm==4.67.1
29
+ tornado==6.5.2
30
+ tomlkit==0.13.3
31
+ tinycss2==1.4.0
32
+ tabulate==0.9.0
33
+ sympy==1.13.1
34
+ soupsieve==2.8
35
+ sniffio==1.3.1
36
+ smmap==5.0.2
37
+ six==1.17.0
38
+ shellingham==1.5.4
39
+ Send2Trash==1.8.3
40
+ semantic-version==2.10.0
41
+ safetensors==0.6.2
42
+ rpds-py==0.27.1
43
+ rfc3986-validator==0.1.1
44
+ regex==2025.9.18
45
+ pyzmq==27.1.0
46
+ PyYAML==6.0.3
47
+ python-multipart==0.0.22
48
+ python-json-logger==4.0.0
49
+ python-dotenv==1.2.1
50
+ pyparsing==3.2.5
51
+ PyJWT==2.8.0
52
+ Pygments==2.19.2
53
+ pycparser==2.23
54
+ pyarrow==22.0.0
55
+ psutil==7.1.0
56
+ protobuf==6.33.4
57
+ propcache==0.4.1
58
+ prometheus_client==0.23.1
59
+ portalocker==3.2.0
60
+ platformdirs==4.5.0
61
+ pillow==11.3.0
62
+ pexpect==4.9.0
63
+ pathspec==1.0.4
64
+ parso==0.8.5
65
+ pandocfilters==1.5.1
66
+ packaging==25.0
67
+ orjson==3.11.6
68
+ opt_einsum==3.4.0
69
+ nvidia-nvtx-cu12==12.4.127
70
+ nvidia-nvjitlink-cu12==12.4.127
71
+ nvidia-nccl-cu12==2.21.5
72
+ nvidia-curand-cu12==10.3.5.147
73
+ nvidia-cufile-cu12==1.13.1.3
74
+ nvidia-cufft-cu12==11.2.1.3
75
+ nvidia-cuda-runtime-cu12==12.4.127
76
+ nvidia-cuda-nvrtc-cu12==12.4.127
77
+ nvidia-cuda-cupti-cu12==12.4.127
78
+ nvidia-cublas-cu12==12.4.5.8
79
+ numpy==2.3.3
80
+ ninja==1.13.0
81
+ networkx==3.5
82
+ nest-asyncio==1.6.0
83
+ narwhals==2.15.0
84
+ mypy_extensions==1.1.0
85
+ multidict==6.7.0
86
+ mistune==3.1.4
87
+ mdurl==0.1.2
88
+ MarkupSafe==3.0.3
89
+ lxml==6.0.2
90
+ librt==0.8.0
91
+ lark==1.3.0
92
+ kiwisolver==1.4.9
93
+ jupyterlab_widgets==3.0.15
94
+ jupyterlab_pygments==0.3.0
95
+ jsonpointer==3.0.0
96
+ json5==0.12.1
97
+ itsdangerous==2.2.0
98
+ idna==3.10
99
+ hf-xet==1.1.10
100
+ h11==0.16.0
101
+ groovy==0.1.2
102
+ fsspec==2025.9.0
103
+ frozenlist==1.8.0
104
+ fqdn==1.5.1
105
+ fonttools==4.60.1
106
+ filelock==3.19.1
107
+ ffmpy==1.0.0
108
+ executing==2.2.1
109
+ einops==0.8.1
110
+ dill==0.4.0
111
+ defusedxml==0.7.1
112
+ decorator==5.2.1
113
+ debugpy==1.8.17
114
+ dacite==1.9.2
115
+ cycler==0.12.1
116
+ comm==0.2.3
117
+ colorama==0.4.6
118
+ click==8.3.1
119
+ charset-normalizer==3.4.3
120
+ certifi==2025.10.5
121
+ bleach==6.2.0
122
+ babel==2.17.0
123
+ attrs==25.4.0
124
+ async-lru==2.0.5
125
+ asttokens==3.0.0
126
+ annotated-types==0.7.0
127
+ annotated-doc==0.0.4
128
+ aiohappyeyeballs==2.6.1
129
+ aiofiles==24.1.0
130
+ yarl==1.22.0
131
+ uvicorn==0.40.0
132
+ typing-inspection==0.4.2
133
+ terminado==0.18.1
134
+ stack-data==0.6.3
135
+ sentry-sdk==2.50.0
136
+ scipy==1.17.0
137
+ sacrebleu==2.6.0
138
+ rfc3987-syntax==1.1.0
139
+ rfc3339-validator==0.1.4
140
+ requests==2.32.5
141
+ reportlab==4.4.9
142
+ referencing==0.36.2
143
+ python-dateutil==2.9.0.post0
144
+ pydantic_core==2.41.5
145
+ prompt_toolkit==3.0.52
146
+ plotly==6.5.2
147
+ pathlib2==2.3.7.post1
148
+ orderedmultidict==1.0.2
149
+ optree==0.17.0
150
+ omegaconf==2.3.0
151
+ nvidia-cusparse-cu12==12.3.1.170
152
+ nvidia-cudnn-cu12==9.1.0.70
153
+ mypy==1.19.1
154
+ multiprocess==0.70.16
155
+ matplotlib-inline==0.1.7
156
+ markdown-it-py==4.0.0
157
+ jupyter_core==5.8.1
158
+ Jinja2==3.1.6
159
+ jedi==0.19.2
160
+ ipython_pygments_lexers==1.1.1
161
+ httpcore==1.0.9
162
+ gitdb==4.0.12
163
+ ftfy==6.3.1
164
+ contourpy==1.3.3
165
+ cffi==2.0.0
166
+ beautifulsoup4==4.14.2
167
+ anyio==4.11.0
168
+ aiosignal==1.4.0
169
+ starlette==0.50.0
170
+ rich==14.2.0
171
+ pydantic==2.12.5
172
+ pandas==2.3.3
173
+ nvidia-cusolver-cu12==11.6.1.9
174
+ matplotlib==3.10.7
175
+ jupyter_server_terminals==0.5.3
176
+ jupyter_client==8.6.3
177
+ jsonschema-specifications==2025.9.1
178
+ ipython==9.6.0
179
+ hydra-core==1.3.2
180
+ huggingface-hub==0.35.3
181
+ httpx==0.28.1
182
+ GitPython==3.1.46
183
+ furl==2.1.4
184
+ cryptography==46.0.4
185
+ arrow==1.3.0
186
+ argon2-cffi-bindings==25.1.0
187
+ aiohttp==3.13.1
188
+ wandb==0.24.0
189
+ typer==0.21.1
190
+ torch==2.6.0
191
+ tokenizers==0.22.1
192
+ seaborn==0.13.2
193
+ safehttpx==0.1.7
194
+ jsonschema==4.25.1
195
+ joypy==0.2.6
196
+ isoduration==20.11.0
197
+ ipywidgets==8.1.7
198
+ ipykernel==6.30.1
199
+ gradio_client==2.0.3
200
+ fastapi==0.128.0
201
+ Authlib==1.6.6
202
+ argon2-cffi==25.1.0
203
+ transformers==4.57.6
204
+ nbformat==5.10.4
205
+ mlstm_kernels==2.0.2
206
+ jupyter-console==6.6.3
207
+ gradio==6.5.1
208
+ datasets==4.3.0
209
+ clearml==1.16.4
210
+ accelerate==1.10.1
211
+ xlstm==2.0.4
212
+ nbclient==0.10.2
213
+ jupyter-events==0.12.0
214
+ trackio==0.15.0
215
+ nbconvert==7.16.6
216
+ jupyter_server==2.17.0
217
+ notebook_shim==0.2.4
218
+ jupyterlab_server==2.27.3
219
+ jupyter-lsp==2.3.0
220
+ nbclassic==1.3.3
221
+ jupyterlab==4.4.9
222
+ notebook==7.4.7
223
+ jupyter_contrib_core==0.4.2
224
+ jupyter==1.1.1
225
+ jupyter_nbextensions_configurator==0.6.4
226
+ causal-conv1d==1.5.0.post8
227
+ flash_attn==2.7.4.post1
228
+ mamba-ssm==2.2.4
229
+ hnet==0.0.1
230
+ autocommand==2.2.2
231
+ backports.tarfile==1.2.0
232
+ importlib_metadata==8.0.0
233
+ inflect==7.3.1
234
+ jaraco.collections==5.1.0
235
+ jaraco.context==5.3.0
236
+ jaraco.functools==4.0.1
237
+ jaraco.text==3.12.1
238
+ more-itertools==10.3.0
239
+ packaging==24.2
240
+ platformdirs==4.2.2
241
+ tomli==2.0.1
242
+ typeguard==4.3.0
243
+ typing_extensions==4.12.2
244
+ wheel==0.45.1
245
+ zipp==3.19.2
train_hnet_with_docstring_18_04/wandb/run-20260417_085757-sa79g3yl/files/wandb-metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"os": "Linux-5.15.0-173-generic-x86_64-with-glibc2.39", "python": "CPython 3.12.0", "started_at": "2026-04-17T08:57:57.464191Z", "program": "/workspace/byte-llms-code/code_completion_exp/train_hnet/train.py", "code_path": "code_completion_exp/train_hnet/train.py", "code_path_local": "train.py", "git": {"remote_url": "https://github.com/naryst/byte-llms-code.git", "commit": "056a135fbb34bc28ed3adfeeb2f4ac97cbf12a89"}, "email": "nikita@local.ru", "root": "/workspace/byte-llms-code/code_completion_exp/train_hnet", "host": "3e675e030992", "executable": "/venv/bytellm/bin/python", "cpu_count": 112, "cpu_count_logical": 224, "gpu_type": "NVIDIA H100 80GB HBM3", "gpu_count": 2, "disk": {"/": {"total": "244813135872", "used": "36382741504"}}, "memory": {"total": "1622968434688"}, "gpu_nvidia": [{"name": "NVIDIA H100 80GB HBM3", "memory_total": "85520809984", "cuda_cores": 16896, "architecture": "Hopper", "uuid": "GPU-3c87d2f8-c595-49bd-bb1d-1ebfd19c6fb0"}, {"name": "NVIDIA H100 80GB HBM3", "memory_total": "85520809984", "cuda_cores": 16896, "architecture": "Hopper", "uuid": "GPU-beb9a6b0-ebef-1f4c-d886-465c96f57ca4"}], "cuda_version": "12.9", "writer_id": "1enfm68bplbg421e1aqnc3guby2j3hk6"}
train_hnet_with_docstring_18_04/wandb/run-20260417_085757-sa79g3yl/logs/debug-internal.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-04-17T08:57:57.764884416Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"}
2
+ {"time":"2026-04-17T08:57:58.240853232Z","level":"INFO","msg":"stream: created new stream","id":"sa79g3yl"}
3
+ {"time":"2026-04-17T08:57:58.241001941Z","level":"INFO","msg":"handler: started","stream_id":"sa79g3yl"}
4
+ {"time":"2026-04-17T08:57:58.241279701Z","level":"INFO","msg":"stream: started","id":"sa79g3yl"}
5
+ {"time":"2026-04-17T08:57:58.241368206Z","level":"INFO","msg":"writer: started","stream_id":"sa79g3yl"}
6
+ {"time":"2026-04-17T08:57:58.241360163Z","level":"INFO","msg":"sender: started","stream_id":"sa79g3yl"}
7
+ {"time":"2026-04-17T08:57:58.392376272Z","level":"ERROR","msg":"git repo not found","error":"repository does not exist"}
8
+ {"time":"2026-04-17T12:02:07.081755663Z","level":"ERROR","msg":"api: HTTP error","status":403,"method":"POST","url":"https://wandb.platun0v.ru/files/nikita/code-completion-full-docstring/sa79g3yl/file_stream"}
9
+ {"time":"2026-04-17T12:02:07.082162122Z","level":"ERROR+4","msg":"filestream: fatal error: filestream: failed to upload: 403 Forbidden url=https://wandb.platun0v.ru/files/nikita/code-completion-full-docstring/sa79g3yl/file_stream: "}
10
+ {"time":"2026-04-17T15:45:07.499206911Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
11
+ {"time":"2026-04-17T15:45:07.50174734Z","level":"INFO","msg":"handler: operation stats","stats":{}}
12
+ {"time":"2026-04-17T15:45:07.50517879Z","level":"INFO","msg":"stream: closing","id":"sa79g3yl"}
13
+ {"time":"2026-04-17T15:45:07.505192348Z","level":"INFO","msg":"handler: closed","stream_id":"sa79g3yl"}
14
+ {"time":"2026-04-17T15:45:07.505334051Z","level":"INFO","msg":"sender: closed","stream_id":"sa79g3yl"}
15
+ {"time":"2026-04-17T15:45:07.505346596Z","level":"INFO","msg":"stream: closed","id":"sa79g3yl"}
train_hnet_with_docstring_18_04/wandb/run-20260417_085757-sa79g3yl/logs/debug.log ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-04-17 08:57:57,465 INFO MainThread:14733 [wandb_setup.py:_flush():81] Current SDK version is 0.24.0
2
+ 2026-04-17 08:57:57,465 INFO MainThread:14733 [wandb_setup.py:_flush():81] Configure stats pid to 14733
3
+ 2026-04-17 08:57:57,465 INFO MainThread:14733 [wandb_setup.py:_flush():81] Loading settings from environment variables
4
+ 2026-04-17 08:57:57,465 INFO MainThread:14733 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /workspace/byte-llms-code/code_completion_exp/train_hnet/wandb/run-20260417_085757-sa79g3yl/logs/debug.log
5
+ 2026-04-17 08:57:57,466 INFO MainThread:14733 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /workspace/byte-llms-code/code_completion_exp/train_hnet/wandb/run-20260417_085757-sa79g3yl/logs/debug-internal.log
6
+ 2026-04-17 08:57:57,466 INFO MainThread:14733 [wandb_init.py:init():844] calling init triggers
7
+ 2026-04-17 08:57:57,466 INFO MainThread:14733 [wandb_init.py:init():849] wandb.init called with sweep_config: {}
8
+ config: {'model': {'config_path': '/workspace/byte-llms-code/hnet_project/configs/hnet_2stage_XL_code.json', 'checkpoint_path': '/workspace/byte-llms-code/hnet_project/checkpoints/hnet_2stage_XL_code.pt'}, 'training': {'epochs': 3, 'batch_size': 4, 'eval_batch_size': 24, 'gradient_accumulation_steps': 4, 'lr': 0.0001, 'weight_decay': 0.1, 'betas': [0.9, 0.95], 'eps': 1e-08, 'lr_scheduler': 'wsd', 'warmup_ratio': 0.1, 'decay_ratio': 0.2, 'warmup_steps': 100, 'min_lr_ratio': 0.1, 'lr_multiplier': [2.0, 1.5, 1.0], 'load_balancing_weight': 0.01, 'load_balancing_N': 4.0, 'max_grad_norm': 1.0, 'use_amp': True, 'resume': False, 'resume_checkpoint': None, 'warmup_model': True}, 'data': {'path': '/workspace/byte-llms-code/code_completion_exp/datasets/data_V5_full', 'max_context_len': 4096, 'max_target_len': 256, 'num_workers': 0, 'pin_memory': True, 'max_train_samples': None, 'max_val_samples': None}, 'logging': {'log_interval': 10, 'save_interval': 3000, 'eval_interval': 1000, 'save_every_epoch': True}, 'tracking': {'enabled': True, 'backend': 'wandb', 'project': 'code-completion-full-docstring', 'run_name': 'hnet_train', 'entity': None, 'base_url': 'https://wandb.platun0v.ru'}, 'paths': {'output_dir': 'outputs/2026-04-17/08-57-56'}, 'seed': 42, 'device': 'cuda', '_wandb': {'code_path': 'code/code_completion_exp/train_hnet/train.py'}}
9
+ 2026-04-17 08:57:57,466 INFO MainThread:14733 [wandb_init.py:init():892] starting backend
10
+ 2026-04-17 08:57:57,736 INFO MainThread:14733 [wandb_init.py:init():895] sending inform_init request
11
+ 2026-04-17 08:57:57,761 INFO MainThread:14733 [wandb_init.py:init():903] backend started and connected
12
+ 2026-04-17 08:57:57,767 INFO MainThread:14733 [wandb_init.py:init():973] updated telemetry
13
+ 2026-04-17 08:57:57,799 INFO MainThread:14733 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout
14
+ 2026-04-17 08:57:58,390 INFO MainThread:14733 [wandb_init.py:init():1044] starting run threads in backend
15
+ 2026-04-17 08:57:58,538 INFO MainThread:14733 [wandb_run.py:_console_start():2529] atexit reg
16
+ 2026-04-17 08:57:58,538 INFO MainThread:14733 [wandb_run.py:_redirect():2377] redirect: wrap_raw
17
+ 2026-04-17 08:57:58,538 INFO MainThread:14733 [wandb_run.py:_redirect():2446] Wrapping output streams.
18
+ 2026-04-17 08:57:58,538 INFO MainThread:14733 [wandb_run.py:_redirect():2469] Redirects installed.
19
+ 2026-04-17 08:57:58,541 INFO MainThread:14733 [wandb_init.py:init():1084] run started, returning control to user process
20
+ 2026-04-17 15:45:06,141 INFO MainThread:14733 [wandb_run.py:_finish():2295] finishing run nikita/code-completion-full-docstring/sa79g3yl
21
+ 2026-04-17 15:45:06,141 INFO MainThread:14733 [wandb_run.py:_atexit_cleanup():2494] got exitcode: 0
22
+ 2026-04-17 15:45:06,141 INFO MainThread:14733 [wandb_run.py:_restore():2476] restore
23
+ 2026-04-17 15:45:06,141 INFO MainThread:14733 [wandb_run.py:_restore():2482] restore done
24
+ 2026-04-17 15:45:07,504 INFO MainThread:14733 [wandb_run.py:_footer_sync_info():3870] logging synced files
train_hnet_with_docstring_18_04/wandb/run-20260417_085757-sa79g3yl/run-sa79g3yl.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83ddbba5f49bfcc57498f835024667a996817dcfda0a3b48ee53acb818ba76b5
3
+ size 7745921