nvan13 committed on
Commit
a0d95b0
·
verified ·
1 Parent(s): 4386fa0

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because the commit contains too many changes. See the raw diff for the complete change set.
Files changed (50) hide show
  1. .gitattributes +10 -0
  2. .gitignore +216 -0
  3. README.md +3 -0
  4. SVD64_llama2/config.json +96 -0
  5. SVD64_llama2/generation_config.json +6 -0
  6. SVD64_llama2/pytorch_model-00001-of-00006.bin +3 -0
  7. SVD64_llama2/pytorch_model-00002-of-00006.bin +3 -0
  8. SVD64_llama2/pytorch_model-00003-of-00006.bin +3 -0
  9. SVD64_llama2/pytorch_model-00004-of-00006.bin +3 -0
  10. SVD64_llama2/pytorch_model-00005-of-00006.bin +3 -0
  11. SVD64_llama2/pytorch_model-00006-of-00006.bin +3 -0
  12. SVD64_llama2/pytorch_model.bin.index.json +0 -0
  13. SVD_llama2/config.json +80 -0
  14. SVD_llama2/generation_config.json +6 -0
  15. SVD_llama2/pytorch_model-00001-of-00006.bin +3 -0
  16. SVD_llama2/pytorch_model-00002-of-00006.bin +3 -0
  17. SVD_llama2/pytorch_model-00003-of-00006.bin +3 -0
  18. SVD_llama2/pytorch_model-00004-of-00006.bin +3 -0
  19. SVD_llama2/pytorch_model-00005-of-00006.bin +3 -0
  20. SVD_llama2/pytorch_model-00006-of-00006.bin +3 -0
  21. SVD_llama2/pytorch_model.bin.index.json +0 -0
  22. bash_scripts/all.sh +3 -0
  23. bash_scripts/cms_eval.sh +18 -0
  24. bash_scripts/ft.sh +1 -0
  25. bash_scripts/main.sh +32 -0
  26. conf_hydra/config.yaml +16 -0
  27. conf_hydra/data/cifa.yaml +3 -0
  28. conf_hydra/data/cs15k.yaml +2 -0
  29. conf_hydra/data/faci.yaml +2 -0
  30. conf_hydra/data/math7k.yaml +2 -0
  31. config_draccus/config.yaml +39 -0
  32. dataset/AQuA/AQuA.json +0 -0
  33. dataset/AQuA/aqua_1.json +0 -0
  34. dataset/AQuA/test.json +0 -0
  35. dataset/ARC-Challenge/test.json +0 -0
  36. dataset/ARC-Challenge/train.json +0 -0
  37. dataset/ARC-Easy/test.json +0 -0
  38. dataset/ARC-Easy/train.json +0 -0
  39. dataset/AddSub/AddSub.json +0 -0
  40. dataset/AddSub/addsub_1.json +0 -0
  41. dataset/AddSub/test.json +0 -0
  42. dataset/MultiArith/MultiArith.json +0 -0
  43. dataset/MultiArith/multiarith_1.json +0 -0
  44. dataset/MultiArith/test.json +0 -0
  45. dataset/SVAMP/SVAMP.json +0 -0
  46. dataset/SVAMP/svamp_1.json +0 -0
  47. dataset/SVAMP/test.json +0 -0
  48. dataset/SingleEq/SingleEq.json +0 -0
  49. dataset/SingleEq/singleeq_1.json +0 -0
  50. dataset/SingleEq/test.json +0 -0
.gitattributes CHANGED
@@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ dataset/hellaswag/test.json filter=lfs diff=lfs merge=lfs -text
37
+ dataset/hellaswag/train.json filter=lfs diff=lfs merge=lfs -text
38
+ dataset/social_i_qa/train.json filter=lfs diff=lfs merge=lfs -text
39
+ dataset/winogrande/train.json filter=lfs diff=lfs merge=lfs -text
40
+ ft-training_set/alpaca_data.json filter=lfs diff=lfs merge=lfs -text
41
+ ft-training_set/alpaca_data_cleaned.json filter=lfs diff=lfs merge=lfs -text
42
+ ft-training_set/commonsense_147k.json filter=lfs diff=lfs merge=lfs -text
43
+ ft-training_set/commonsense_170k.json filter=lfs diff=lfs merge=lfs -text
44
+ ft-training_set/math_14k.json filter=lfs diff=lfs merge=lfs -text
45
+ ft-training_set/math_50k.json filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
208
+
209
+ .DS_Store
210
+
211
+ trainer_output/
212
+ outputs/
213
+ src/note.ipynb
214
+ wandb/
215
+ runs/
216
+ src/*.ipynb
README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Instance-based-FT
2
+
3
+
SVD64_llama2/config.json ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "IbaXs_LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 11008,
15
+ "main_cfg": {
16
+ "data": {
17
+ "data_path": "ft-training_set/commonsense_15k.json",
18
+ "dataset_name": "CMS",
19
+ "val_set_size": 512
20
+ },
21
+ "hyperxs": {
22
+ "drop_out": 0.0,
23
+ "latent_feature_dim": 512,
24
+ "layer_embed_dim": 48,
25
+ "layer_norm_epsilon": 1e-05,
26
+ "lora_attn_dim": 64,
27
+ "module_embed_dim": 16,
28
+ "modules_per_layer": 7,
29
+ "n_cross_attn_tokens": 8,
30
+ "out_proj_dim": 128
31
+ },
32
+ "infer": {
33
+ "datasets": [
34
+ "boolq"
35
+ ],
36
+ "eval_batch_size": 128,
37
+ "is_json": true,
38
+ "model_path": ""
39
+ },
40
+ "model": {
41
+ "base_model_name": "meta-llama/Llama-2-7b-hf",
42
+ "cutoff_len": 512,
43
+ "train_on_inputs": false
44
+ },
45
+ "run_text": "def",
46
+ "seed": 42,
47
+ "training": {
48
+ "bf16": false,
49
+ "bf16_full_eval": false,
50
+ "dataloader_num_workers": 4,
51
+ "dataloader_persistent_workers": true,
52
+ "dataloader_pin_memory": true,
53
+ "dataloader_prefetch_factor": 1,
54
+ "eval_delay": 0,
55
+ "eval_steps": 1,
56
+ "eval_strategy": "steps",
57
+ "gradient_accumulation_steps": 1,
58
+ "gradient_checkpointing": false,
59
+ "gradient_checkpointing_kwargs": {
60
+ "use_reentrant": false
61
+ },
62
+ "learning_rate": 1e-36,
63
+ "load_best_model_at_end": true,
64
+ "logging_steps": 1,
65
+ "lr_scheduler_type": "cosine",
66
+ "max_steps": -1,
67
+ "num_train_epochs": 3.0,
68
+ "num_workers": 2,
69
+ "optim": "adamw_torch",
70
+ "output_dir": "exps",
71
+ "per_device_eval_batch_size": 32,
72
+ "per_device_train_batch_size": 16,
73
+ "report_to": "none",
74
+ "resume_from_checkpoint": false,
75
+ "save_safetensors": false,
76
+ "save_steps": 1.0,
77
+ "save_strategy": "steps",
78
+ "save_total_limit": 1,
79
+ "warmup_ratio": 0.1
80
+ }
81
+ },
82
+ "max_position_embeddings": 4096,
83
+ "mlp_bias": false,
84
+ "model_type": "llama",
85
+ "num_attention_heads": 32,
86
+ "num_hidden_layers": 32,
87
+ "num_key_value_heads": 32,
88
+ "pretraining_tp": 1,
89
+ "rms_norm_eps": 1e-05,
90
+ "rope_scaling": null,
91
+ "rope_theta": 10000.0,
92
+ "tie_word_embeddings": false,
93
+ "transformers_version": "4.57.3",
94
+ "use_cache": true,
95
+ "vocab_size": 32000
96
+ }
SVD64_llama2/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.57.3"
6
+ }
SVD64_llama2/pytorch_model-00001-of-00006.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dee5760aa2c07fd6927b786fc12b15f2ec603d17b46bb546e8717c75a180692
3
+ size 4989191305
SVD64_llama2/pytorch_model-00002-of-00006.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e931345fe7ff136231c6af1ff23be2010a041b8375f4580f94a4847887fea9f4
3
+ size 4977180989
SVD64_llama2/pytorch_model-00003-of-00006.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2655866df455099c513be4709e8c017a389480cd26df66b3fda25f0f97608df6
3
+ size 4977181117
SVD64_llama2/pytorch_model-00004-of-00006.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee5a72698fefc1ee8b02693829da9a1b43a400e609843ac544c0e9da7a65ab6e
3
+ size 4977181117
SVD64_llama2/pytorch_model-00005-of-00006.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36254a063dcd05b2e8cb1aea8e8422d1a386c6cd00d770a11838691b061bb747
3
+ size 4977181117
SVD64_llama2/pytorch_model-00006-of-00006.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6560e24e92388928012afc6a0503c8f70b6dc13e5c1118e378f3105ca24f573c
3
+ size 2805274337
SVD64_llama2/pytorch_model.bin.index.json ADDED
The diff for this file is too large to render. See raw diff
 
SVD_llama2/config.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "IbaXs_LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 11008,
15
+ "main_cfg": {
16
+ "data": {
17
+ "data_path": "./ft-training_set/math_7k.json",
18
+ "dataset_name": "math7k"
19
+ },
20
+ "hyperxs": {
21
+ "latent_feature_dim": 256,
22
+ "layer_embed_dim": 48,
23
+ "layer_norm_epsilon": 1e-05,
24
+ "lora_attn_dim": 32,
25
+ "module_embed_dim": 16,
26
+ "modules_per_layer": 7,
27
+ "n_cross_attn_tokens": 4,
28
+ "out_proj_dim": 64
29
+ },
30
+ "model": {
31
+ "base_model_name": "meta-llama/Llama-2-7b-hf"
32
+ },
33
+ "seed": 42,
34
+ "training": {
35
+ "batch_size": 128,
36
+ "bf16": false,
37
+ "bf16_full_eval": false,
38
+ "cutoff_len": 207,
39
+ "dataloader_num_workers": 4,
40
+ "dataloader_persistent_workers": true,
41
+ "dataloader_pin_memory": true,
42
+ "dataloader_prefetch_factor": 1,
43
+ "eval_steps": 20,
44
+ "eval_strategy": "steps",
45
+ "gradient_checkpointing": false,
46
+ "learning_rate": 1e-05,
47
+ "logging_steps": 1,
48
+ "lr_scheduler_type": "cosine",
49
+ "max_steps": 10,
50
+ "num_train_epochs": 1.0,
51
+ "num_workers": 4,
52
+ "optim": "adamw_torch",
53
+ "output_dir": "runs",
54
+ "per_device_train_batch_size": 32,
55
+ "per_device_valid_batch_size": 64,
56
+ "report_to": "none",
57
+ "resume_from_checkpoint": false,
58
+ "save_safetensors": false,
59
+ "save_steps": 0,
60
+ "torch_compile": false,
61
+ "train_on_inputs": false,
62
+ "val_set_size": 128,
63
+ "warmup_ratio": 0.1
64
+ }
65
+ },
66
+ "max_position_embeddings": 4096,
67
+ "mlp_bias": false,
68
+ "model_type": "llama",
69
+ "num_attention_heads": 32,
70
+ "num_hidden_layers": 32,
71
+ "num_key_value_heads": 32,
72
+ "pretraining_tp": 1,
73
+ "rms_norm_eps": 1e-05,
74
+ "rope_scaling": null,
75
+ "rope_theta": 10000.0,
76
+ "tie_word_embeddings": false,
77
+ "transformers_version": "4.57.3",
78
+ "use_cache": true,
79
+ "vocab_size": 32000
80
+ }
SVD_llama2/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.57.3"
6
+ }
SVD_llama2/pytorch_model-00001-of-00006.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de1bb1ef2161e2171f7707c43e781875c06698e5893026e4ecfdc53d7ffcecbc
3
+ size 4905313922
SVD_llama2/pytorch_model-00002-of-00006.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30322e5ccbc982a56027b451744c097a751eddef0ad193067d738e4b193c8734
3
+ size 4917215549
SVD_llama2/pytorch_model-00003-of-00006.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:323ee40a4b915523360c3493e2a967b68ffe0efa7f3ddc581c7e7366bd026f79
3
+ size 4917215677
SVD_llama2/pytorch_model-00004-of-00006.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c19895014f0506d3d3f066cac56a8005bf0b7162adca41f8e26459fc1ec6361d
3
+ size 4917215677
SVD_llama2/pytorch_model-00005-of-00006.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63a79581ebc6e6fc2e8c759406848e2fdbc438d4a81fa3ea6ec9627468b2d6ef
3
+ size 4917215677
SVD_llama2/pytorch_model-00006-of-00006.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de55a5650f0ec10fb6fbc0ed1a8990a9fc8fa34bd117d160cafd1cf1aafa4098
3
+ size 2710278891
SVD_llama2/pytorch_model.bin.index.json ADDED
The diff for this file is too large to render. See raw diff
 
bash_scripts/all.sh ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ bash ./bash_scripts/main.sh
2
+ bash ./bash_scripts/2main.sh
3
+ bash bash_scripts/cms_eval.sh
bash_scripts/cms_eval.sh ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ #!/bin/bash
3
+
4
+ export IBA_CONFIG=./config_draccus/config.yaml
5
+ export TOKENIZERS_PARALLELISM=true
6
+
7
+ # CUDA Include (/cuda.h)
8
+ CUDA_INCLUDE_PATH="/home/work/miniconda3/envs/allm/include"
9
+
10
+ export CPATH=$CPATH:$CUDA_INCLUDE_PATH
11
+ export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:$CUDA_INCLUDE_PATH
12
+
13
+ export WANDB_PROJECT="IBA_CMS"
14
+
15
+ date +"%F %T"
16
+
17
+ accelerate launch --dynamo_backend no --main_process_port 41353 -m src.cms_eval \
18
+ --config_path $IBA_CONFIG --infer.model_path exps/CMS/t=30d23h18m55,mlr1.0e-04,b16,r64,n_ct8,t30d23h18m55,initdef,dr0.0,ep3.0,ds14607
bash_scripts/ft.sh ADDED
@@ -0,0 +1 @@
 
 
1
+ python -m src.ft #--config_path ./conf_hydra/config.yaml
bash_scripts/main.sh ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # accelerate launch --main_process_port 41353 -m src.main
4
+ # python -m src.main
5
+ # bash bash_scripts/main.sh
6
+
7
+ export IBA_CONFIG=./config_draccus/config.yaml
8
+ export TOKENIZERS_PARALLELISM=true
9
+
10
+ # CUDA Include (/cuda.h)
11
+ CUDA_INCLUDE_PATH="/home/work/miniconda3/envs/allm/include"
12
+
13
+ export CPATH=$CPATH:$CUDA_INCLUDE_PATH
14
+ export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:$CUDA_INCLUDE_PATH
15
+
16
+ export WANDB_PROJECT="IBA_CMS"
17
+
18
+ date +"%F %T"
19
+
20
+ STEP=400
21
+ # accelerate launch --dynamo_backend no --main_process_port 41353 -m src.main \
22
+ # accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.main \
23
+ accelerate launch --dynamo_backend no --main_process_port 41353 -m src.main \
24
+ --config_path $IBA_CONFIG --training.save_strategy "steps" \
25
+ --training.save_steps $STEP --training.eval_steps $STEP --training.logging_steps $STEP \
26
+ --training.report_to wandb --training.learning_rate 2e-4 \
27
+
28
+ # accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.main \
29
+ # --config_path $IBA_CONFIG
30
+
31
+ # wandb sync wandb/latest-run
32
+ date +"%F %T"
conf_hydra/config.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - _self_
3
+ - data: cs15k
4
+ hyperxs:
5
+ lora_attn_dim: 7
6
+
7
+ #model:
8
+ # feature_dim: 768
9
+ #n_layersX: 24
10
+
11
+
12
+
13
+ training:
14
+ batch_train: 64
15
+
16
+ # seed: 42
conf_hydra/data/cifa.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ dataset_name: "cifa"
2
+ data_path:
3
+ - ./data/cifa
conf_hydra/data/cs15k.yaml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ dataset_name: "commonsense"
2
+ data_path: './ft-training_set/commonsense_15k.json'
conf_hydra/data/faci.yaml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ dataset_name: "faci"
2
+ data_path: ./data/faci
conf_hydra/data/math7k.yaml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ dataset_name: "math"
2
+ data_path: './ft-training_set/math_7k.json'
config_draccus/config.yaml ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # config.yaml
3
+
4
+ #project_name: "Exp"
5
+
6
+ hyperxs:
7
+ lora_attn_dim: 64
8
+ n_cross_attn_tokens: 8
9
+ latent_feature_dim: 512
10
+ out_proj_dim: 128
11
+
12
+ # model:
13
+ # feature_dim: 768
14
+ #n_layersX: 24
15
+
16
+ data:
17
+ dataset_name: "CMS"
18
+ # data_path: './ft-training_set/math_7k.json'
19
+ data_path: 'ft-training_set/commonsense_147k.json'
20
+ val_set_size: 512
21
+
22
+ training:
23
+ learning_rate: 1e-4
24
+ per_device_train_batch_size: 16
25
+ per_device_eval_batch_size: 32
26
+ max_steps: -1
27
+ num_train_epochs: 2
28
+ logging_steps: 50
29
+ eval_strategy: steps
30
+ eval_steps: 50
31
+ report_to: wandb
32
+
33
+ infer:
34
+ # datasets: ['boolq']
35
+ eval_batch_size: 128
36
+
37
+ seed: 42
38
+
39
+
dataset/AQuA/AQuA.json ADDED
The diff for this file is too large to render. See raw diff
 
dataset/AQuA/aqua_1.json ADDED
The diff for this file is too large to render. See raw diff
 
dataset/AQuA/test.json ADDED
The diff for this file is too large to render. See raw diff
 
dataset/ARC-Challenge/test.json ADDED
The diff for this file is too large to render. See raw diff
 
dataset/ARC-Challenge/train.json ADDED
The diff for this file is too large to render. See raw diff
 
dataset/ARC-Easy/test.json ADDED
The diff for this file is too large to render. See raw diff
 
dataset/ARC-Easy/train.json ADDED
The diff for this file is too large to render. See raw diff
 
dataset/AddSub/AddSub.json ADDED
The diff for this file is too large to render. See raw diff
 
dataset/AddSub/addsub_1.json ADDED
The diff for this file is too large to render. See raw diff
 
dataset/AddSub/test.json ADDED
The diff for this file is too large to render. See raw diff
 
dataset/MultiArith/MultiArith.json ADDED
The diff for this file is too large to render. See raw diff
 
dataset/MultiArith/multiarith_1.json ADDED
The diff for this file is too large to render. See raw diff
 
dataset/MultiArith/test.json ADDED
The diff for this file is too large to render. See raw diff
 
dataset/SVAMP/SVAMP.json ADDED
The diff for this file is too large to render. See raw diff
 
dataset/SVAMP/svamp_1.json ADDED
The diff for this file is too large to render. See raw diff
 
dataset/SVAMP/test.json ADDED
The diff for this file is too large to render. See raw diff
 
dataset/SingleEq/SingleEq.json ADDED
The diff for this file is too large to render. See raw diff
 
dataset/SingleEq/singleeq_1.json ADDED
The diff for this file is too large to render. See raw diff
 
dataset/SingleEq/test.json ADDED
The diff for this file is too large to render. See raw diff