nvan13 committed on Dec 31, 2025

Commit

a0d95b0

verified ·

1 Parent(s): 4386fa0

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +10 -0
.gitignore +216 -0
README.md +3 -0
SVD64_llama2/config.json +96 -0
SVD64_llama2/generation_config.json +6 -0
SVD64_llama2/pytorch_model-00001-of-00006.bin +3 -0
SVD64_llama2/pytorch_model-00002-of-00006.bin +3 -0
SVD64_llama2/pytorch_model-00003-of-00006.bin +3 -0
SVD64_llama2/pytorch_model-00004-of-00006.bin +3 -0
SVD64_llama2/pytorch_model-00005-of-00006.bin +3 -0
SVD64_llama2/pytorch_model-00006-of-00006.bin +3 -0
SVD64_llama2/pytorch_model.bin.index.json +0 -0
SVD_llama2/config.json +80 -0
SVD_llama2/generation_config.json +6 -0
SVD_llama2/pytorch_model-00001-of-00006.bin +3 -0
SVD_llama2/pytorch_model-00002-of-00006.bin +3 -0
SVD_llama2/pytorch_model-00003-of-00006.bin +3 -0
SVD_llama2/pytorch_model-00004-of-00006.bin +3 -0
SVD_llama2/pytorch_model-00005-of-00006.bin +3 -0
SVD_llama2/pytorch_model-00006-of-00006.bin +3 -0
SVD_llama2/pytorch_model.bin.index.json +0 -0
bash_scripts/all.sh +3 -0
bash_scripts/cms_eval.sh +18 -0
bash_scripts/ft.sh +1 -0
bash_scripts/main.sh +32 -0
conf_hydra/config.yaml +16 -0
conf_hydra/data/cifa.yaml +3 -0
conf_hydra/data/cs15k.yaml +2 -0
conf_hydra/data/faci.yaml +2 -0
conf_hydra/data/math7k.yaml +2 -0
config_draccus/config.yaml +39 -0
dataset/AQuA/AQuA.json +0 -0
dataset/AQuA/aqua_1.json +0 -0
dataset/AQuA/test.json +0 -0
dataset/ARC-Challenge/test.json +0 -0
dataset/ARC-Challenge/train.json +0 -0
dataset/ARC-Easy/test.json +0 -0
dataset/ARC-Easy/train.json +0 -0
dataset/AddSub/AddSub.json +0 -0
dataset/AddSub/addsub_1.json +0 -0
dataset/AddSub/test.json +0 -0
dataset/MultiArith/MultiArith.json +0 -0
dataset/MultiArith/multiarith_1.json +0 -0
dataset/MultiArith/test.json +0 -0
dataset/SVAMP/SVAMP.json +0 -0
dataset/SVAMP/svamp_1.json +0 -0
dataset/SVAMP/test.json +0 -0
dataset/SingleEq/SingleEq.json +0 -0
dataset/SingleEq/singleeq_1.json +0 -0
dataset/SingleEq/test.json +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+dataset/hellaswag/test.json filter=lfs diff=lfs merge=lfs -text
+dataset/hellaswag/train.json filter=lfs diff=lfs merge=lfs -text
+dataset/social_i_qa/train.json filter=lfs diff=lfs merge=lfs -text
+dataset/winogrande/train.json filter=lfs diff=lfs merge=lfs -text
+ft-training_set/alpaca_data.json filter=lfs diff=lfs merge=lfs -text
+ft-training_set/alpaca_data_cleaned.json filter=lfs diff=lfs merge=lfs -text
+ft-training_set/commonsense_147k.json filter=lfs diff=lfs merge=lfs -text
+ft-training_set/commonsense_170k.json filter=lfs diff=lfs merge=lfs -text
+ft-training_set/math_14k.json filter=lfs diff=lfs merge=lfs -text
+ft-training_set/math_50k.json filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,216 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+#poetry.toml
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+#   https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+#pdm.lock
+#pdm.toml
+.pdm-python
+.pdm-build/
+# pixi
+#   Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+#pixi.lock
+#   Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+#   in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+# Visual Studio Code
+#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+#  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+#  and can be added to the global gitignore or merged into this file. However, if you prefer,
+#  you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+# Ruff stuff:
+.ruff_cache/
+# PyPI configuration file
+.pypirc
+# Cursor
+#  Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+#  exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+#  refer to https://docs.cursor.com/context/ignore-files
+.cursorignore
+.cursorindexingignore
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
+.DS_Store
+trainer_output/
+outputs/
+src/note.ipynb
+wandb/
+runs/
+src/*.ipynb

README.md ADDED Viewed

	@@ -0,0 +1,3 @@


+# Instance-based-FT
+
+

SVD64_llama2/config.json ADDED Viewed

	@@ -0,0 +1,96 @@

+{
+  "architectures": [
+    "IbaXs_LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "dtype": "float32",
+  "eos_token_id": 2,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 11008,
+  "main_cfg": {
+    "data": {
+      "data_path": "ft-training_set/commonsense_15k.json",
+      "dataset_name": "CMS",
+      "val_set_size": 512
+    },
+    "hyperxs": {
+      "drop_out": 0.0,
+      "latent_feature_dim": 512,
+      "layer_embed_dim": 48,
+      "layer_norm_epsilon": 1e-05,
+      "lora_attn_dim": 64,
+      "module_embed_dim": 16,
+      "modules_per_layer": 7,
+      "n_cross_attn_tokens": 8,
+      "out_proj_dim": 128
+    },
+    "infer": {
+      "datasets": [
+        "boolq"
+      ],
+      "eval_batch_size": 128,
+      "is_json": true,
+      "model_path": ""
+    },
+    "model": {
+      "base_model_name": "meta-llama/Llama-2-7b-hf",
+      "cutoff_len": 512,
+      "train_on_inputs": false
+    },
+    "run_text": "def",
+    "seed": 42,
+    "training": {
+      "bf16": false,
+      "bf16_full_eval": false,
+      "dataloader_num_workers": 4,
+      "dataloader_persistent_workers": true,
+      "dataloader_pin_memory": true,
+      "dataloader_prefetch_factor": 1,
+      "eval_delay": 0,
+      "eval_steps": 1,
+      "eval_strategy": "steps",
+      "gradient_accumulation_steps": 1,
+      "gradient_checkpointing": false,
+      "gradient_checkpointing_kwargs": {
+        "use_reentrant": false
+      },
+      "learning_rate": 1e-36,
+      "load_best_model_at_end": true,
+      "logging_steps": 1,
+      "lr_scheduler_type": "cosine",
+      "max_steps": -1,
+      "num_train_epochs": 3.0,
+      "num_workers": 2,
+      "optim": "adamw_torch",
+      "output_dir": "exps",
+      "per_device_eval_batch_size": 32,
+      "per_device_train_batch_size": 16,
+      "report_to": "none",
+      "resume_from_checkpoint": false,
+      "save_safetensors": false,
+      "save_steps": 1.0,
+      "save_strategy": "steps",
+      "save_total_limit": 1,
+      "warmup_ratio": 0.1
+    }
+  },
+  "max_position_embeddings": 4096,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 32,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "transformers_version": "4.57.3",
+  "use_cache": true,
+  "vocab_size": 32000
+}

SVD64_llama2/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "transformers_version": "4.57.3"
+}

SVD64_llama2/pytorch_model-00001-of-00006.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6dee5760aa2c07fd6927b786fc12b15f2ec603d17b46bb546e8717c75a180692
+size 4989191305

SVD64_llama2/pytorch_model-00002-of-00006.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e931345fe7ff136231c6af1ff23be2010a041b8375f4580f94a4847887fea9f4
+size 4977180989

SVD64_llama2/pytorch_model-00003-of-00006.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2655866df455099c513be4709e8c017a389480cd26df66b3fda25f0f97608df6
+size 4977181117

SVD64_llama2/pytorch_model-00004-of-00006.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ee5a72698fefc1ee8b02693829da9a1b43a400e609843ac544c0e9da7a65ab6e
+size 4977181117

SVD64_llama2/pytorch_model-00005-of-00006.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:36254a063dcd05b2e8cb1aea8e8422d1a386c6cd00d770a11838691b061bb747
+size 4977181117

SVD64_llama2/pytorch_model-00006-of-00006.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6560e24e92388928012afc6a0503c8f70b6dc13e5c1118e378f3105ca24f573c
+size 2805274337

SVD64_llama2/pytorch_model.bin.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

SVD_llama2/config.json ADDED Viewed

	@@ -0,0 +1,80 @@

+{
+  "architectures": [
+    "IbaXs_LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "dtype": "float32",
+  "eos_token_id": 2,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 11008,
+  "main_cfg": {
+    "data": {
+      "data_path": "./ft-training_set/math_7k.json",
+      "dataset_name": "math7k"
+    },
+    "hyperxs": {
+      "latent_feature_dim": 256,
+      "layer_embed_dim": 48,
+      "layer_norm_epsilon": 1e-05,
+      "lora_attn_dim": 32,
+      "module_embed_dim": 16,
+      "modules_per_layer": 7,
+      "n_cross_attn_tokens": 4,
+      "out_proj_dim": 64
+    },
+    "model": {
+      "base_model_name": "meta-llama/Llama-2-7b-hf"
+    },
+    "seed": 42,
+    "training": {
+      "batch_size": 128,
+      "bf16": false,
+      "bf16_full_eval": false,
+      "cutoff_len": 207,
+      "dataloader_num_workers": 4,
+      "dataloader_persistent_workers": true,
+      "dataloader_pin_memory": true,
+      "dataloader_prefetch_factor": 1,
+      "eval_steps": 20,
+      "eval_strategy": "steps",
+      "gradient_checkpointing": false,
+      "learning_rate": 1e-05,
+      "logging_steps": 1,
+      "lr_scheduler_type": "cosine",
+      "max_steps": 10,
+      "num_train_epochs": 1.0,
+      "num_workers": 4,
+      "optim": "adamw_torch",
+      "output_dir": "runs",
+      "per_device_train_batch_size": 32,
+      "per_device_valid_batch_size": 64,
+      "report_to": "none",
+      "resume_from_checkpoint": false,
+      "save_safetensors": false,
+      "save_steps": 0,
+      "torch_compile": false,
+      "train_on_inputs": false,
+      "val_set_size": 128,
+      "warmup_ratio": 0.1
+    }
+  },
+  "max_position_embeddings": 4096,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 32,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "transformers_version": "4.57.3",
+  "use_cache": true,
+  "vocab_size": 32000
+}

SVD_llama2/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "transformers_version": "4.57.3"
+}

SVD_llama2/pytorch_model-00001-of-00006.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de1bb1ef2161e2171f7707c43e781875c06698e5893026e4ecfdc53d7ffcecbc
+size 4905313922

SVD_llama2/pytorch_model-00002-of-00006.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:30322e5ccbc982a56027b451744c097a751eddef0ad193067d738e4b193c8734
+size 4917215549

SVD_llama2/pytorch_model-00003-of-00006.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:323ee40a4b915523360c3493e2a967b68ffe0efa7f3ddc581c7e7366bd026f79
+size 4917215677

SVD_llama2/pytorch_model-00004-of-00006.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c19895014f0506d3d3f066cac56a8005bf0b7162adca41f8e26459fc1ec6361d
+size 4917215677

SVD_llama2/pytorch_model-00005-of-00006.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:63a79581ebc6e6fc2e8c759406848e2fdbc438d4a81fa3ea6ec9627468b2d6ef
+size 4917215677

SVD_llama2/pytorch_model-00006-of-00006.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de55a5650f0ec10fb6fbc0ed1a8990a9fc8fa34bd117d160cafd1cf1aafa4098
+size 2710278891

SVD_llama2/pytorch_model.bin.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

bash_scripts/all.sh ADDED Viewed

	@@ -0,0 +1,3 @@

+bash ./bash_scripts/main.sh
+bash ./bash_scripts/2main.sh
+bash bash_scripts/cms_eval.sh

bash_scripts/cms_eval.sh ADDED Viewed

	@@ -0,0 +1,18 @@

+#!/bin/bash
+export IBA_CONFIG=./config_draccus/config.yaml
+export TOKENIZERS_PARALLELISM=true
+# CUDA Include (/cuda.h)
+CUDA_INCLUDE_PATH="/home/work/miniconda3/envs/allm/include"
+export CPATH=$CPATH:$CUDA_INCLUDE_PATH
+export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:$CUDA_INCLUDE_PATH
+export WANDB_PROJECT="IBA_CMS"
+date +"%F %T"
+accelerate launch --dynamo_backend no --main_process_port 41353 -m src.cms_eval \
+    --config_path $IBA_CONFIG --infer.model_path exps/CMS/t=30d23h18m55,mlr1.0e-04,b16,r64,n_ct8,t30d23h18m55,initdef,dr0.0,ep3.0,ds14607

bash_scripts/ft.sh ADDED Viewed

	@@ -0,0 +1 @@


+python -m src.ft #--config_path ./conf_hydra/config.yaml

bash_scripts/main.sh ADDED Viewed

	@@ -0,0 +1,32 @@

+#!/bin/bash
+# accelerate launch --main_process_port 41353 -m src.main
+# python -m src.main
+# bash bash_scripts/main.sh
+export IBA_CONFIG=./config_draccus/config.yaml
+export TOKENIZERS_PARALLELISM=true
+# CUDA Include (/cuda.h)
+CUDA_INCLUDE_PATH="/home/work/miniconda3/envs/allm/include"
+export CPATH=$CPATH:$CUDA_INCLUDE_PATH
+export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:$CUDA_INCLUDE_PATH
+export WANDB_PROJECT="IBA_CMS"
+date +"%F %T"
+STEP=400
+# accelerate launch --dynamo_backend no --main_process_port 41353 -m src.main \
+# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.main \
+accelerate launch --dynamo_backend no --main_process_port 41353 -m src.main \
+    --config_path $IBA_CONFIG --training.save_strategy "steps" \
+    --training.save_steps $STEP --training.eval_steps $STEP --training.logging_steps $STEP \
+    --training.report_to wandb --training.learning_rate 2e-4 \
+# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.main \
+#     --config_path $IBA_CONFIG
+# wandb sync wandb/latest-run
+date +"%F %T"

conf_hydra/config.yaml ADDED Viewed

	@@ -0,0 +1,16 @@

+defaults:
+  - _self_
+  - data: cs15k
+hyperxs:
+  lora_attn_dim: 7
+#model:
+#   feature_dim: 768
+  #n_layersX: 24
+training:
+  batch_train: 64
+# seed: 42

conf_hydra/data/cifa.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+dataset_name: "cifa"
+data_path:
+  - ./data/cifa

conf_hydra/data/cs15k.yaml ADDED Viewed

	@@ -0,0 +1,2 @@


+dataset_name: "commonsense"
+data_path: './ft-training_set/commonsense_15k.json'

conf_hydra/data/faci.yaml ADDED Viewed

	@@ -0,0 +1,2 @@


+dataset_name: "faci"
+data_path: ./data/faci

conf_hydra/data/math7k.yaml ADDED Viewed

	@@ -0,0 +1,2 @@


+dataset_name: "math"
+data_path: './ft-training_set/math_7k.json'

config_draccus/config.yaml ADDED Viewed

	@@ -0,0 +1,39 @@

+# config.yaml
+#project_name: "Exp"
+hyperxs:
+  lora_attn_dim: 64
+  n_cross_attn_tokens: 8
+  latent_feature_dim: 512
+  out_proj_dim: 128
+# model:
+#   feature_dim: 768
+  #n_layersX: 24
+data:
+  dataset_name: "CMS"
+  # data_path: './ft-training_set/math_7k.json'
+  data_path: 'ft-training_set/commonsense_147k.json'
+  val_set_size: 512
+training:
+  learning_rate: 1e-4
+  per_device_train_batch_size: 16
+  per_device_eval_batch_size: 32
+  max_steps: -1
+  num_train_epochs: 2
+  logging_steps: 50
+  eval_strategy: steps
+  eval_steps: 50
+  report_to: wandb
+infer:
+  # datasets: ['boolq']
+  eval_batch_size: 128
+seed: 42

dataset/AQuA/AQuA.json ADDED Viewed

The diff for this file is too large to render. See raw diff

dataset/AQuA/aqua_1.json ADDED Viewed

The diff for this file is too large to render. See raw diff

dataset/AQuA/test.json ADDED Viewed

The diff for this file is too large to render. See raw diff

dataset/ARC-Challenge/test.json ADDED Viewed

The diff for this file is too large to render. See raw diff

dataset/ARC-Challenge/train.json ADDED Viewed

The diff for this file is too large to render. See raw diff

dataset/ARC-Easy/test.json ADDED Viewed

The diff for this file is too large to render. See raw diff

dataset/ARC-Easy/train.json ADDED Viewed

The diff for this file is too large to render. See raw diff

dataset/AddSub/AddSub.json ADDED Viewed

The diff for this file is too large to render. See raw diff

dataset/AddSub/addsub_1.json ADDED Viewed

The diff for this file is too large to render. See raw diff

dataset/AddSub/test.json ADDED Viewed

The diff for this file is too large to render. See raw diff

dataset/MultiArith/MultiArith.json ADDED Viewed

The diff for this file is too large to render. See raw diff

dataset/MultiArith/multiarith_1.json ADDED Viewed

The diff for this file is too large to render. See raw diff

dataset/MultiArith/test.json ADDED Viewed

The diff for this file is too large to render. See raw diff

dataset/SVAMP/SVAMP.json ADDED Viewed

The diff for this file is too large to render. See raw diff

dataset/SVAMP/svamp_1.json ADDED Viewed

The diff for this file is too large to render. See raw diff

dataset/SVAMP/test.json ADDED Viewed

The diff for this file is too large to render. See raw diff

dataset/SingleEq/SingleEq.json ADDED Viewed

The diff for this file is too large to render. See raw diff

dataset/SingleEq/singleeq_1.json ADDED Viewed

The diff for this file is too large to render. See raw diff

dataset/SingleEq/test.json ADDED Viewed

The diff for this file is too large to render. See raw diff