update model
- .gitattributes +3 -0
- .keep +0 -0
- README.md +14 -39
- adapter_config.json +26 -0
- adapter_model.bin +3 -0
- added_tokens.json +3 -0
- config.json +1 -0
- inject.json +1 -0
- special_tokens_map.json +6 -0
- tokenizer.json +0 -0
- tokenizer.model +0 -0
- tokenizer_config.json +33 -0
- training_args.bin +3 -0
.gitattributes
ADDED
@@ -0,0 +1,3 @@
+adapter_model.bin filter=lfs diff=lfs merge=lfs -text
+training_args.bin filter=lfs diff=lfs merge=lfs -text
+events.out.* filter=lfs diff=lfs merge=lfs -text
.keep
ADDED
File without changes
README.md
CHANGED
@@ -1,45 +1,20 @@
 ---
-
-tags:
-- azbert
-- pretraining
-- fill-mask
-widget:
-- text: "$f$ $($ $x$ [MASK] $y$ $)$"
-  example_title: "mathy"
-- text: "$x$ [MASK] $x$ $equal$ $2$ $x$"
-  example_title: "mathy"
-- text: "Proof by [MASK] that $n$ $fact$ $gt$ $3$ $n$ for $n$ $gt$ $6$"
-  example_title: "mathy"
-- text: "Proof by induction that $n$ [MASK] $gt$ $3$ $n$ for $n$ $gt$ $6$"
-  example_title: "mathy"
-- text: "The goal of life is [MASK]."
-  example_title: "philosophical"
-license: mit
+library_name: peft
 ---
+## Training procedure
 
-## About
-This repository is a boilerplate to push a mask-filling model to the HuggingFace Model Hub.
 
-
-
-
-
+The following `bitsandbytes` quantization config was used during training:
+- load_in_8bit: True
+- load_in_4bit: False
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: fp4
+- bnb_4bit_use_double_quant: False
+- bnb_4bit_compute_dtype: float32
+### Framework versions
 
-### Upload
-1. Put the model checkpoints and optionally log files (`*.bin` and log files `events.out.*`) to the `./ckpt` directory.
-2. Add a branch `hgf` to point to your huggingface repo. For example `git remote add hgf git@hf.co:approach0/mathy-vicuna-13B-FFT`
-3. Run the `upload2hgf.sh` script.
 
-
-```sh
-pip install pya0 # for math token preprocessing
-# testing local checkpoints:
-python test.py ./ckpt/math-tokenizer ./ckpt/2-2-0/encoder.ckpt
-# testing Model Hub checkpoints:
-python test.py approach0/coco-mae-220 approach0/coco-mae-220
-```
-> **Note**
-> Modify the test examples in `test.txt` to play with it.
-> The test file is tab-separated, the first column is additional positions you want to mask for the right-side sentence (useful for masking tokens in math markups).
-> A zero means no additional mask positions.
+- PEFT 0.4.0
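Not part of the commit, but for orientation: the quantization settings listed in the new README map one-to-one onto `transformers`' `BitsAndBytesConfig`. A minimal sketch of loading the base model that way, assuming the base repo id from `adapter_config.json` below; nothing here is code from this repository:

```python
# Minimal sketch (assumption, not repo code): the README's bitsandbytes
# settings expressed as a BitsAndBytesConfig for loading the base model.
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,                       # load_in_8bit: True
    llm_int8_threshold=6.0,                  # llm_int8_threshold: 6.0
    llm_int8_enable_fp32_cpu_offload=False,  # no fp32 CPU offload
    llm_int8_has_fp16_weight=False,          # llm_int8_has_fp16_weight: False
)

base = AutoModelForCausalLM.from_pretrained(
    "WizardLM/WizardMath-13B-V1.0",  # base model named in adapter_config.json
    quantization_config=bnb_config,
    device_map="auto",
)
```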
adapter_config.json
ADDED
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "WizardLM/WizardMath-13B-V1.0",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "gate_proj",
+    "down_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
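This `adapter_config.json` describes a LoRA adapter (`r=16`, `lora_alpha=16`, dropout 0.05) over all attention and MLP projections of the base model. A hypothetical usage sketch, assuming `base` is the 8-bit model loaded above and the adapter files sit in a local checkout of this repo:

```python
# Sketch under the assumptions above: inspect and attach the LoRA adapter.
from peft import PeftConfig, PeftModel

cfg = PeftConfig.from_pretrained(".")         # parses adapter_config.json
print(cfg.base_model_name_or_path)            # WizardLM/WizardMath-13B-V1.0

model = PeftModel.from_pretrained(base, ".")  # loads adapter_model.bin
model.eval()
```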
adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fe08d0f7339e3076f911dc383b57f8a7be4a9759294986daca113fd3f5d40ff
+size 125368013
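The weights themselves live in Git LFS; this pointer records only the blob's SHA-256 and byte size. A generic (non-repo-specific) way to check a fetched copy against the pointer:

```python
# Verify a downloaded LFS object against the pointer's oid and size.
import hashlib
import os

path = "adapter_model.bin"
with open(path, "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()
print(digest)                 # should match the oid sha256 above
print(os.path.getsize(path))  # should match the size above (125368013)
```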
added_tokens.json
ADDED
@@ -0,0 +1,3 @@
+{
+  "[PAD]": 32000
+}
config.json
ADDED
@@ -0,0 +1 @@
+{"mode": "finetune", "wandb": "True", "batch_size": "12", "context_length": "2048", "flash_atten": "True", "tokenizer": "WizardLM/WizardMath-13B-V1.0", "model": "WizardLM/WizardMath-13B-V1.0", "dataset": "approach0/retrieval-augment-finetune", "dataset_shuffle": "True", "dataset_map_fn": "datamap_double_train_for_query_and_answer", "collate_fn": "collate_final_dataset_for_generalist", "collate_add_eos": "True", "collate_debug": "True", "eval_during_train": "no", "peft": "{\n\"peft_attach_new\": true,\n\"peft_lora_rank\": 16,\n\"peft_lora_alpha\": 16,\n\"peft_lora_targets\": [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\", \"gate_proj\", \"down_proj\", \"up_proj\"]\n}", "trainer": "[\n\"--output_dir\", \"<exp_outdir>\",\n\"--save_strategy\", \"steps\",\n\"--save_steps\", 100,\n\"--save_total_limit\", 2,\n\"--evaluation_strategy\", \"no\",\n\"--eval_steps\", 10,\n\"--report_to\", \"wandb\",\n\"--logging_steps\", 1,\n\"--per_device_train_batch_size\", 1,\n\"--remove_unused_columns\", false,\n\"--gradient_accumulation_steps\", 12,\n\"--learning_rate\", 2e-5,\n\"--warmup_steps\", 10,\n\"--optim\", \"adamw_torch\",\n\"--num_train_epochs\", 2,\n\"--fp16\", false,\n\"--bf16\", true,\n\"--seed\", 70\n]", "load_in_8bit": "True", "device_map": null, "deepspeed": "{\n\"en_param_offload\": true\n}", "local_rank": "0", "run": "GCR-13B-wizardmath-generalist", "7b_vicuna_v1_5": "lmsys/vicuna-7b-v1.5", "7b_vicuna_v1_5_32k": "lmsys/longchat-7b-v1.5-32k", "test_lora_repo": "Lajonbot/vicuna-7b-v1.5-PL-lora_adapter_model", "13b_mathy_fft": "approach0/mathy-vicuna-13B-FFT", "7b_wizardmath": "WizardLM/WizardMath-7B-V1.0", "13b_wizardmath": "WizardLM/WizardMath-13B-V1.0", "70b_wizardmath": "WizardLM/WizardMath-70B-V1.0", "7b_mammoth": "TIGER-Lab/MAmmoTH-7B", "13b_mammoth": "TIGER-Lab/MAmmoTH-13B", "34b_mammoth_code": "TIGER-Lab/MAmmoTH-Coder-13B", "70b_mammoth": "TIGER-Lab/MAmmoTH-70B", "7b_tora": "llm-agents/tora-7b-v1.0", "13b_tora": "llm-agents/tora-13b-v1.0", "seed": "70", "output_dir": "./output", "add_sys_paths": "[\"../Progressive-Hint\", \"../math/modeling\"]"}
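The `trainer` field in this config serializes Hugging Face `Trainer` flags. As a readability aid only (an assumption about intent, not code from this repo), the same settings written out as `transformers.TrainingArguments`:

```python
# Sketch: the config's "trainer" flag list rewritten as TrainingArguments.
# Values are copied from the JSON above; "<exp_outdir>" is assumed to map
# to the config's "output_dir" of "./output".
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="./output",
    save_strategy="steps",
    save_steps=100,
    save_total_limit=2,
    evaluation_strategy="no",
    eval_steps=10,
    report_to="wandb",
    logging_steps=1,
    per_device_train_batch_size=1,
    remove_unused_columns=False,
    gradient_accumulation_steps=12,
    learning_rate=2e-5,
    warmup_steps=10,
    optim="adamw_torch",
    num_train_epochs=2,
    fp16=False,
    bf16=True,
    seed=70,
)
```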
inject.json
ADDED
@@ -0,0 +1 @@
+{"model": "WizardLM/WizardMath-13B-V1.0", "tokenizer": "WizardLM/WizardMath-13B-V1.0", "run": "GCR-13B-wizardmath-generalist"}
special_tokens_map.json
ADDED
@@ -0,0 +1,6 @@
+{
+  "bos_token": "</s>",
+  "eos_token": "</s>",
+  "pad_token": "[PAD]",
+  "unk_token": "</s>"
+}
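Together with `added_tokens.json`, this file reflects a `[PAD]` token appended at id 32000 on top of the 32000-entry Llama vocabulary. A sketch of the usual recipe that produces these files (an assumption; the repo's actual training code is not shown here):

```python
# Sketch: add a [PAD] token to the base tokenizer; it lands at id 32000,
# so the model's token embeddings must be resized before training.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("WizardLM/WizardMath-13B-V1.0")
tok.add_special_tokens({"pad_token": "[PAD]"})
print(tok.pad_token_id)  # 32000
# base.resize_token_embeddings(len(tok))  # required so id 32000 has a row
```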
tokenizer.json
ADDED
The diff for this file is too large to render.
tokenizer.model
ADDED
Binary file (500 kB)
tokenizer_config.json
ADDED
@@ -0,0 +1,33 @@
+{
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "clean_up_tokenization_spaces": false,
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "legacy": false,
+  "model_max_length": 1024,
+  "pad_token": null,
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
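Note that `pad_token` is `null` in this file; at load time it is filled in from `special_tokens_map.json`, which points it at the added `[PAD]` token. A quick check, assuming a local checkout of this repo:

```python
# Sketch: load the tokenizer from a local checkout and confirm the pad token
# resolves through special_tokens_map.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(".")
print(tok.pad_token)         # [PAD]
print(tok.model_max_length)  # 1024
```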
training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a715ccbe7aeb9ed4cc0f698cdfb669ff20589fbb57a74014fea3aad0a42588fb
+size 6267
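`training_args.bin` is the pickled `TrainingArguments` object that `transformers.Trainer` saves next to checkpoints. A generic way to inspect it (it is a pickle, so only load it if you trust the source):

```python
# Inspect the pickled TrainingArguments stored in training_args.bin.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args)
```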