Upload 11 files
- config.json +31 -0
- generation_config.json +7 -0
- merges.txt +0 -0
- pytorch_model.bin +3 -0
- run.py +128 -0
- runs/Aug10_20-51-17_coalabserver/events.out.tfevents.1691680887.coalabserver.1729318.0 +3 -0
- special_tokens_map.json +30 -0
- tokenizer.json +0 -0
- tokenizer_config.json +40 -0
- training_args.bin +3 -0
- vocab.json +0 -0
config.json
ADDED
@@ -0,0 +1,31 @@
+{
+  "_name_or_path": "facebook/opt-350m",
+  "_remove_final_layer_norm": false,
+  "activation_dropout": 0.0,
+  "activation_function": "relu",
+  "architectures": [
+    "OPTForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 2,
+  "do_layer_norm_before": false,
+  "dropout": 0.1,
+  "enable_bias": true,
+  "eos_token_id": 2,
+  "ffn_dim": 4096,
+  "hidden_size": 1024,
+  "init_std": 0.02,
+  "layer_norm_elementwise_affine": true,
+  "layerdrop": 0.0,
+  "max_position_embeddings": 2048,
+  "model_type": "opt",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 1,
+  "prefix": "</s>",
+  "torch_dtype": "float32",
+  "transformers_version": "4.30.2",
+  "use_cache": true,
+  "vocab_size": 50272,
+  "word_embed_proj_dim": 512
+}
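This is the stock facebook/opt-350m architecture config; transformers reads it when instantiating the model. A minimal loading sketch, assuming the files are hosted in a Hub repo (the repo id below is a placeholder, not this repository's actual path):

from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "your-username/opt-350m-sft"  # placeholder Hub id for this upload
model = AutoModelForCausalLM.from_pretrained(repo_id)
tokenizer = AutoTokenizer.from_pretrained(repo_id)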
generation_config.json
ADDED
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 2,
+  "eos_token_id": 2,
+  "pad_token_id": 1,
+  "transformers_version": "4.30.2"
+}
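These defaults are picked up automatically by model.generate() once the checkpoint is loaded. A quick sketch to inspect them (same placeholder repo id as above):

from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("your-username/opt-350m-sft")  # placeholder repo id
print(gen_config.bos_token_id, gen_config.eos_token_id, gen_config.pad_token_id)  # 2, 2, 1 per the file above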
merges.txt
ADDED
The diff for this file is too large to render.
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9bdc0f6367beb73dcde585f6cc516c7a38274144593c3962af90a886d67a7ce3
+size 1324917277
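The three lines above are a Git LFS pointer (spec version, SHA-256 of the blob, size in bytes); the ~1.3 GB weights live in LFS storage rather than in git history, and downloading through huggingface_hub resolves the pointer to the real file. A sketch, again with a placeholder repo id:

from huggingface_hub import hf_hub_download

weights_path = hf_hub_download(repo_id="your-username/opt-350m-sft", filename="pytorch_model.bin")  # placeholder repo id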
run.py
ADDED
@@ -0,0 +1,128 @@
+# coding=utf-8
+# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from dataclasses import dataclass, field
+from typing import Optional
+
+import torch
+from datasets import load_dataset
+from peft import LoraConfig
+from tqdm import tqdm
+from transformers import AutoModelForCausalLM, BitsAndBytesConfig, HfArgumentParser, TrainingArguments
+
+from trl import SFTTrainer
+
+
+tqdm.pandas()
+
+
+# Define and parse arguments.
+@dataclass
+class ScriptArguments:
+    """
+    The name of the causal LM model we wish to fine-tune with SFTTrainer.
+    """
+
+    model_name: Optional[str] = field(default="facebook/opt-350m", metadata={"help": "the model name"})
+    dataset_name: Optional[str] = field(
+        default="timdettmers/openassistant-guanaco", metadata={"help": "the dataset name"}
+    )
+    dataset_text_field: Optional[str] = field(default="text", metadata={"help": "the text field of the dataset"})
+    log_with: Optional[str] = field(default=None, metadata={"help": "use 'wandb' to log with wandb"})
+    learning_rate: Optional[float] = field(default=1.41e-5, metadata={"help": "the learning rate"})
+    batch_size: Optional[int] = field(default=8, metadata={"help": "the batch size"})  # 64 in the original
+    seq_length: Optional[int] = field(default=512, metadata={"help": "Input sequence length"})
+    gradient_accumulation_steps: Optional[int] = field(
+        default=2, metadata={"help": "the number of gradient accumulation steps"}
+    )
+    load_in_8bit: Optional[bool] = field(default=False, metadata={"help": "load the model in 8 bits precision"})
+    load_in_4bit: Optional[bool] = field(default=False, metadata={"help": "load the model in 4 bits precision"})
+    use_peft: Optional[bool] = field(default=False, metadata={"help": "Whether to use PEFT or not to train adapters"})
+    trust_remote_code: Optional[bool] = field(default=True, metadata={"help": "Enable `trust_remote_code`"})
+    output_dir: Optional[str] = field(default="./", metadata={"help": "the output directory"})
+    peft_lora_r: Optional[int] = field(default=8, metadata={"help": "the r parameter of the LoRA adapters"})
+    peft_lora_alpha: Optional[int] = field(default=2, metadata={"help": "the alpha parameter of the LoRA adapters"})
+    logging_steps: Optional[int] = field(default=1, metadata={"help": "the number of logging steps"})
+    use_auth_token: Optional[bool] = field(default=True, metadata={"help": "Use HF auth token to access the model"})
+    num_train_epochs: Optional[int] = field(default=2, metadata={"help": "the number of training epochs"})
+    max_steps: Optional[int] = field(default=-1, metadata={"help": "the number of training steps"})
+
+
+parser = HfArgumentParser(ScriptArguments)
+script_args = parser.parse_args_into_dataclasses()[0]
+
+# Step 1: Load the model
+if script_args.load_in_8bit and script_args.load_in_4bit:
+    raise ValueError("You can't load the model in 8 bits and 4 bits at the same time")
+elif script_args.load_in_8bit or script_args.load_in_4bit:
+    quantization_config = BitsAndBytesConfig(
+        load_in_8bit=script_args.load_in_8bit, load_in_4bit=script_args.load_in_4bit
+    )
+    # This means: fit the entire model on GPU 0
+    device_map = {"": 0}
+    torch_dtype = torch.bfloat16
+else:
+    device_map = None
+    quantization_config = None
+    torch_dtype = None
+
+model = AutoModelForCausalLM.from_pretrained(
+    script_args.model_name,
+    quantization_config=quantization_config,
+    device_map=device_map,
+    trust_remote_code=script_args.trust_remote_code,
+    torch_dtype=torch_dtype,
+    use_auth_token=script_args.use_auth_token,
+)
+
+# Step 2: Load the dataset
+dataset = load_dataset(script_args.dataset_name, split="train")
+
+# Step 3: Define the training arguments
+training_args = TrainingArguments(
+    output_dir=script_args.output_dir,
+    per_device_train_batch_size=script_args.batch_size,
+    gradient_accumulation_steps=script_args.gradient_accumulation_steps,
+    learning_rate=script_args.learning_rate,
+    logging_steps=script_args.logging_steps,
+    num_train_epochs=script_args.num_train_epochs,
+    max_steps=script_args.max_steps,
+    report_to=script_args.log_with,
+)
+
+# Step 4: Define the LoraConfig
+if script_args.use_peft:
+    peft_config = LoraConfig(
+        r=script_args.peft_lora_r,
+        lora_alpha=script_args.peft_lora_alpha,
+        bias="none",
+        task_type="CAUSAL_LM",
+    )
+else:
+    peft_config = None
+
+# Step 5: Define the Trainer
+trainer = SFTTrainer(
+    model=model,
+    args=training_args,
+    max_seq_length=script_args.seq_length,
+    train_dataset=dataset,
+    dataset_text_field=script_args.dataset_text_field,
+    peft_config=peft_config,
+)
+
+trainer.train()
+
+# Step 6: Save the model
+trainer.save_model(script_args.output_dir)
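For reference, a typical invocation of this script; every flag maps to a ScriptArguments field, and the values shown are illustrative rather than the exact settings used for this checkpoint:

python run.py --model_name facebook/opt-350m --dataset_name timdettmers/openassistant-guanaco --batch_size 8 --gradient_accumulation_steps 2 --use_peft --load_in_4bit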
runs/Aug10_20-51-17_coalabserver/events.out.tfevents.1691680887.coalabserver.1729318.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12c426a36bd9563e1f716ccb0339dc03c9c6dc5062a547e5b4f82f68a3d093bb
+size 197127
special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
+{
+  "bos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
tokenizer.json
ADDED
The diff for this file is too large to render.
tokenizer_config.json
ADDED
@@ -0,0 +1,40 @@
+{
+  "add_bos_token": true,
+  "add_prefix_space": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "clean_up_tokenization_spaces": true,
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "errors": "replace",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": {
+    "__type": "AddedToken",
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
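Together with special_tokens_map.json, merges.txt, and vocab.json, this file fully specifies OPT's byte-level BPE tokenizer (GPT2Tokenizer class). A quick sanity-check sketch, again with a placeholder repo id:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("your-username/opt-350m-sft")  # placeholder repo id
print(tok.pad_token, tok.eos_token)  # "<pad>" and "</s>", matching the files above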
training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:335121ffe87b7016aa5fd6bf5ae4639f01b9fb9a5624b64502fe3a37561c6fb9
+size 3899
vocab.json
ADDED
The diff for this file is too large to render.