cpt core 4
Browse files- scripts/cpt_core_model_4.py +10 -3
scripts/cpt_core_model_4.py
CHANGED
|
@@ -29,7 +29,7 @@ model, tokenizer = FastLanguageModel.from_pretrained(
|
|
| 29 |
model = FastLanguageModel.get_peft_model(
|
| 30 |
model,
|
| 31 |
# r = 256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
|
| 32 |
-
r =
|
| 33 |
target_modules = [
|
| 34 |
"q_proj", "k_proj", "v_proj", "o_proj",
|
| 35 |
"gate_proj",
|
|
@@ -49,6 +49,9 @@ model = FastLanguageModel.get_peft_model(
|
|
| 49 |
# print(f'{model=}')
|
| 50 |
|
| 51 |
|
|
|
|
|
|
|
|
|
|
| 52 |
from datasets import Dataset
|
| 53 |
from litdata import TokensLoader, StreamingDataset
|
| 54 |
|
|
@@ -58,6 +61,7 @@ litgpt_streaming_dataset = StreamingDataset(
|
|
| 58 |
item_loader=TokensLoader(block_size=dataset_block_size),
|
| 59 |
)
|
| 60 |
|
|
|
|
| 61 |
def unlsoth_generator():
|
| 62 |
global litgpt_streaming_dataset
|
| 63 |
|
|
@@ -68,7 +72,9 @@ def unlsoth_generator():
|
|
| 68 |
# train_dataset = Dataset.from_generator(unlsoth_generator, streaming=True)
|
| 69 |
train_dataset = Dataset.from_generator(unlsoth_generator)
|
| 70 |
|
| 71 |
-
|
|
|
|
|
|
|
| 72 |
from trl import SFTTrainer
|
| 73 |
from transformers import TrainingArguments
|
| 74 |
from unsloth import is_bfloat16_supported
|
|
@@ -104,7 +110,8 @@ trainer = UnslothTrainer(
|
|
| 104 |
fp16=not is_bfloat16_supported(),
|
| 105 |
bf16=is_bfloat16_supported(),
|
| 106 |
logging_steps=1,
|
| 107 |
-
optim='adamw_8bit',
|
|
|
|
| 108 |
weight_decay=0.01,
|
| 109 |
lr_scheduler_type='cosine',
|
| 110 |
seed=23,
|
|
|
|
| 29 |
model = FastLanguageModel.get_peft_model(
|
| 30 |
model,
|
| 31 |
# r = 256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
|
| 32 |
+
r = 8, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
|
| 33 |
target_modules = [
|
| 34 |
"q_proj", "k_proj", "v_proj", "o_proj",
|
| 35 |
"gate_proj",
|
|
|
|
| 49 |
# print(f'{model=}')
|
| 50 |
|
| 51 |
|
| 52 |
+
#
|
| 53 |
+
#
|
| 54 |
+
#
|
| 55 |
from datasets import Dataset
|
| 56 |
from litdata import TokensLoader, StreamingDataset
|
| 57 |
|
|
|
|
| 61 |
item_loader=TokensLoader(block_size=dataset_block_size),
|
| 62 |
)
|
| 63 |
|
| 64 |
+
|
| 65 |
def unlsoth_generator():
|
| 66 |
global litgpt_streaming_dataset
|
| 67 |
|
|
|
|
| 72 |
# train_dataset = Dataset.from_generator(unlsoth_generator, streaming=True)
|
| 73 |
train_dataset = Dataset.from_generator(unlsoth_generator)
|
| 74 |
|
| 75 |
+
#
|
| 76 |
+
#
|
| 77 |
+
#
|
| 78 |
from trl import SFTTrainer
|
| 79 |
from transformers import TrainingArguments
|
| 80 |
from unsloth import is_bfloat16_supported
|
|
|
|
| 110 |
fp16=not is_bfloat16_supported(),
|
| 111 |
bf16=is_bfloat16_supported(),
|
| 112 |
logging_steps=1,
|
| 113 |
+
# optim='adamw_8bit',
|
| 114 |
+
optim='adamw',
|
| 115 |
weight_decay=0.01,
|
| 116 |
lr_scheduler_type='cosine',
|
| 117 |
seed=23,
|