Upload train_survival.py with huggingface_hub
Browse files
- train_survival.py +11 -11
train_survival.py
CHANGED
|
@@ -10,10 +10,9 @@ import torch
|
|
| 10 |
import os
|
| 11 |
|
| 12 |
# Configuration
|
| 13 |
-
|
| 14 |
-
MODEL_ID = "Qwen/Qwen2.5-3B-Instruct"
|
| 15 |
DATASET_ID = "sunkencity/survival-instruct"
|
| 16 |
-
OUTPUT_MODEL_ID = "sunkencity/survival-expert-3b"
|
| 17 |
|
| 18 |
# Load Dataset
|
| 19 |
dataset = load_dataset(DATASET_ID, split="train")
|
|
@@ -30,10 +29,11 @@ def filter_empty(example):
|
|
| 30 |
dataset = dataset.filter(filter_empty)
|
| 31 |
|
| 32 |
# Load Model
|
|
|
|
| 33 |
bnb_config = BitsAndBytesConfig(
|
| 34 |
load_in_4bit=True,
|
| 35 |
bnb_4bit_quant_type="nf4",
|
| 36 |
-
bnb_4bit_compute_dtype=torch.
|
| 37 |
)
|
| 38 |
|
| 39 |
model = AutoModelForCausalLM.from_pretrained(
|
|
@@ -41,7 +41,7 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 41 |
quantization_config=bnb_config,
|
| 42 |
device_map="auto",
|
| 43 |
use_cache=False,
|
| 44 |
-
torch_dtype=torch.
|
| 45 |
)
|
| 46 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
| 47 |
tokenizer.pad_token = tokenizer.eos_token
|
|
@@ -50,7 +50,7 @@ tokenizer.pad_token = tokenizer.eos_token
|
|
| 50 |
def format_row(example):
|
| 51 |
instruction = example['instruction']
|
| 52 |
response = example['response']
|
| 53 |
-
text = f"
|
| 54 |
return {"text": text}
|
| 55 |
|
| 56 |
dataset = dataset.map(format_row)
|
|
@@ -69,14 +69,14 @@ peft_config = LoraConfig(
|
|
| 69 |
training_args = SFTConfig(
|
| 70 |
output_dir="./results",
|
| 71 |
num_train_epochs=3,
|
| 72 |
-
per_device_train_batch_size=
|
| 73 |
-
gradient_accumulation_steps=
|
| 74 |
learning_rate=2e-4,
|
| 75 |
logging_steps=10,
|
| 76 |
push_to_hub=True,
|
| 77 |
hub_model_id=OUTPUT_MODEL_ID,
|
| 78 |
-
fp16=
|
| 79 |
-
bf16=False, # Disable BF16
|
| 80 |
packing=False,
|
| 81 |
max_length=1024,
|
| 82 |
dataset_text_field="text"
|
|
@@ -96,4 +96,4 @@ trainer.train()
|
|
| 96 |
|
| 97 |
print("Pushing to hub...")
|
| 98 |
trainer.push_to_hub()
|
| 99 |
-
print("Done!")
|
|
|
|
| 10 |
import os
|
| 11 |
|
| 12 |
# Configuration
|
| 13 |
+
MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct"
|
|
|
|
| 14 |
DATASET_ID = "sunkencity/survival-instruct"
|
| 15 |
+
OUTPUT_MODEL_ID = "sunkencity/survival-expert-llama-3b"
|
| 16 |
|
| 17 |
# Load Dataset
|
| 18 |
dataset = load_dataset(DATASET_ID, split="train")
|
|
|
|
| 29 |
dataset = dataset.filter(filter_empty)
|
| 30 |
|
| 31 |
# Load Model
|
| 32 |
+
# We keep 4-bit loading for memory efficiency, but compute in float32 to avoid kernel issues
|
| 33 |
bnb_config = BitsAndBytesConfig(
|
| 34 |
load_in_4bit=True,
|
| 35 |
bnb_4bit_quant_type="nf4",
|
| 36 |
+
bnb_4bit_compute_dtype=torch.float32, # Changed to float32
|
| 37 |
)
|
| 38 |
|
| 39 |
model = AutoModelForCausalLM.from_pretrained(
|
|
|
|
| 41 |
quantization_config=bnb_config,
|
| 42 |
device_map="auto",
|
| 43 |
use_cache=False,
|
| 44 |
+
torch_dtype=torch.float32 # Changed to float32
|
| 45 |
)
|
| 46 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
| 47 |
tokenizer.pad_token = tokenizer.eos_token
|
|
|
|
| 50 |
def format_row(example):
|
| 51 |
instruction = example['instruction']
|
| 52 |
response = example['response']
|
| 53 |
+
text = f"Instruction: {instruction}\nResponse: {response}{tokenizer.eos_token}"
|
| 54 |
return {"text": text}
|
| 55 |
|
| 56 |
dataset = dataset.map(format_row)
|
|
|
|
| 69 |
training_args = SFTConfig(
|
| 70 |
output_dir="./results",
|
| 71 |
num_train_epochs=3,
|
| 72 |
+
per_device_train_batch_size=1, # Reduced batch size for FP32
|
| 73 |
+
gradient_accumulation_steps=16, # Increased accumulation to compensate
|
| 74 |
learning_rate=2e-4,
|
| 75 |
logging_steps=10,
|
| 76 |
push_to_hub=True,
|
| 77 |
hub_model_id=OUTPUT_MODEL_ID,
|
| 78 |
+
fp16=False, # Disable Mixed Precision
|
| 79 |
+
bf16=False, # Disable BF16
|
| 80 |
packing=False,
|
| 81 |
max_length=1024,
|
| 82 |
dataset_text_field="text"
|
|
|
|
| 96 |
|
| 97 |
print("Pushing to hub...")
|
| 98 |
trainer.push_to_hub()
|
| 99 |
+
print("Done!")
|