# Scraped Hugging Face Spaces page header (not part of the script):
# Space status: Runtime error; file size: 2,118 bytes.
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from datasets import load_dataset
import torch
# Check for GPU and set device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load dataset (only the "train" split is used)
dataset = load_dataset("mrohith29/high-school-physics", split="train")

# Load model and its matching tokenizer
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)  # Move model to GPU/CPU

# Add padding token if missing. The tokenizer is later called with
# padding="max_length", which requires a pad token to exist.
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    # The vocabulary grew by one token, so the embedding matrix must grow too.
    model.resize_token_embeddings(len(tokenizer))
# Formatting function
def format_example(question, choices, answer, explanation):
    """Render one dataset row as a single prompt/response training string.

    Args:
        question: The question text.
        choices: The answer choices; interpolated with its default string
            form (a list is rendered as its ``repr``-style listing).
        answer: The correct answer.
        explanation: The explanation for the answer.

    Returns:
        A four-line string with ``### Instruction:``, ``### Choices:``,
        ``### Answer:`` and ``### Explanation:`` sections, in that order,
        separated by newlines and without a trailing newline.
    """
    return (
        f"### Instruction: {question}\n"
        f"### Choices: {choices}\n"
        f"### Answer: {answer}\n"
        f"### Explanation: {explanation}"
    )
# Tokenization (adds causal-LM labels so the Trainer can compute a loss)
def tokenize(examples):
    """Tokenize a batch of dataset rows and attach causal-LM labels.

    Args:
        examples: A batch as passed by ``dataset.map(..., batched=True)``:
            a mapping holding parallel lists under the keys "question",
            "choices", "answer" and "explanation".

    Returns:
        The tokenizer's encoding of the formatted texts, truncated and
        padded to 256 tokens, with an extra "labels" entry: a copy of
        "input_ids" in which padded positions are replaced by -100.
    """
    formatted_texts = [
        format_example(q, ch, a, exp)
        for q, ch, a, exp in zip(
            examples["question"],
            examples["choices"],
            examples["answer"],
            examples["explanation"],
        )
    ]
    encoded = tokenizer(
        formatted_texts,
        truncation=True,
        padding="max_length",
        max_length=256,
    )
    # Without "labels" the causal-LM forward pass returns no loss and
    # Trainer.train() fails. Mask by attention_mask rather than by pad id
    # so the fix also holds when the pad token is the same as the EOS token;
    # -100 is the index the loss function ignores.
    encoded["labels"] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(encoded["input_ids"], encoded["attention_mask"])
    ]
    return encoded
# Tokenize the whole split in batches; the raw text columns are dropped so
# only the tokenizer's outputs are handed to the Trainer.
tokenized_dataset = dataset.map(tokenize, batched=True, remove_columns=dataset.column_names)

# Training arguments (optimized for current hardware)
training_args = TrainingArguments(
    output_dir="./output",
    per_device_train_batch_size=4 if device == "cuda" else 2,  # Larger batches on GPU
    num_train_epochs=1,
    save_strategy="epoch",
    logging_steps=10,
    fp16=torch.cuda.is_available(),  # Enable only if GPU exists
    push_to_hub=False,
    dataloader_pin_memory=torch.cuda.is_available(),  # Pin memory only for GPU
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)
trainer.train()

# Save the fine-tuned weights and the tokenizer side by side so the output
# directory can be reloaded with from_pretrained("./output").
model.save_pretrained("./output")
tokenizer.save_pretrained("./output")
print(f"✅ Training complete on {device.upper()}! Model saved in ./output")