Spaces:
Runtime error
Runtime error
File size: 1,478 Bytes
bc19ef1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
def train_lora(epochs, batch_size, learning_rate):
    """Fine-tune ``base_model`` on the JSON dataset and save model + tokenizer.

    Relies on module-level names defined outside this function:
    ``tokenizer``, ``base_model``, ``DATASET_PATH`` and ``LORA_PATH``.

    Args:
        epochs: Number of training epochs; converted with ``int()``.
        batch_size: Per-device train batch size; converted with ``int()``.
        learning_rate: Optimizer learning rate; converted with ``float()`` so
            numeric strings such as ``"2e-4"`` are accepted.

    Returns:
        A human-readable status string: a success message when training and
        saving finish, otherwise an error message embedding the exception
        text. Exceptions derived from ``Exception`` are not propagated.
    """
    try:
        # Convert hyperparameters up front so a malformed value is reported
        # before the dataset is loaded and tokenized.
        num_epochs = int(epochs)
        per_device_batch = int(batch_size)
        lr = float(learning_rate)

        dataset = load_dataset("json", data_files=DATASET_PATH)

        # padding="max_length" needs a pad token. Fall back to the EOS token
        # when none is configured; an already-set pad token is left untouched.
        if getattr(tokenizer, "pad_token", None) is None:
            eos_token = getattr(tokenizer, "eos_token", None)
            if eos_token is not None:
                tokenizer.pad_token = eos_token

        # Tokenize prompt and completion as one sequence of fixed length.
        def tokenize_fn(example):
            return tokenizer(
                example["prompt"] + example["completion"],
                truncation=True,
                padding="max_length",
                max_length=256,
            )

        tokenized = dataset.map(tokenize_fn, batched=False)

        # Expose only the tensor columns used for training.
        tokenized.set_format(
            type="torch", columns=["input_ids", "attention_mask"]
        )

        # mlm=False selects the causal (non-masked) language-modeling collator.
        data_collator = DataCollatorForLanguageModeling(
            tokenizer=tokenizer, mlm=False
        )

        training_args = TrainingArguments(
            output_dir=LORA_PATH,
            per_device_train_batch_size=per_device_batch,
            num_train_epochs=num_epochs,
            learning_rate=lr,
            save_total_limit=1,
            logging_steps=10,
            push_to_hub=False,
        )

        trainer = Trainer(
            model=base_model,
            args=training_args,
            train_dataset=tokenized["train"],
            data_collator=data_collator,
        )
        trainer.train()

        base_model.save_pretrained(LORA_PATH)
        tokenizer.save_pretrained(LORA_PATH)

        # NOTE(review): the message names ./lora_output while files are
        # written to LORA_PATH -- confirm the two agree.
        return "✅ Entrenamiento completado y guardado en ./lora_output"
    except Exception as e:
        # Boundary handler: report the failure as a status string to the
        # caller instead of raising.
        return f"❌ Error durante el entrenamiento: {e}"