Spaces:
Runtime error
Runtime error
def train_lora(epochs, batch_size, learning_rate):
    """Fine-tune ``base_model`` on the JSON dataset at ``DATASET_PATH`` and save to ``LORA_PATH``.

    Args:
        epochs: number of training epochs (may arrive as a string from a UI;
            coerced with ``int``).
        batch_size: per-device train batch size (coerced with ``int``).
        learning_rate: optimizer learning rate (coerced with ``float`` — UI
            widgets typically deliver this as a string too).

    Returns:
        A human-readable status string (success or error). Errors are caught
        and returned rather than raised so the caller can display them.
    """
    try:
        dataset = load_dataset("json", data_files=DATASET_PATH)

        # Causal-LM tokenization: concatenate prompt + completion into a
        # single sequence, fixed-length padded/truncated to 256 tokens.
        def tokenize_fn(example):
            return tokenizer(
                example["prompt"] + example["completion"],
                truncation=True,
                padding="max_length",
                max_length=256,
            )

        tokenized = dataset.map(tokenize_fn, batched=False)
        # Expose only the tensor columns the collator consumes.
        tokenized.set_format(type="torch", columns=["input_ids", "attention_mask"])

        # mlm=False -> causal LM objective; the collator builds labels
        # from input_ids (with padding positions masked out).
        data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

        training_args = TrainingArguments(
            output_dir=LORA_PATH,
            per_device_train_batch_size=int(batch_size),
            num_train_epochs=int(epochs),
            # BUG FIX: coerce explicitly — epochs/batch_size were already
            # cast, but a string learning_rate would break the optimizer.
            learning_rate=float(learning_rate),
            save_total_limit=1,
            logging_steps=10,
            push_to_hub=False,
        )

        trainer = Trainer(
            model=base_model,
            args=training_args,
            train_dataset=tokenized["train"],
            data_collator=data_collator,
        )
        trainer.train()

        base_model.save_pretrained(LORA_PATH)
        tokenizer.save_pretrained(LORA_PATH)
        # BUG FIX: report the actual save location instead of the
        # hard-coded "./lora_output" literal.
        return f"✅ Entrenamiento completado y guardado en {LORA_PATH}"
    except Exception as e:
        # Broad catch is deliberate: surface the failure as a status
        # string for the UI instead of crashing the app.
        return f"❌ Error durante el entrenamiento: {e}"