Saad381
/

SpectraGen

template:diffusion-lora

Model card Files Files and versions

Saad381 committed on Oct 11, 2024

Commit

dade59d

·

verified ·

1 Parent(s): 1abae7c

Upload train.py

Files changed (1) hide show

train.py +40 -0

train.py ADDED Viewed

	@@ -0,0 +1,40 @@

+from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
+from datasets import load_dataset
+# Load the model and tokenizer
+model = AutoModelForCausalLM.from_pretrained("Saad381/SpectraGen")
+tokenizer = AutoTokenizer.from_pretrained("Saad381/SpectraGen")
+# Load your dataset (CSV file assumed here)
+dataset = load_dataset('csv', data_files='dataset.csv')
+# Tokenize your dataset
+def tokenize_function(examples):
+    return tokenizer(examples["text"], padding="max_length", truncation=True)
+tokenized_datasets = dataset.map(tokenize_function, batched=True)
+# Define training arguments
+training_args = TrainingArguments(
+    output_dir='./results',          # output directory
+    evaluation_strategy="epoch",     # evaluate at end of each epoch
+    per_device_train_batch_size=8,   # batch size
+    num_train_epochs=3,              # number of training epochs
+    save_steps=10_000,               # steps to save checkpoint
+    save_total_limit=2,              # limit the total amount of checkpoints
+)
+# Initialize the Trainer
+trainer = Trainer(
+    model=model,
+    args=training_args,
+    train_dataset=tokenized_datasets["train"],
+    eval_dataset=tokenized_datasets["test"]
+)
+# Train the model
+trainer.train()
+# Save the model
+model.save_pretrained('./trained_model')
+tokenizer.save_pretrained('./trained_model')