Fine-tuning ALBERT (albert-base-v2) for binary sequence classification with Hugging Face Transformers.
| from transformers import AlbertForSequenceClassification, AlbertTokenizer, Trainer, TrainingArguments | |
| from datasets import load_dataset | |
# Load a dataset (replace the paths with your dataset).
# NOTE(review): the "text" builder yields only a "text" column, but
# AlbertForSequenceClassification training requires a "labels" column —
# add labels (e.g. via a CSV/JSON dataset or a map() step) before
# training, or Trainer will fail. TODO: confirm label source.
dataset = load_dataset(
    "text",
    data_files={"train": "path/to/train.txt", "test": "path/to/test.txt"},
)

# Tokenizer is defined before preprocess_function so the read-before-def
# ordering is obvious (the original relied on map() running after this line).
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v2")


def preprocess_function(examples):
    """Tokenize a batch of examples, padding/truncating to the model max length."""
    return tokenizer(examples["text"], padding="max_length", truncation=True)


tokenized_dataset = dataset.map(preprocess_function, batched=True)

# Load the model; num_labels=2 for binary classification — adjust as needed.
model = AlbertForSequenceClassification.from_pretrained(
    "albert-base-v2", num_labels=2
)

# Define training arguments.
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    # BUG FIX: `evaluate_during_training` was removed in transformers v3;
    # `evaluation_strategy` is its replacement (run eval once per epoch).
    evaluation_strategy="epoch",
    logging_dir="./logs",
)

# Initialize the Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
)

# Train the model.
trainer.train()

# Save the fine-tuned model AND the tokenizer to the same directory so the
# checkpoint can be reloaded with from_pretrained() (the original saved only
# the model weights, leaving the checkpoint unusable without the tokenizer).
model.save_pretrained("path/to/save/model")
tokenizer.save_pretrained("path/to/save/model")