drzeeIslam committed on
Commit
0d47e08
·
verified ·
1 Parent(s): 421e59d

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +3 -3
train.py CHANGED
@@ -1,8 +1,8 @@
1
  from datasets import load_dataset
2
  from transformers import AutoTokenizer, DataCollatorForLanguageModeling, Trainer, TrainingArguments, AutoModelForMaskedLM
3
 
4
- # Load dataset from local CSV
5
- dataset = load_dataset("text", data_files="chunks.csv")
6
 
7
  # Load tokenizer and model
8
  model_checkpoint = "distilbert-base-uncased"
@@ -36,5 +36,5 @@ trainer = Trainer(
36
  data_collator=data_collator
37
  )
38
 
39
- # Train the model
40
  trainer.train()
 
1
  from datasets import load_dataset
2
  from transformers import AutoTokenizer, DataCollatorForLanguageModeling, Trainer, TrainingArguments, AutoModelForMaskedLM
3
 
4
+ # Load dataset from Hugging Face Hub
5
+ dataset = load_dataset("drzeeIslam/nelson-gpt-chunks")
6
 
7
  # Load tokenizer and model
8
  model_checkpoint = "distilbert-base-uncased"
 
36
  data_collator=data_collator
37
  )
38
 
39
+ # Start training
40
  trainer.train()