Squidnutz committed
Commit 10b8d6f · verified · 1 Parent(s): 85f8a74

Upload 3 files

Files changed (3)
  1. combined_tokenized_data.txt +0 -0
  2. requirements.txt +4 -0
  3. train_phi3.py +12 -0
combined_tokenized_data.txt ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ transformers==4.28.1
+ datasets==2.12.0
+ accelerate==0.21.0
+ torch==2.0.1
train_phi3.py ADDED
@@ -0,0 +1,12 @@
+ from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
+ from datasets import load_dataset
+
+ # Load model and tokenizer
+ model_name = "microsoft/Phi-3-mini-128k-instruct"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name)
+
+ # Load dataset
+ dataset = load_dataset("text", data_files="combined_tokenized_data.txt")["train"]
+
+ # ... (rest of your code for tokenization, data collator, training arguments, etc.)