Spaces:
Paused
Paused
Update train.py
Browse files
train.py
CHANGED
|
@@ -239,7 +239,11 @@ def main(push_to_hub=True, is_inst_finetune=False):
|
|
| 239 |
dataset = load_data()
|
| 240 |
print("Loaded data.")
|
| 241 |
|
| 242 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
print("Making Corpus..")
|
| 244 |
training_corpus = get_training_corpus(dataset)
|
| 245 |
print("Made Corpus.")
|
|
@@ -247,10 +251,6 @@ def main(push_to_hub=True, is_inst_finetune=False):
|
|
| 247 |
print("Making Tokenizer..")
|
| 248 |
tokenizer = create_tokenizer(training_corpus)
|
| 249 |
print(f"Made Tokenizer with size {len(tokenizer)}.")
|
| 250 |
-
else:
|
| 251 |
-
print("Loading Tokenizer..")
|
| 252 |
-
tokenizer = load_tokenizer()
|
| 253 |
-
print("Loaded Tokenizer.")
|
| 254 |
|
| 255 |
# print("Adding Tokens..")
|
| 256 |
# num_new_tokens = update_tokenizer(tokenizer, dataset)
|
|
@@ -261,13 +261,13 @@ def main(push_to_hub=True, is_inst_finetune=False):
|
|
| 261 |
configure_tokenizer(tokenizer)
|
| 262 |
print("Added Tokens.")
|
| 263 |
|
| 264 |
-
if is_inst_finetune
|
| 265 |
print("Loading Model..")
|
| 266 |
model = load_model()
|
| 267 |
print("Loaded Model.")
|
| 268 |
else:
|
| 269 |
print("Creating Model..")
|
| 270 |
-
model = create_model(tokenizer)
|
| 271 |
print("Created Model.")
|
| 272 |
|
| 273 |
print("Resizing Token Embeddings..")
|
|
|
|
| 239 |
dataset = load_data()
|
| 240 |
print("Loaded data.")
|
| 241 |
|
| 242 |
+
if is_inst_finetune and INIT > 0:
|
| 243 |
+
print("Loading Tokenizer..")
|
| 244 |
+
tokenizer = load_tokenizer()
|
| 245 |
+
print("Loaded Tokenizer.")
|
| 246 |
+
else:
|
| 247 |
print("Making Corpus..")
|
| 248 |
training_corpus = get_training_corpus(dataset)
|
| 249 |
print("Made Corpus.")
|
|
|
|
| 251 |
print("Making Tokenizer..")
|
| 252 |
tokenizer = create_tokenizer(training_corpus)
|
| 253 |
print(f"Made Tokenizer with size {len(tokenizer)}.")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
|
| 255 |
# print("Adding Tokens..")
|
| 256 |
# num_new_tokens = update_tokenizer(tokenizer, dataset)
|
|
|
|
| 261 |
configure_tokenizer(tokenizer)
|
| 262 |
print("Added Tokens.")
|
| 263 |
|
| 264 |
+
if is_inst_finetune or INIT > 0:
|
| 265 |
print("Loading Model..")
|
| 266 |
model = load_model()
|
| 267 |
print("Loaded Model.")
|
| 268 |
else:
|
| 269 |
print("Creating Model..")
|
| 270 |
+
model = create_model(tokenizer)
|
| 271 |
print("Created Model.")
|
| 272 |
|
| 273 |
print("Resizing Token Embeddings..")
|