Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,7 +9,7 @@ import requests as rq
|
|
| 9 |
import gc
|
| 10 |
from tokenizers import ByteLevelBPETokenizer
|
| 11 |
|
| 12 |
-
dataset = load_dataset("nroggendorff/openhermes", split="train")
|
| 13 |
|
| 14 |
def get_training_corpus():
|
| 15 |
for i in range(0, len(dataset), 1000):
|
|
@@ -59,8 +59,8 @@ print(tokenizer.apply_chat_template([{"role": "user", "content": "Why is the sky
|
|
| 59 |
|
| 60 |
config = LlamaConfig(
|
| 61 |
vocab_size=tokenizer.vocab_size,
|
| 62 |
-
hidden_size=int(512 /
|
| 63 |
-
intermediate_size=int(1024 /
|
| 64 |
num_hidden_layers=int(8 / 4),
|
| 65 |
num_attention_heads=int(8 / 4),
|
| 66 |
max_position_embeddings=int(512 / 1),
|
|
@@ -99,7 +99,7 @@ args = TrainingArguments(
|
|
| 99 |
output_dir="mayo",
|
| 100 |
num_train_epochs=1,
|
| 101 |
gradient_accumulation_steps=4,
|
| 102 |
-
per_device_train_batch_size=
|
| 103 |
learning_rate=1e-5,
|
| 104 |
save_steps=100000,
|
| 105 |
fp16=True,
|
|
|
|
| 9 |
import gc
|
| 10 |
from tokenizers import ByteLevelBPETokenizer
|
| 11 |
|
| 12 |
+
dataset = load_dataset("nroggendorff/openhermes", split="train").select(range(int(1e+5)))
|
| 13 |
|
| 14 |
def get_training_corpus():
|
| 15 |
for i in range(0, len(dataset), 1000):
|
|
|
|
| 59 |
|
| 60 |
config = LlamaConfig(
|
| 61 |
vocab_size=tokenizer.vocab_size,
|
| 62 |
+
hidden_size=int(512 / 8),
|
| 63 |
+
intermediate_size=int(1024 / 8),
|
| 64 |
num_hidden_layers=int(8 / 4),
|
| 65 |
num_attention_heads=int(8 / 4),
|
| 66 |
max_position_embeddings=int(512 / 1),
|
|
|
|
| 99 |
output_dir="mayo",
|
| 100 |
num_train_epochs=1,
|
| 101 |
gradient_accumulation_steps=4,
|
| 102 |
+
per_device_train_batch_size=16,
|
| 103 |
learning_rate=1e-5,
|
| 104 |
save_steps=100000,
|
| 105 |
fp16=True,
|