nroggendorff committed on
Commit
b39abab
·
verified ·
1 Parent(s): e1e5f7f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -9,7 +9,7 @@ import requests as rq
9
  import gc
10
  from tokenizers import ByteLevelBPETokenizer
11
 
12
- dataset = load_dataset("nroggendorff/openhermes", split="train")#.select(range(int(5e+4)))
13
 
14
  def get_training_corpus():
15
  for i in range(0, len(dataset), 1000):
@@ -59,8 +59,8 @@ print(tokenizer.apply_chat_template([{"role": "user", "content": "Why is the sky
59
 
60
  config = LlamaConfig(
61
  vocab_size=tokenizer.vocab_size,
62
- hidden_size=int(512 / 4),
63
- intermediate_size=int(1024 / 4),
64
  num_hidden_layers=int(8 / 4),
65
  num_attention_heads=int(8 / 4),
66
  max_position_embeddings=int(512 / 1),
@@ -99,7 +99,7 @@ args = TrainingArguments(
99
  output_dir="mayo",
100
  num_train_epochs=1,
101
  gradient_accumulation_steps=4,
102
- per_device_train_batch_size=8,
103
  learning_rate=1e-5,
104
  save_steps=100000,
105
  fp16=True,
 
9
  import gc
10
  from tokenizers import ByteLevelBPETokenizer
11
 
12
+ dataset = load_dataset("nroggendorff/openhermes", split="train").select(range(int(1e+5)))
13
 
14
  def get_training_corpus():
15
  for i in range(0, len(dataset), 1000):
 
59
 
60
  config = LlamaConfig(
61
  vocab_size=tokenizer.vocab_size,
62
+ hidden_size=int(512 / 8),
63
+ intermediate_size=int(1024 / 8),
64
  num_hidden_layers=int(8 / 4),
65
  num_attention_heads=int(8 / 4),
66
  max_position_embeddings=int(512 / 1),
 
99
  output_dir="mayo",
100
  num_train_epochs=1,
101
  gradient_accumulation_steps=4,
102
+ per_device_train_batch_size=16,
103
  learning_rate=1e-5,
104
  save_steps=100000,
105
  fp16=True,