Spaces:
Running
Running
Commit
·
1c8791f
1
Parent(s):
d37581a
update vocab file loc
Browse files
- extract.py +1 -1
- train_gpt_openwebtext.py +2 -2
extract.py
CHANGED
|
@@ -13,7 +13,7 @@ tarxz_path = "./openwebtext.tar.xz"
|
|
| 13 |
folder_path = "./openwebtext"
|
| 14 |
output_file_train = "./openwebtext/train_split.txt"
|
| 15 |
output_file_val = "./openwebtext/val_split.txt"
|
| 16 |
-
vocab_file = "./
|
| 17 |
|
| 18 |
if not os.path.exists(tarxz_path):
|
| 19 |
print("Please download the openwebtext.tar.xz file from:")
|
|
|
|
| 13 |
folder_path = "./openwebtext"
|
| 14 |
output_file_train = "./openwebtext/train_split.txt"
|
| 15 |
output_file_val = "./openwebtext/val_split.txt"
|
| 16 |
+
vocab_file = "./vocab.txt"
|
| 17 |
|
| 18 |
if not os.path.exists(tarxz_path):
|
| 19 |
print("Please download the openwebtext.tar.xz file from:")
|
train_gpt_openwebtext.py
CHANGED
|
@@ -19,10 +19,10 @@ n_head = 8
|
|
| 19 |
n_layer = 8
|
| 20 |
dropout = 0.2
|
| 21 |
|
| 22 |
-
if not os.path.exists("./
|
| 23 |
raise Exception("Please run extract.py first")
|
| 24 |
chars = ""
|
| 25 |
-
with open("./
|
| 26 |
text = f.read()
|
| 27 |
chars = sorted(list(set(text)))
|
| 28 |
|
|
|
|
| 19 |
n_layer = 8
|
| 20 |
dropout = 0.2
|
| 21 |
|
| 22 |
+
if not os.path.exists("./vocab.txt") or not os.path.exists("./openwebtext/train_split.txt") or not os.path.exists("./openwebtext/val_split.txt"):
|
| 23 |
raise Exception("Please run extract.py first")
|
| 24 |
chars = ""
|
| 25 |
+
with open("./vocab.txt", 'r', encoding='utf-8') as f:
|
| 26 |
text = f.read()
|
| 27 |
chars = sorted(list(set(text)))
|
| 28 |
|