ibrahimmkhalid committed on
Commit
36bf517
·
1 Parent(s): 2ef951d

update extract script

Browse files
Files changed (1) hide show
  1. extract.py +5 -0
extract.py CHANGED
@@ -15,6 +15,11 @@ output_file_train = "./openwebtext/train_split.txt"
15
  output_file_val = "./openwebtext/val_split.txt"
16
  vocab_file = "./openwebtext/vocab.txt"
17
 
 
 
 
 
 
18
  # Extract the tar.xz file
19
  if not os.path.exists(folder_path):
20
      os.mkdir(folder_path)
 
15
  output_file_val = "./openwebtext/val_split.txt"
16
  vocab_file = "./openwebtext/vocab.txt"
17
 
18
+ if not os.path.exists(tarxz_path):
19
+     print("Please download the openwebtext.tar.xz file from:")
20
+     print("https://skylion007.github.io/OpenWebTextCorpus/")
21
+     exit()
22
+
23
  # Extract the tar.xz file
24
  if not os.path.exists(folder_path):
25
      os.mkdir(folder_path)