flpelerin committed on
Commit
c8a8f2e
·
1 Parent(s): 11565c4

Update 2 files

Browse files

- /tokenizer.py
- /trainer.cli.py

Files changed (2) hide show
  1. tokenizer.py +3 -0
  2. trainer.cli.py +1 -1
tokenizer.py CHANGED
@@ -147,6 +147,9 @@ class Tokenizer:
147
 
148
 
149
  def c_compile(self):
 
 
 
150
  # Get the path of the current Python script
151
  script_dir = os.path.dirname(__file__)
152
 
 
147
 
148
 
149
  def c_compile(self):
150
+ import os
151
+ import subprocess
152
+
153
  # Get the path of the current Python script
154
  script_dir = os.path.dirname(__file__)
155
 
trainer.cli.py CHANGED
@@ -28,7 +28,7 @@ if __name__ == '__main__':
28
  dataset = Dataset(config.dataset)
29
 
30
  tokenizer = Tokenizer()
31
- tokenizer.train(dataset.text, max_length=config.tokenizer.max_length)
32
  ids = tokenizer.c_encode(dataset.text)
33
 
34
 
 
28
  dataset = Dataset(config.dataset)
29
 
30
  tokenizer = Tokenizer()
31
+ #tokenizer.train(dataset.text, max_length=config.tokenizer.max_length)
32
  ids = tokenizer.c_encode(dataset.text)
33
 
34