Update 2 files
Browse files
- /tokenizer.py
- /trainer.cli.py
- tokenizer.py +3 -0
- trainer.cli.py +1 -1
tokenizer.py
CHANGED
|
@@ -147,6 +147,9 @@ class Tokenizer:
|
|
| 147 |
|
| 148 |
|
| 149 |
def c_compile(self):
|
|
|
|
|
|
|
|
|
|
| 150 |
# Get the path of the current Python script
|
| 151 |
script_dir = os.path.dirname(__file__)
|
| 152 |
|
|
|
|
| 147 |
|
| 148 |
|
| 149 |
def c_compile(self):
|
| 150 |
+
import os
|
| 151 |
+
import subprocess
|
| 152 |
+
|
| 153 |
# Get the path of the current Python script
|
| 154 |
script_dir = os.path.dirname(__file__)
|
| 155 |
|
trainer.cli.py
CHANGED
|
@@ -28,7 +28,7 @@ if __name__ == '__main__':
|
|
| 28 |
dataset = Dataset(config.dataset)
|
| 29 |
|
| 30 |
tokenizer = Tokenizer()
|
| 31 |
-
tokenizer.train(dataset.text, max_length=config.tokenizer.max_length)
|
| 32 |
ids = tokenizer.c_encode(dataset.text)
|
| 33 |
|
| 34 |
|
|
|
|
| 28 |
dataset = Dataset(config.dataset)
|
| 29 |
|
| 30 |
tokenizer = Tokenizer()
|
| 31 |
+
#tokenizer.train(dataset.text, max_length=config.tokenizer.max_length)
|
| 32 |
ids = tokenizer.c_encode(dataset.text)
|
| 33 |
|
| 34 |
|