Update file tokenizer.py
Browse files- tokenizer.py +16 -7
tokenizer.py
CHANGED
|
@@ -147,20 +147,28 @@ class Tokenizer:
|
|
| 147 |
|
| 148 |
|
| 149 |
|
| 150 |
-
def c_compile(self, c_dir):
|
| 151 |
-
import os
|
| 152 |
-
import subprocess
|
| 153 |
|
| 154 |
-
|
| 155 |
-
|
|
|
|
|
|
|
| 156 |
|
| 157 |
-
|
|
|
|
| 158 |
c_dir = os.path.join(script_dir, c_dir)
|
| 159 |
|
| 160 |
-
# Invoke Make on the c_tokenizer directory
|
| 161 |
subprocess.run(['make'], cwd=c_dir)
|
| 162 |
|
| 163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
def c_encode(self, text): #TODO: Implement
|
| 166 |
c_dir = "c_tokenizer"
|
|
@@ -168,5 +176,6 @@ class Tokenizer:
|
|
| 168 |
|
| 169 |
self.to_file(c_dir + c_file)
|
| 170 |
self.c_compile(c_dir)
|
|
|
|
| 171 |
|
| 172 |
return [1, 2, 3, 4]
|
|
|
|
| 147 |
|
| 148 |
|
| 149 |
|
|
|
|
|
|
|
|
|
|
| 150 |
|
| 151 |
+
# NOTE: error handling is weak here — subprocess/build failures are not
# surfaced to the caller. Consider an explicit failure handler.
|
| 152 |
+
|
| 153 |
+
import os
|
| 154 |
+
import subprocess
|
| 155 |
|
| 156 |
+
def c_compile(self, c_dir):
    """Build the C tokenizer by running ``make`` in *c_dir*.

    Args:
        c_dir: Directory containing the C tokenizer sources, given
            relative to this source file (not the process CWD).

    Raises:
        subprocess.CalledProcessError: If the build exits non-zero.
            (The original silently ignored build failures — the file's
            own NOTE flags this as the weak part of the project.)
        FileNotFoundError: If ``make`` is not on PATH.
    """
    # Resolve c_dir relative to this file so the build works no matter
    # where the interpreter was launched from.
    script_dir = os.path.dirname(__file__)
    c_dir = os.path.join(script_dir, c_dir)

    # Invoke Make on the C tokenizer directory; check=True surfaces
    # a failed build instead of continuing with a stale/missing binary.
    subprocess.run(['make'], cwd=c_dir, check=True)
+
def c_run(self, c_dir, text):
    """Write *text* to ``dataset.txt`` in *c_dir* and run the compiled tokenizer.

    Args:
        c_dir: Directory of the compiled C tokenizer (contains ``a.out``
            and ``tokenizer.bin``), relative to this source file.
        text: Raw text handed to the C binary via ``dataset.txt``.

    Raises:
        subprocess.CalledProcessError: If the tokenizer binary exits non-zero.
        FileNotFoundError: If ``a.out`` has not been built (run c_compile first).
    """
    # Resolve c_dir relative to this file, mirroring c_compile.
    script_dir = os.path.dirname(__file__)
    c_dir = os.path.join(script_dir, c_dir)

    # BUG FIX: the original called c_dir.join('dataset.txt') — that is
    # str.join (interleaves c_dir between characters), not path joining —
    # and never bound the file object (`as f` was missing), so
    # f.write(text) raised NameError.
    with open(os.path.join(c_dir, 'dataset.txt'), 'w') as f:
        f.write(text)

    # BUG FIX: with shell=False (the default), argv must be a list of
    # separate arguments; the original passed one combined string
    # './a.out dataset.txt tokenizer.bin' that execvp cannot resolve.
    subprocess.run(['./a.out', 'dataset.txt', 'tokenizer.bin'],
                   cwd=c_dir, check=True)
| 173 |
def c_encode(self, text): #TODO: Implement
|
| 174 |
c_dir = "c_tokenizer"
|
|
|
|
| 176 |
|
| 177 |
self.to_file(c_dir + c_file)
|
| 178 |
self.c_compile(c_dir)
|
| 179 |
+
self.c_run(c_dir)
|
| 180 |
|
| 181 |
return [1, 2, 3, 4]
|