Update file tokenizer.py
Browse files- tokenizer.py +16 -7
tokenizer.py
CHANGED
|
@@ -147,20 +147,28 @@ class Tokenizer:
|
|
| 147 |
|
| 148 |
|
| 149 |
|
| 150 |
-
def c_compile(self, c_dir):
|
| 151 |
-
import os
|
| 152 |
-
import subprocess
|
| 153 |
|
| 154 |
-
|
| 155 |
-
|
|
|
|
|
|
|
| 156 |
|
| 157 |
-
|
|
|
|
| 158 |
c_dir = os.path.join(script_dir, c_dir)
|
| 159 |
|
| 160 |
-
# Invoke Make on the c_tokenizer directory
|
| 161 |
subprocess.run(['make'], cwd=c_dir)
|
| 162 |
|
| 163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
def c_encode(self, text): #TODO: Implement
|
| 166 |
c_dir = "c_tokenizer"
|
|
@@ -168,5 +176,6 @@ class Tokenizer:
|
|
| 168 |
|
| 169 |
self.to_file(c_dir + c_file)
|
| 170 |
self.c_compile(c_dir)
|
|
|
|
| 171 |
|
| 172 |
return [1, 2, 3, 4]
|
|
|
|
| 147 |
|
| 148 |
|
| 149 |
|
|
|
|
|
|
|
|
|
|
| 150 |
|
| 151 |
+
# NOTE: error handling is weak here — subprocess/build failures are not
# surfaced to the caller. Consider an explicit failure handler.
|
| 152 |
+
|
| 153 |
+
import os
|
| 154 |
+
import subprocess
|
| 155 |
|
| 156 |
+
def c_compile(self, c_dir):
    """Build the C tokenizer by running ``make`` in *c_dir*.

    Args:
        c_dir: Directory containing the C tokenizer sources, given
            relative to this source file (not the process CWD).

    Raises:
        subprocess.CalledProcessError: If the build exits non-zero.
            (The original silently ignored build failures — the file's
            own NOTE flags this as the weak part of the project.)
        FileNotFoundError: If ``make`` is not on PATH.
    """
    # Resolve c_dir relative to this file so the build works no matter
    # where the interpreter was launched from.
    script_dir = os.path.dirname(__file__)
    c_dir = os.path.join(script_dir, c_dir)

    # Invoke Make on the C tokenizer directory; check=True surfaces
    # a failed build instead of continuing with a stale/missing binary.
    subprocess.run(['make'], cwd=c_dir, check=True)
+
def c_run(self, c_dir, text):
    """Write *text* to ``dataset.txt`` in *c_dir* and run the compiled tokenizer.

    Args:
        c_dir: Directory of the compiled C tokenizer (contains ``a.out``
            and ``tokenizer.bin``), relative to this source file.
        text: Raw text handed to the C binary via ``dataset.txt``.

    Raises:
        subprocess.CalledProcessError: If the tokenizer binary exits non-zero.
        FileNotFoundError: If ``a.out`` has not been built (run c_compile first).
    """
    # Resolve c_dir relative to this file, mirroring c_compile.
    script_dir = os.path.dirname(__file__)
    c_dir = os.path.join(script_dir, c_dir)

    # BUG FIX: the original called c_dir.join('dataset.txt') — that is
    # str.join (interleaves c_dir between characters), not path joining —
    # and never bound the file object (`as f` was missing), so
    # f.write(text) raised NameError.
    with open(os.path.join(c_dir, 'dataset.txt'), 'w') as f:
        f.write(text)

    # BUG FIX: with shell=False (the default), argv must be a list of
    # separate arguments; the original passed one combined string
    # './a.out dataset.txt tokenizer.bin' that execvp cannot resolve.
    subprocess.run(['./a.out', 'dataset.txt', 'tokenizer.bin'],
                   cwd=c_dir, check=True)
| 173 |
def c_encode(self, text): #TODO: Implement
|
| 174 |
c_dir = "c_tokenizer"
|
|
|
|
| 176 |
|
| 177 |
self.to_file(c_dir + c_file)
|
| 178 |
self.c_compile(c_dir)
|
| 179 |
+
self.c_run(c_dir)
|
| 180 |
|
| 181 |
return [1, 2, 3, 4]
|