flpelerin committed on
Commit
12bdec8
·
1 Parent(s): ab0fa9f

Update file tokenizer.py

Browse files
Files changed (1) hide show
  1. tokenizer.py +16 -7
tokenizer.py CHANGED
@@ -147,20 +147,28 @@ class Tokenizer:
147
 
148
 
149
 
150
- def c_compile(self, c_dir):
151
- import os
152
- import subprocess
153
 
154
- # Get the path of the current Python script
155
- script_dir = os.path.dirname(__file__)
 
 
156
 
157
- # Construct the path to the c_tokenizer directory
 
158
  c_dir = os.path.join(script_dir, c_dir)
159
 
160
- # Invoke Make on the c_tokenizer directory
161
  subprocess.run(['make'], cwd=c_dir)
162
 
163
 
 
 
 
 
 
 
 
 
 
164
 
165
  def c_encode(self, text): #TODO: Implement
166
  c_dir = "c_tokenizer"
@@ -168,5 +176,6 @@ class Tokenizer:
168
 
169
  self.to_file(c_dir + c_file)
170
  self.c_compile(c_dir)
 
171
 
172
  return [1, 2, 3, 4]
 
147
 
148
 
149
 
 
 
 
150
 
151
+ # Weak part of the project. Maybe implement a handler?
152
+
153
+ import os
154
+ import subprocess
155
 
156
def c_compile(self, c_dir):
    """Build the C tokenizer by running ``make`` in *c_dir*.

    Args:
        c_dir: Directory containing the Makefile, given relative to the
            directory of this source file (an absolute path is used as-is,
            per ``os.path.join`` semantics).

    Raises:
        FileNotFoundError: If ``make`` or the target directory is missing.
        subprocess.CalledProcessError: If ``make`` exits with a non-zero
            status.
    """
    # Imported at function scope: names bound in a class body are not
    # visible from inside its methods, so class-level imports would not
    # resolve here.
    import os
    import subprocess

    script_dir = os.path.dirname(__file__)
    c_dir = os.path.join(script_dir, c_dir)

    # check=True surfaces a failed build instead of silently continuing
    # with a stale or missing binary.
    subprocess.run(['make'], cwd=c_dir, check=True)
161
 
162
 
163
def c_run(self, c_dir, text):
    """Write *text* to ``dataset.txt`` in *c_dir* and run the C tokenizer on it.

    The compiled binary ``./a.out`` is invoked from inside *c_dir* with the
    arguments ``dataset.txt`` and ``tokenizer.bin``.

    Args:
        c_dir: Directory holding ``a.out``, given relative to the directory
            of this source file (an absolute path is used as-is, per
            ``os.path.join`` semantics).
        text: The text to write to ``dataset.txt``.

    Raises:
        FileNotFoundError: If *c_dir* or ``a.out`` does not exist.
        subprocess.CalledProcessError: If ``a.out`` exits with a non-zero
            status.
    """
    # Imported at function scope: names bound in a class body are not
    # visible from inside its methods, so class-level imports would not
    # resolve here.
    import os
    import subprocess

    script_dir = os.path.dirname(__file__)
    c_dir = os.path.join(script_dir, c_dir)

    # os.path.join builds the file path; str.join would interleave c_dir
    # between the characters of the file name.
    dataset_path = os.path.join(c_dir, 'dataset.txt')
    with open(dataset_path, 'w', encoding='utf-8') as f:
        f.write(text)

    # Each argument is its own list element; a single space-separated string
    # would be treated as the executable's file name.
    subprocess.run(
        ['./a.out', 'dataset.txt', 'tokenizer.bin'],
        cwd=c_dir,
        check=True,
    )
171
+
172
 
173
  def c_encode(self, text): #TODO: Implement
174
  c_dir = "c_tokenizer"
 
176
 
177
  self.to_file(c_dir + c_file)
178
  self.c_compile(c_dir)
179
+ self.c_run(c_dir)
180
 
181
  return [1, 2, 3, 4]