Update tokenizer
Browse files
main.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import math
|
| 2 |
|
| 3 |
# Character substitution table: when a character is found here, its mapped
# value is appended to the preprocessed text instead of the character itself.
# Most entries map a Latin letter (or the digit "3") to a look-alike Cyrillic
# letter; "й" and "ё" are folded to "и" and "е".
# NOTE(review): "i" appears to map to Latin "l", unlike the other Latin keys,
# which map to Cyrillic letters - confirm this is intended.
similar_letters = {"й": "и", "ё": "е", "e": "е", "t": "т", "i": "l", "o": "о", "k": "к", "3": "з", "a": "а", "x": "х", "c": "с", "m": "м"}
|
| 4 |
-
# Lowercase Latin letters followed by lowercase Russian Cyrillic letters, both
# listed in keyboard-row order. Contains no digits, whitespace or punctuation.
# NOTE(review): presumably the set of characters the tokenizer treats as word
# characters - the code that reads it is not visible here; confirm.
letters = "qwertyuiopasdfghjklzxcvbnmёйцукенгшщзхъфывапролджэячсмитьбю"
|
| 5 |
|
| 6 |
def countwords(x):
|
| 7 |
temp = {}
|
|
@@ -41,6 +41,8 @@ class Chatbot:
|
|
| 41 |
preprocess += similar_letters[x]
|
| 42 |
else:
|
| 43 |
preprocess += x
|
|
|
|
|
|
|
| 44 |
return preprocess.split()
|
| 45 |
def train(self, data: dict):
|
| 46 |
lendata = len(data)
|
|
|
|
| 1 |
import math
|
| 2 |
|
| 3 |
# Character substitution table: when a character is found here, its mapped
# value is appended to the preprocessed text instead of the character itself.
# Most entries map a Latin letter (or the digit "3") to a look-alike Cyrillic
# letter; "й" and "ё" are folded to "и" and "е".
# NOTE(review): "i" appears to map to Latin "l", unlike the other Latin keys,
# which map to Cyrillic letters - confirm this is intended.
similar_letters = {"й": "и", "ё": "е", "e": "е", "t": "т", "i": "l", "o": "о", "k": "к", "3": "з", "a": "а", "x": "х", "c": "с", "m": "м"}
|
| 4 |
+
# Lowercase Latin letters followed by lowercase Russian Cyrillic letters, both
# listed in keyboard-row order, plus a trailing space character. Contains no
# digits or punctuation.
# NOTE(review): presumably the set of characters the tokenizer treats as word
# characters (with the space acting as a separator) - the code that reads it
# is not visible here; confirm.
letters = "qwertyuiopasdfghjklzxcvbnmёйцукенгшщзхъфывапролджэячсмитьбю "
|
| 5 |
|
| 6 |
def countwords(x):
|
| 7 |
temp = {}
|
|
|
|
| 41 |
preprocess += similar_letters[x]
|
| 42 |
else:
|
| 43 |
preprocess += x
|
| 44 |
+
else:
|
| 45 |
+
preprocess += " "+x+" "
|
| 46 |
return preprocess.split()
|
| 47 |
def train(self, data: dict):
|
| 48 |
lendata = len(data)
|