Spaces:
Running
Running
File size: 305 Bytes
1e5f3d4 | 1 2 3 4 5 6 7 8 9 10 11 12 | import re
def clean_text(text):
    """Return *text* lowercased with disallowed characters removed.

    After lowercasing, every character that is not an ASCII letter
    ``a``-``z``, a digit ``0``-``9`` or a space is deleted. Spaces are
    kept as-is (runs of spaces are not collapsed); other whitespace such
    as newlines or tabs is removed like any other disallowed character.
    """
    lowered = text.lower()
    # Negated character class: matches anything outside a-z, 0-9 and space.
    disallowed = re.compile(r"[^a-z0-9 ]")
    return disallowed.sub("", lowered)
def encode_question(q, vocab, max_len=20):
    """Encode the question string *q* as a fixed-length list of vocab values.

    *q* is split on whitespace and each token is looked up in *vocab*;
    tokens that are missing map to ``vocab["<UNK>"]``. The resulting
    sequence is cut to at most *max_len* entries and, when shorter,
    right-padded with ``vocab["<PAD>"]`` so that (for a non-negative
    *max_len*) the returned list has exactly *max_len* entries.

    Raises ``KeyError`` if ``"<PAD>"`` is absent from *vocab*, or if
    ``"<UNK>"`` is absent and *q* contains at least one token.
    """
    ids = []
    for word in q.split():
        ids.append(vocab.get(word, vocab["<UNK>"]))

    clipped = ids[:max_len]
    pad_id = vocab["<PAD>"]
    # A negative count (input longer than max_len) yields no padding.
    missing = max_len - len(ids)
    return clipped + [pad_id] * missing
|