rellow / src /services /token.js
Rafael Camargo
initial commit
698cdee
raw
history blame
1.07 kB
/**
 * Word <-> numeric code conversion backed by a module-private vocabulary
 * that grows as new words are encoded.
 *
 * Codes are 1-based: a word's code is its position in `dictionary` plus one.
 * Code 0 is never assigned to a word; `padEncoding` uses it as filler.
 */
const _public = {};

// Vocabulary in insertion order; the word with code `c` lives at index `c - 1`.
const dictionary = [];

// Reverse index (word -> position in `dictionary`) so encoding does not have
// to scan the whole vocabulary for every word. Must stay in sync with
// `dictionary`: updated on every push and cleared on reset.
const wordIndexes = new Map();

/**
 * Converts each word into its numeric code, registering unseen words in the
 * vocabulary as they are encountered.
 *
 * @param {Array} words - Words to encode (presumably strings; confirm
 *   against callers).
 * @returns {Array<number>} One 1-based code per input word, in input order.
 */
_public.encode = words => {
  return words.map(word => {
    const index = findWordCode(word);
    if (index !== -1) return index + 1;
    dictionary.push(word);
    wordIndexes.set(word, dictionary.length - 1);
    // The new word sits at the end, so its 1-based code equals the new length.
    return dictionary.length;
  });
};

/**
 * Converts codes back into the words they were assigned to.
 *
 * @param {Array<number>} codes - Codes to look up.
 * @returns {Array} The word for each code, or `undefined` where the code has
 *   no vocabulary entry (this includes the padding code 0).
 */
_public.decode = codes => {
  return codes.map(code => dictionary[code - 1]);
};

/**
 * Looks up the word for a single code.
 *
 * @param {number} code - Code to look up.
 * @returns {*} The word for `code`, or 0 when the code has no vocabulary
 *   entry.
 */
_public.decodeToken = code => {
  const word = dictionary[code - 1];
  // Compare against undefined instead of using `||`, so a falsy word such as
  // '' is returned as-is (matching `decode`) rather than being replaced by 0.
  return word === undefined ? 0 : word;
};

/**
 * Builds a one-hot vector for every token id in a sequence.
 *
 * @param {Array<number>} sequence - Token ids; each id is used directly as
 *   the index of the hot position.
 * @param {number} vocabSize - Length of each produced vector.
 * @returns {Array<Array<number>>} One vector per token id. Ids outside
 *   `[0, vocabSize)` produce an all-zero vector.
 */
_public.oneHotEncode = (sequence, vocabSize) => {
  return sequence.map(tokenId => {
    const oneHot = new Array(vocabSize).fill(0);
    if (tokenId >= 0 && tokenId < vocabSize) {
      oneHot[tokenId] = 1;
    }
    return oneHot;
  });
};

/**
 * Right-pads an encoding with zeros until it is at least `minLength` long.
 *
 * @param {Array<number>} encoding - Codes to pad.
 * @param {number} minLength - Minimum length of the result.
 * @returns {Array<number>} A new, padded array when padding is needed;
 *   otherwise the original `encoding` array itself (not a copy).
 */
_public.padEncoding = (encoding, minLength) => {
  const necessaryPadding = minLength - encoding.length;
  if (necessaryPadding > 0) {
    const padding = new Array(necessaryPadding).fill(0);
    return encoding.concat(padding);
  }
  return encoding;
};

/**
 * Empties the vocabulary so that subsequent encodes start again from code 1.
 */
_public.resetDictionary = () => {
  dictionary.length = 0;
  wordIndexes.clear();
};

/**
 * Finds the zero-based position of a word in the vocabulary.
 *
 * Map keys are matched with SameValueZero, which agrees with a strict-equality
 * array search for every value except NaN.
 *
 * @param {*} word - Word to look up.
 * @returns {number} Index into `dictionary`, or -1 when the word is unknown.
 */
function findWordCode(word) {
  const index = wordIndexes.get(word);
  return index === undefined ? -1 : index;
}
// Export the public API object (`_public`) as this module's interface.
module.exports = _public;