const tf = require('@tensorflow/tfjs-node');
const tokenService = require('./services/token');
const dictionary = require('./data/words');

// Build the training set: strip punctuation from each definition, tokenize it,
// and pad the (single-token) word encoding with zeros out to the definition's length.
const dataset = dictionary.map(({ word, definition }) => {
  // [,;.] is a character class matching any one of , ; .
  // (the previous /,;\./g only matched the literal three-character sequence ",;.").
  const encodedDefinition = tokenService.encode(definition.replace(/[,;.]/g, '').split(' '));
  const encodedWord = tokenService.encode([word]);
  const padding = new Array(encodedDefinition.length - encodedWord.length).fill(0);
  return {
    word: encodedWord.concat(padding),
    definition: encodedDefinition,
  };
});

// Vocabulary size assumed by the embedding and one-hot encoding.
// NOTE(review): presumably matches tokenService's vocabulary — confirm. (sic: "dictionaty")
const dictionatySize = 200;

// Longest encoded definition; used as the fixed model input length.
const maxInputSentenceSize = dataset.reduce(
  (maxSize, { definition }) => Math.max(maxSize, definition.length),
  0
);
|
|
|
|
/**
 * Builds, trains, and exercises a sequence model that maps a padded word
 * encoding to a per-position probability distribution over the vocabulary.
 * Side effects: trains for 100 epochs and logs a sample prediction.
 */
async function run() {
  const model = tf.sequential();
  model.add(tf.layers.embedding({ inputDim: dictionatySize, outputDim: 64, inputLength: maxInputSentenceSize }));
  // returnSequences keeps one output vector per input position (needed for argMax over axis 2 downstream).
  model.add(tf.layers.lstm({ units: 128, returnSequences: true }));
  model.add(tf.layers.dense({ units: dictionatySize, activation: 'softmax' }));
  model.compile({ loss: 'categoricalCrossentropy', optimizer: 'adam' });

  // Inputs: word encodings padded to the fixed input length.
  const trainingWords = dataset.map(({ word }) => tokenService.padEncoding(word, maxInputSentenceSize));
  // Targets: padded definitions, one-hot encoded to match the softmax output.
  const trainingDefinitions = dataset.map(({ definition }) => tokenService.padEncoding(definition, maxInputSentenceSize));

  const tensorWords = tf.tensor2d(trainingWords);
  const tensorDefinitions = tf.tensor3d(
    trainingDefinitions.map((def) => tokenService.oneHotEncode(def, dictionatySize)),
    [trainingDefinitions.length, maxInputSentenceSize, dictionatySize]
  );

  try {
    await model.fit(tensorWords, tensorDefinitions, { epochs: 100 });
  } finally {
    // tfjs tensors are backed by native memory and are NOT garbage-collected;
    // dispose them even if training throws.
    tensorWords.dispose();
    tensorDefinitions.dispose();
  }

  predict(model, 'Smoker');
}
|
|
/**
 * Runs the trained model on a single word and logs the decoded definition.
 *
 * @param {tf.LayersModel} model - model trained by run()
 * @param {string} newWord - word to look up
 */
function predict(model, newWord) {
  // Encode and pad the query word to the model's fixed input length.
  let encodedWord = tokenService.encode([newWord]);
  encodedWord = tokenService.padEncoding(encodedWord, maxInputSentenceSize);

  const wordTensor = tf.tensor2d([encodedWord]);
  const prediction = model.predict(wordTensor);
  // argMax over axis 2 picks the most likely vocabulary token at each position.
  const tokenTensor = prediction.argMax(2);
  const predictedTokens = tokenTensor.arraySync()[0];

  // Free native tensor memory (tfjs does not garbage-collect tensors).
  wordTensor.dispose();
  prediction.dispose();
  tokenTensor.dispose();

  console.log({ predictedTokens });

  // Drop padding tokens (id 0) BEFORE decoding. The previous string-level
  // .replace(/0/g, '') also deleted the digit 0 from any real decoded word.
  const predictedDefinition = predictedTokens
    .filter((tokenId) => tokenId !== 0)
    .map((tokenId) => tokenService.decodeToken(tokenId))
    .join(' ');

  console.log(`Generated Definition for '${newWord}': ${predictedDefinition.trim()}`);
}
|
|
// Entry point. run() is async — without a .catch, a training/IO failure would
// surface as an unhandled promise rejection instead of a clean exit.
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
|