const tokenSplit = require('./tokenSplit');

/**
 * Jest suite for `tokenSplit`.
 *
 * Every case awaits `tokenSplit` with an options object and compares the
 * resolved array of string chunks against a fixed expectation. The expected
 * values are tied to the tokenizer in use, so they need to be regenerated if
 * the encoding behind `tokenSplit` changes.
 */
describe('tokenSplit', () => {
  // Shared fixture; every expectation below is derived from this sentence.
  const text = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam id.';

  it('returns correct text chunks with provided parameters', async () => {
    const result = await tokenSplit({
      text,
      encodingName: 'gpt2',
      chunkSize: 2,
      chunkOverlap: 1,
      returnSize: 5,
    });

    // Neighbouring chunks share a piece ('. Null' / ' Nullam' / 'am id'),
    // which is the visible effect of chunkOverlap: 1 with chunkSize: 2.
    expect(result).toEqual(['. Null', ' Nullam', 'am id', ' id.', '.']);
  });

  it('returns correct text chunks with default parameters', async () => {
    const result = await tokenSplit({ text });

    // With only `text` supplied, the whole sentence comes back split into
    // these fourteen pieces, in order.
    expect(result).toEqual([
      'Lorem',
      ' ipsum',
      ' dolor',
      ' sit',
      ' amet',
      ',',
      ' consectetur',
      ' adipiscing',
      ' elit',
      '.',
      ' Null',
      'am',
      ' id',
      '.',
    ]);
  });

  it('returns correct text chunks with specific return size', async () => {
    const result = await tokenSplit({ text, returnSize: 2 });

    // The expected pair is the tail of the default-parameter output above,
    // i.e. returnSize keeps the trailing chunks rather than the leading ones.
    expect(result).toHaveLength(2);
    expect(result).toEqual([' id', '.']);
  });

  it('returns correct text chunks with specified chunk size', async () => {
    const result = await tokenSplit({ text, chunkSize: 10 });

    // First chunk covers the first ten pieces of the default output; the
    // remaining four pieces form the second, shorter chunk.
    expect(result).toEqual([
      'Lorem ipsum dolor sit amet, consectetur adipiscing elit.',
      ' Nullam id.',
    ]);
  });

  it('returns empty array with no text', async () => {
    const result = await tokenSplit({ text: '' });

    expect(result).toEqual([]);
  });
});