Spaces:
Sleeping
Sleeping
File size: 1,498 Bytes
/**
 * Tests for PDF parser
 */
import { extractPDFText, chunkPDFText } from './pdf';
describe('PDF Parser', () => {
describe('chunkPDFText', () => {
it('should chunk text by word count', () => {
  // Build a 1000-word input and ask for chunks of at most 500 words.
  const words = Array(1000).fill('word');
  const text = words.join(' ');
  const { chunks, count } = chunkPDFText(text, 500);
  // 1000 words cannot fit into a single chunk capped at 500 words, so at
  // least 2 chunks must come back. The exact number is deliberately not
  // pinned: chunkPDFText's implementation is not visible from this file
  // (e.g. whether consecutive chunks overlap) — tighten to toBe(2) once
  // that is confirmed.
  expect(count).toBeGreaterThanOrEqual(2);
  expect(chunks.length).toBeGreaterThanOrEqual(2);
});
it('should handle small texts', () => {
  // A five-word input with a 500-word limit is expected to come back as a
  // single chunk that still contains the original text.
  const shortInput = 'a b c d e';
  const result = chunkPDFText(shortInput, 500);

  expect(result.count).toBe(1);
  expect(result.chunks[0]).toContain('a b c d e');
});
it('should handle empty texts', () => {
  // An empty string is expected to produce a zero count and no chunks.
  const result = chunkPDFText('', 500);

  expect(result.count).toBe(0);
  expect(result.chunks.length).toBe(0);
});
it('should handle whitespace-only texts', () => {
  // Input made only of spaces and newlines is expected to be treated like
  // empty input: zero count, no chunks.
  const blankInput = ' \n\n ';
  const result = chunkPDFText(blankInput, 500);

  expect(result.count).toBe(0);
  expect(result.chunks.length).toBe(0);
});
it('should respect max word count per chunk', () => {
  // Limit passed to chunkPDFText and checked against every returned chunk.
  const maxWords = 20;
  const text = Array(100).fill('word').join(' ');
  const { chunks } = chunkPDFText(text, maxWords);

  // Guard against a vacuous pass: with no chunks the loop below would run
  // zero assertions and the test would succeed without checking anything.
  expect(chunks.length).toBeGreaterThan(0);

  for (const chunk of chunks) {
    // Count whitespace-separated tokens, ignoring empty strings produced by
    // leading/trailing whitespace in the chunk.
    const wordCount = chunk.split(/\s+/).filter((w) => w.length > 0).length;
    expect(wordCount).toBeLessThanOrEqual(maxWords);
  }
});
});
});
|