carouselforge / src /lib /parser /pdf.test.ts
CarouselForge Developer
fix: resolve TypeScript and test configuration issues for Phase 13
9a43362
/**
* Tests for PDF parser
*/
import { extractPDFText, chunkPDFText } from './pdf';
/**
 * Test suite for the PDF parser's text chunking.
 *
 * Every case calls `chunkPDFText(text, limit)` and inspects the returned
 * `chunks` array and `count` value. The numeric argument is treated as the
 * maximum number of words allowed in a single chunk, which is what the
 * "max word count" case below verifies.
 */
describe('PDF Parser', () => {
  describe('chunkPDFText', () => {
    /** Builds a text made of `wordCount` copies of "word" separated by single spaces. */
    const makeText = (wordCount: number): string =>
      Array.from({ length: wordCount }, () => 'word').join(' ');

    /** Counts the non-empty, whitespace-separated tokens in a chunk. */
    const countWords = (chunk: string): number =>
      chunk.split(/\s+/).filter((w) => w.length > 0).length;

    it('should chunk text by word count', () => {
      const { chunks, count } = chunkPDFText(makeText(1000), 500);

      // 1000 words cannot fit into a single 500-word chunk, so the text must
      // be split into at least two chunks. A lower bound of 1 would also pass
      // if the text were never split at all.
      expect(count).toBeGreaterThanOrEqual(2);
      expect(chunks.length).toBeGreaterThanOrEqual(2);
    });

    it('should handle small texts', () => {
      const text = 'a b c d e';
      const { chunks, count } = chunkPDFText(text, 500);

      // A text far below the limit stays in one chunk with its content intact.
      expect(count).toBe(1);
      expect(chunks[0]).toContain('a b c d e');
    });

    it('should handle empty texts', () => {
      const text = '';
      const { chunks, count } = chunkPDFText(text, 500);

      expect(count).toBe(0);
      expect(chunks).toHaveLength(0);
    });

    it('should handle whitespace-only texts', () => {
      const text = ' \n\n ';
      const { chunks, count } = chunkPDFText(text, 500);

      // Whitespace alone carries no words, so no chunk should be produced.
      expect(count).toBe(0);
      expect(chunks).toHaveLength(0);
    });

    it('should respect max word count per chunk', () => {
      const { chunks } = chunkPDFText(makeText(100), 20);

      // Guard against a vacuous pass: with no chunks the loop below would
      // never execute a single assertion.
      expect(chunks.length).toBeGreaterThan(0);

      for (const chunk of chunks) {
        expect(countWords(chunk)).toBeLessThanOrEqual(20);
      }
    });
  });
});