/**
 * Tests for PDF parser
 */

import { extractPDFText, chunkPDFText } from './pdf';

/**
 * Suite for the PDF text helpers imported from './pdf'.
 *
 * Every case here exercises chunkPDFText(text, maxWords), which these tests
 * treat as returning an object of the shape { chunks, count }: `chunks` is an
 * array of strings and `count` is asserted alongside the number of chunks.
 */
describe('PDF Parser', () => {
  describe('chunkPDFText', () => {
    it('should chunk text by word count', () => {
      const words = Array(1000).fill('word');
      const text = words.join(' ');
      const { chunks, count } = chunkPDFText(text, 500);

      // 1000 words with a 500-word cap cannot fit in fewer than 2 chunks.
      // A lower bound (rather than an exact match) keeps the test valid if
      // the chunker produces additional, e.g. overlapping, chunks.
      expect(count).toBeGreaterThanOrEqual(2);
      expect(chunks.length).toBeGreaterThanOrEqual(2);
    });

    it('should handle small texts', () => {
      const text = 'a b c d e';
      const { chunks, count } = chunkPDFText(text, 500);

      // Input far below the cap must come back as a single chunk.
      expect(count).toBe(1);
      expect(chunks[0]).toContain('a b c d e');
    });

    it('should handle empty texts', () => {
      const text = '';
      const { chunks, count } = chunkPDFText(text, 500);

      expect(count).toBe(0);
      expect(chunks.length).toBe(0);
    });

    it('should handle whitespace-only texts', () => {
      const text = '   \n\n  ';
      const { chunks, count } = chunkPDFText(text, 500);

      // Whitespace carries no words, so it is expected to behave like ''.
      expect(count).toBe(0);
      expect(chunks.length).toBe(0);
    });

    it('should respect max word count per chunk', () => {
      const text = Array(100).fill('word').join(' ');
      const { chunks } = chunkPDFText(text, 20);

      // Guard against a vacuous pass: with no chunks the loop below would
      // assert nothing. 100 words at <= 20 per chunk need at least 5 chunks.
      expect(chunks.length).toBeGreaterThanOrEqual(5);

      for (const chunk of chunks) {
        // Count whitespace-separated tokens, ignoring empty strings produced
        // by leading/trailing whitespace.
        const wordCount = chunk.split(/\s+/).filter((w) => w.length > 0).length;
        expect(wordCount).toBeLessThanOrEqual(20);
      }
    });
  });
});