import documentProcessor from '../document-processor';
import geminiClient from '../gemini';
import redisClient from '../redis';
import { getAllViolationPatterns } from '../housing-law-database';

// Mock dependencies for integration testing
jest.mock('../gemini');
jest.mock('../redis');
jest.mock('../housing-law-database');

// `jest.Mocked` is generic: the type argument is what exposes `.mockResolvedValue`
// and friends on each method of the auto-mocked clients.
const mockGeminiClient = geminiClient as jest.Mocked<typeof geminiClient>;
const mockRedisClient = redisClient as jest.Mocked<typeof redisClient>;

/** Names of the text-extraction methods on `documentProcessor` that the tests stub out. */
type TextExtractorName = 'extractTextFromPDF' | 'extractTextFromImage';

/**
 * Builds a fresh stand-in for the object returned by `redisClient.getClient()`.
 *
 * Every member is a `jest.fn()`, so tests can assert on calls and override
 * behaviour (e.g. force `json.set` to reject) without going through `getClient()`.
 */
function createRedisStub() {
  return {
    json: {
      set: jest.fn().mockResolvedValue(undefined),
      get: jest.fn().mockResolvedValue(null),
    },
    expire: jest.fn().mockResolvedValue(undefined),
    ft: {
      search: jest.fn().mockResolvedValue([]),
      create: jest.fn().mockResolvedValue(undefined),
      info: jest.fn().mockResolvedValue(false),
    },
    ping: jest.fn().mockResolvedValue('PONG'),
  };
}

/**
 * Builds the violation patterns served by the mocked housing-law database.
 *
 * A new array is created on every call so one test can never observe
 * modifications made during another. The `detection_regex` values are valid
 * JavaScript regex sources.
 */
function buildViolationPatterns() {
  return [
    {
      id: 'CRIT-001',
      violation_type: 'Excessive Security Deposit',
      severity: 'Critical',
      illegal_clause_pattern: 'Security deposit exceeding one month\'s rent',
      description: 'Any lease clause requiring security deposit greater than one month\'s rent',
      legal_violation: 'NYC Housing Maintenance Code §27-2056',
      example_illegal_clause: 'Tenant agrees to pay security deposit equal to two months\' rent',
      legal_standard: 'Maximum security deposit is one month\'s rent',
      penalties: 'Tenant can recover excess amount plus interest',
      detection_regex: '(security\\s+deposit|deposit).*(?:two|2|three|3|four|4).*(month|rent)',
      source: 'https://rentguidelinesboard.cityofnewyork.us/resources/faqs/security-deposits/',
      hpd_violation_code: 'SEC-DEP-01',
    },
    {
      id: 'CRIT-002',
      violation_type: 'Repair Responsibility Waiver',
      severity: 'Critical',
      illegal_clause_pattern: 'Waiving landlord\'s duty to maintain premises',
      description: 'Clauses that require tenant to waive landlord\'s obligation to maintain habitability',
      legal_violation: 'NYC Housing Maintenance Code Article 1, Warranty of Habitability',
      example_illegal_clause: 'Tenant waives any claims for repairs and maintenance',
      legal_standard: 'Landlord cannot waive duty to maintain habitable conditions',
      penalties: 'Clause is void; tenant retains all habitability rights',
      detection_regex: '(waive|waiver|waiving).*(repair|maintenance|habitab|condition)',
      source: 'NY Real Property Law §235-b',
      hpd_violation_code: 'HAB-WAIV-01',
    },
    {
      id: 'CRIT-003',
      violation_type: 'Right to Court Waiver',
      severity: 'Critical',
      illegal_clause_pattern: 'Waiving tenant\'s right to court proceedings',
      description: 'Clauses requiring tenant to waive right to appear in housing court',
      legal_violation: 'Due Process Clause, RPAPL',
      example_illegal_clause: 'Tenant waives right to contest eviction in court',
      legal_standard: 'Constitutional right to due process cannot be waived',
      penalties: 'Clause is void and unenforceable',
      detection_regex: '(waive|waiver).*(court|legal|proceeding|contest)',
      source: 'US Constitution 14th Amendment, NY Constitution',
      hpd_violation_code: 'COURT-WAIV-01',
    },
  ];
}

/**
 * Replaces one of `documentProcessor`'s text extractors with a spy resolving to `text`.
 *
 * The spy is not restored here; the suite-level `afterEach` restores every spy,
 * which also covers tests that fail before reaching their last statement.
 */
function stubTextExtraction(method: TextExtractorName, text: string) {
  // `as any`: the extractor methods are not reachable through the processor's
  // exported type (presumably private - confirm against document-processor).
  return jest.spyOn(documentProcessor as any, method).mockResolvedValue(text);
}

/**
 * End-to-end tests of `documentProcessor.processDocument` with Gemini, Redis and
 * the housing-law database mocked, and raw text extraction stubbed per test.
 */
describe('DocumentProcessor Integration Tests', () => {
  // Lease containing three clauses the mocked patterns flag (deposit, repair
  // waiver, court waiver). The text is only ever passed to stubbed collaborators.
  const sampleLeaseText = [
    ' RESIDENTIAL LEASE AGREEMENT',
    'ARTICLE 1: RENT PAYMENT',
    'Tenant agrees to pay monthly rent of $2,500 due on the first of each month.',
    'ARTICLE 2: SECURITY DEPOSIT',
    "Tenant shall provide a security deposit equal to three months' rent ($7,500).",
    'ARTICLE 3: REPAIRS AND MAINTENANCE',
    'Tenant waives any claims for repairs and maintenance of the premises.',
    'ARTICLE 4: LANDLORD ENTRY',
    'Landlord may enter premises at any time without notice.',
    'ARTICLE 5: TERMINATION',
    'Tenant waives right to contest eviction in court. ',
  ].join(' ');

  // What the mocked Gemini client reports as the clauses of `sampleLeaseText`.
  const mockExtractedClauses = [
    {
      text: 'Tenant agrees to pay monthly rent of $2,500 due on the first of each month.',
      section: 'Rent & Payment',
    },
    {
      text: 'Tenant shall provide a security deposit equal to three months\' rent ($7,500).',
      section: 'Security Deposit',
    },
    {
      text: 'Tenant waives any claims for repairs and maintenance of the premises.',
      section: 'Repairs & Maintenance',
    },
    {
      text: 'Landlord may enter premises at any time without notice.',
      section: 'Landlord Entry',
    },
    {
      text: 'Tenant waives right to contest eviction in court.',
      section: 'Termination & Eviction',
    },
  ];

  // The object handed out by the mocked `redisClient.getClient()` for the current test.
  let redisStub: ReturnType<typeof createRedisStub>;

  beforeEach(() => {
    jest.clearAllMocks();

    // Mock Redis client
    redisStub = createRedisStub();
    // `as any`: the stub implements only the members listed in createRedisStub,
    // not the full client type.
    mockRedisClient.getClient.mockReturnValue(redisStub as any);

    // Mock Gemini client
    mockGeminiClient.extractClauses.mockResolvedValue(mockExtractedClauses);
    mockGeminiClient.generateEmbedding.mockResolvedValue([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]);

    // Mock housing law database with valid JavaScript regex patterns
    (getAllViolationPatterns as jest.Mock).mockReturnValue(buildViolationPatterns());
  });

  afterEach(() => {
    // Undo every jest.spyOn even when a test fails midway, so a stubbed extractor
    // cannot leak into the next test. All stubs are re-installed in beforeEach.
    jest.restoreAllMocks();
  });

  describe('Complete Document Processing Pipeline', () => {
    it('should process a lease document and detect multiple violations', async () => {
      const leaseFile = new File([sampleLeaseText], 'lease.pdf', { type: 'application/pdf' });
      const leaseId = 'integration-test-lease-123';

      // Mock text extraction
      stubTextExtraction('extractTextFromPDF', sampleLeaseText);

      const result = await documentProcessor.processDocument(leaseFile, leaseId);

      // Verify the complete pipeline
      expect(result.leaseId).toBe(leaseId);
      expect(result.clauses).toHaveLength(5);
      expect(result.violations).toHaveLength(3); // Should detect 3 critical violations

      // Verify clause processing: each offending clause is flagged as Critical
      const offendingFragments = [
        'security deposit',
        'waives any claims for repairs',
        'waives right to contest eviction',
      ];
      for (const fragment of offendingFragments) {
        const clause = result.clauses.find(c => c.text.includes(fragment));
        expect(clause).toBeDefined();
        expect(clause?.metadata.flagged).toBe(true);
        expect(clause?.metadata.severity).toBe('Critical');
      }

      // Verify summary statistics
      expect(result.summary.totalClauses).toBe(5);
      expect(result.summary.flaggedClauses).toBe(3);
      expect(result.summary.criticalViolations).toBe(3);
      expect(result.summary.highViolations).toBe(0);
      expect(result.summary.mediumViolations).toBe(0);
      expect(result.summary.lowViolations).toBe(0);

      // Verify Redis storage was called
      expect(redisStub.json.set).toHaveBeenCalledTimes(6); // 5 clauses + 1 lease metadata
      expect(redisStub.expire).toHaveBeenCalledTimes(5); // 5 clauses with expiration

      // Verify Gemini integration
      expect(mockGeminiClient.extractClauses).toHaveBeenCalledWith(sampleLeaseText);
      // Note: generateEmbedding is called for each clause during processing
      expect(mockGeminiClient.generateEmbedding).toHaveBeenCalled();
    });

    it('should handle a lease with no violations', async () => {
      const cleanLeaseText = [
        ' RESIDENTIAL LEASE AGREEMENT',
        'ARTICLE 1: RENT PAYMENT',
        'Tenant agrees to pay monthly rent of $2,000 due on the first of each month.',
        'ARTICLE 2: SECURITY DEPOSIT',
        "Tenant shall provide a security deposit equal to one month's rent ($2,000).",
        'ARTICLE 3: REPAIRS AND MAINTENANCE',
        'Landlord is responsible for maintaining the premises in habitable condition. ',
      ].join(' ');

      const cleanExtractedClauses = [
        {
          text: 'Tenant agrees to pay monthly rent of $2,000 due on the first of each month.',
          section: 'Rent & Payment',
        },
        {
          text: 'Tenant shall provide a security deposit equal to one month\'s rent ($2,000).',
          section: 'Security Deposit',
        },
        {
          text: 'Landlord is responsible for maintaining the premises in habitable condition.',
          section: 'Repairs & Maintenance',
        },
      ];
      mockGeminiClient.extractClauses.mockResolvedValue(cleanExtractedClauses);

      const leaseFile = new File([cleanLeaseText], 'clean-lease.pdf', { type: 'application/pdf' });
      const leaseId = 'clean-lease-test-123';

      stubTextExtraction('extractTextFromPDF', cleanLeaseText);

      const result = await documentProcessor.processDocument(leaseFile, leaseId);

      // Verify no violations detected
      expect(result.violations).toHaveLength(0);
      expect(result.summary.flaggedClauses).toBe(0);
      expect(result.summary.criticalViolations).toBe(0);

      // Verify all clauses are marked as compliant
      for (const clause of result.clauses) {
        expect(clause.metadata.flagged).toBe(false);
        expect(clause.metadata.confidence).toBe(0.0);
      }
    });

    it('should process image files with OCR', async () => {
      const imageLeaseText = 'This is a scanned lease document with rent terms.';
      const imageExtractedClauses = [
        { text: 'This is a scanned lease document with rent terms.', section: 'General' },
      ];
      mockGeminiClient.extractClauses.mockResolvedValue(imageExtractedClauses);

      const imageFile = new File(['image data'], 'lease.jpg', { type: 'image/jpeg' });
      const leaseId = 'image-lease-test-123';

      const extractTextSpy = stubTextExtraction('extractTextFromImage', imageLeaseText);

      const result = await documentProcessor.processDocument(imageFile, leaseId);

      expect(result.leaseId).toBe(leaseId);
      expect(result.clauses).toHaveLength(1);
      // An image upload must be routed to the image extractor, with the file itself.
      expect(extractTextSpy).toHaveBeenCalledWith(imageFile);
    });

    it('should handle large documents with pagination', async () => {
      // Create a large document that would trigger pagination
      const largeLeaseText = 'Large lease document. '.repeat(1000); // ~20,000 characters
      const largeExtractedClauses = Array.from({ length: 20 }, (_, i) => ({
        text: `Clause ${i + 1}: This is a sample clause in the large lease document.`,
        section: 'General',
      }));
      mockGeminiClient.extractClauses.mockResolvedValue(largeExtractedClauses);

      const largeFile = new File([largeLeaseText], 'large-lease.pdf', { type: 'application/pdf' });
      const leaseId = 'large-lease-test-123';

      stubTextExtraction('extractTextFromPDF', largeLeaseText);

      const result = await documentProcessor.processDocument(largeFile, leaseId);

      expect(result.clauses).toHaveLength(20);
      expect(result.summary.totalClauses).toBe(20);
      // Note: generateEmbedding is called for each clause during processing
      expect(mockGeminiClient.generateEmbedding).toHaveBeenCalled();
    });
  });

  describe('Error Handling and Resilience', () => {
    it('should continue processing when some clauses fail embedding generation', async () => {
      const leaseFile = new File([sampleLeaseText], 'lease.pdf', { type: 'application/pdf' });
      const leaseId = 'error-test-lease-123';

      stubTextExtraction('extractTextFromPDF', sampleLeaseText);

      // Mock embedding generation to fail for some clauses; calls after the third
      // fall back to the default resolved value installed in beforeEach.
      mockGeminiClient.generateEmbedding
        .mockResolvedValueOnce([0.1, 0.2, 0.3]) // First clause succeeds
        .mockRejectedValueOnce(new Error('Embedding failed')) // Second clause fails
        .mockResolvedValueOnce([0.4, 0.5, 0.6]); // Third clause succeeds

      const result = await documentProcessor.processDocument(leaseFile, leaseId);

      // Should still process successfully, skipping failed clauses
      expect(result.leaseId).toBe(leaseId);
      expect(result.clauses.length).toBeGreaterThan(0);
    });

    it('should handle Redis storage failures gracefully', async () => {
      const leaseFile = new File([sampleLeaseText], 'lease.pdf', { type: 'application/pdf' });
      const leaseId = 'redis-error-test-123';

      stubTextExtraction('extractTextFromPDF', sampleLeaseText);

      // Mock Redis storage to fail
      redisStub.json.set.mockRejectedValue(new Error('Redis storage failed'));

      // The current implementation handles Redis failures gracefully
      const result = await documentProcessor.processDocument(leaseFile, leaseId);

      // Should still return analysis results even if storage fails
      expect(result.leaseId).toBe(leaseId);
      expect(result.clauses).toHaveLength(5);
      expect(result.violations).toHaveLength(3);
    });
  });

  describe('Performance and Scalability', () => {
    it('should process documents within reasonable time limits', async () => {
      const leaseFile = new File([sampleLeaseText], 'lease.pdf', { type: 'application/pdf' });
      const leaseId = 'performance-test-123';

      stubTextExtraction('extractTextFromPDF', sampleLeaseText);

      const startTime = Date.now();
      const result = await documentProcessor.processDocument(leaseFile, leaseId);
      const processingTime = Date.now() - startTime;

      // Should complete within 5 seconds (allowing for test overhead)
      expect(processingTime).toBeLessThan(5000);
      expect(result.leaseId).toBe(leaseId);
    });
  });
});