File size: 14,382 Bytes
ea8da24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
import documentProcessor from '../document-processor';
import geminiClient from '../gemini';
import redisClient from '../redis';
import { getAllViolationPatterns } from '../housing-law-database';

// Mock dependencies for integration testing.
// Each jest.mock() call below has no factory argument, so Jest substitutes an
// automatic mock for the module. '../document-processor' is intentionally NOT
// mocked: it is the unit under test and runs its real implementation against
// these mocked collaborators.
jest.mock('../gemini');
jest.mock('../redis');
jest.mock('../housing-law-database');

// Typed views of the mocked default exports, so tests can call mock helpers
// such as mockResolvedValue / mockReturnValue on their methods.
const mockGeminiClient = geminiClient as jest.Mocked<typeof geminiClient>;
const mockRedisClient = redisClient as jest.Mocked<typeof redisClient>;

describe('DocumentProcessor Integration Tests', () => {
  // Raw lease text used as the "extracted" document body in most tests.
  // It has five articles. Articles 2, 3 and 5 are worded to match the three
  // detection regexes installed in beforeEach (excessive deposit, repair
  // waiver, court waiver). Articles 1 and 4 have no matching pattern in the
  // mocked violation database.
  const sampleLeaseText = `
    RESIDENTIAL LEASE AGREEMENT
    
    ARTICLE 1: RENT PAYMENT
    Tenant agrees to pay monthly rent of $2,500 due on the first of each month.
    
    ARTICLE 2: SECURITY DEPOSIT
    Tenant shall provide a security deposit equal to three months' rent ($7,500).
    
    ARTICLE 3: REPAIRS AND MAINTENANCE
    Tenant waives any claims for repairs and maintenance of the premises.
    
    ARTICLE 4: LANDLORD ENTRY
    Landlord may enter premises at any time without notice.
    
    ARTICLE 5: TERMINATION
    Tenant waives right to contest eviction in court.
  `;

  // Default result of the mocked Gemini clause extraction: one entry per
  // article of sampleLeaseText, in document order.
  const mockExtractedClauses = [
    {
      text: 'Tenant agrees to pay monthly rent of $2,500 due on the first of each month.',
      section: 'Rent & Payment',
    },
    {
      text: "Tenant shall provide a security deposit equal to three months' rent ($7,500).",
      section: 'Security Deposit',
    },
    {
      text: 'Tenant waives any claims for repairs and maintenance of the premises.',
      section: 'Repairs & Maintenance',
    },
    {
      text: 'Landlord may enter premises at any time without notice.',
      section: 'Landlord Entry',
    },
    {
      text: 'Tenant waives right to contest eviction in court.',
      section: 'Termination & Eviction',
    },
  ];

  // Runs before every test: clears recorded mock calls, then re-installs the
  // default Redis, Gemini and violation-pattern stubs so each test starts from
  // the same baseline. Individual tests override these defaults as needed.
  beforeEach(() => {
    jest.clearAllMocks();

    // Redis stand-in exposing only the commands stubbed here. getClient() hands
    // back this same object on every call, so assertions can reach the stubs.
    const redisStub = {
      json: {
        set: jest.fn().mockResolvedValue(undefined),
        get: jest.fn().mockResolvedValue(null),
      },
      expire: jest.fn().mockResolvedValue(undefined),
      ft: {
        search: jest.fn().mockResolvedValue([]),
        create: jest.fn().mockResolvedValue(undefined),
        info: jest.fn().mockResolvedValue(false),
      },
      ping: jest.fn().mockResolvedValue('PONG'),
    };
    mockRedisClient.getClient.mockReturnValue(redisStub as any);

    // Gemini defaults: the five sample clauses and a fixed 8-dimensional embedding.
    const defaultEmbedding = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8];
    mockGeminiClient.extractClauses.mockResolvedValue(mockExtractedClauses);
    mockGeminiClient.generateEmbedding.mockResolvedValue(defaultEmbedding);

    // Housing law database: three Critical patterns, each with a valid JavaScript regex.
    const excessiveDepositPattern = {
      id: 'CRIT-001',
      violation_type: 'Excessive Security Deposit',
      severity: 'Critical',
      illegal_clause_pattern: "Security deposit exceeding one month's rent",
      description: "Any lease clause requiring security deposit greater than one month's rent",
      legal_violation: 'NYC Housing Maintenance Code §27-2056',
      example_illegal_clause: "Tenant agrees to pay security deposit equal to two months' rent",
      legal_standard: "Maximum security deposit is one month's rent",
      penalties: 'Tenant can recover excess amount plus interest',
      detection_regex: '(security\\s+deposit|deposit).*(?:two|2|three|3|four|4).*(month|rent)',
      source: 'https://rentguidelinesboard.cityofnewyork.us/resources/faqs/security-deposits/',
      hpd_violation_code: 'SEC-DEP-01',
    };

    const repairWaiverPattern = {
      id: 'CRIT-002',
      violation_type: 'Repair Responsibility Waiver',
      severity: 'Critical',
      illegal_clause_pattern: "Waiving landlord's duty to maintain premises",
      description: "Clauses that require tenant to waive landlord's obligation to maintain habitability",
      legal_violation: 'NYC Housing Maintenance Code Article 1, Warranty of Habitability',
      example_illegal_clause: 'Tenant waives any claims for repairs and maintenance',
      legal_standard: 'Landlord cannot waive duty to maintain habitable conditions',
      penalties: 'Clause is void; tenant retains all habitability rights',
      detection_regex: '(waive|waiver|waiving).*(repair|maintenance|habitab|condition)',
      source: 'NY Real Property Law §235-b',
      hpd_violation_code: 'HAB-WAIV-01',
    };

    const courtWaiverPattern = {
      id: 'CRIT-003',
      violation_type: 'Right to Court Waiver',
      severity: 'Critical',
      illegal_clause_pattern: "Waiving tenant's right to court proceedings",
      description: 'Clauses requiring tenant to waive right to appear in housing court',
      legal_violation: 'Due Process Clause, RPAPL',
      example_illegal_clause: 'Tenant waives right to contest eviction in court',
      legal_standard: 'Constitutional right to due process cannot be waived',
      penalties: 'Clause is void and unenforceable',
      detection_regex: '(waive|waiver).*(court|legal|proceeding|contest)',
      source: 'US Constitution 14th Amendment, NY Constitution',
      hpd_violation_code: 'COURT-WAIV-01',
    };

    (getAllViolationPatterns as jest.Mock).mockReturnValue([
      excessiveDepositPattern,
      repairWaiverPattern,
      courtWaiverPattern,
    ]);
  });

  // End-to-end runs of documentProcessor.processDocument with text extraction
  // stubbed and all external services (Gemini, Redis, law database) mocked.
  describe('Complete Document Processing Pipeline', () => {
    // Spy installed by the running test. It is restored in afterEach rather than
    // at the end of the test body, so a failing assertion cannot leave the
    // text-extraction stub in place for later tests.
    let extractTextSpy: jest.SpyInstance | undefined;

    afterEach(() => {
      extractTextSpy?.mockRestore();
      extractTextSpy = undefined;
    });

    it('should process a lease document and detect multiple violations', async () => {
      const leaseFile = new File([sampleLeaseText], 'lease.pdf', { type: 'application/pdf' });
      const leaseId = 'integration-test-lease-123';

      // Mock text extraction
      extractTextSpy = jest.spyOn(documentProcessor as any, 'extractTextFromPDF')
        .mockResolvedValue(sampleLeaseText);

      const result = await documentProcessor.processDocument(leaseFile, leaseId);

      // Verify the complete pipeline
      expect(result.leaseId).toBe(leaseId);
      expect(result.clauses).toHaveLength(5);
      expect(result.violations).toHaveLength(3); // Should detect 3 critical violations

      // Verify clause processing
      const securityDepositClause = result.clauses.find(c => c.text.includes('security deposit'));
      expect(securityDepositClause).toBeDefined();
      expect(securityDepositClause?.metadata.flagged).toBe(true);
      expect(securityDepositClause?.metadata.severity).toBe('Critical');

      const repairWaiverClause = result.clauses.find(c => c.text.includes('waives any claims for repairs'));
      expect(repairWaiverClause).toBeDefined();
      expect(repairWaiverClause?.metadata.flagged).toBe(true);
      expect(repairWaiverClause?.metadata.severity).toBe('Critical');

      const courtWaiverClause = result.clauses.find(c => c.text.includes('waives right to contest eviction'));
      expect(courtWaiverClause).toBeDefined();
      expect(courtWaiverClause?.metadata.flagged).toBe(true);
      expect(courtWaiverClause?.metadata.severity).toBe('Critical');

      // Verify summary statistics
      expect(result.summary.totalClauses).toBe(5);
      expect(result.summary.flaggedClauses).toBe(3);
      expect(result.summary.criticalViolations).toBe(3);
      expect(result.summary.highViolations).toBe(0);
      expect(result.summary.mediumViolations).toBe(0);
      expect(result.summary.lowViolations).toBe(0);

      // Verify Redis storage was called. getClient() returns the same stub object
      // on every call, so these read the call counts recorded during processing.
      expect(mockRedisClient.getClient().json.set).toHaveBeenCalledTimes(6); // 5 clauses + 1 lease metadata
      expect(mockRedisClient.getClient().expire).toHaveBeenCalledTimes(5); // 5 clauses with expiration

      // Verify Gemini integration
      expect(mockGeminiClient.extractClauses).toHaveBeenCalledWith(sampleLeaseText);
      // Note: generateEmbedding is called for each clause during processing
      expect(mockGeminiClient.generateEmbedding).toHaveBeenCalled();
    });

    it('should handle a lease with no violations', async () => {
      const cleanLeaseText = `
        RESIDENTIAL LEASE AGREEMENT
        
        ARTICLE 1: RENT PAYMENT
        Tenant agrees to pay monthly rent of $2,000 due on the first of each month.
        
        ARTICLE 2: SECURITY DEPOSIT
        Tenant shall provide a security deposit equal to one month's rent ($2,000).
        
        ARTICLE 3: REPAIRS AND MAINTENANCE
        Landlord is responsible for maintaining the premises in habitable condition.
      `;

      const cleanExtractedClauses = [
        { text: 'Tenant agrees to pay monthly rent of $2,000 due on the first of each month.', section: 'Rent & Payment' },
        { text: 'Tenant shall provide a security deposit equal to one month\'s rent ($2,000).', section: 'Security Deposit' },
        { text: 'Landlord is responsible for maintaining the premises in habitable condition.', section: 'Repairs & Maintenance' }
      ];

      mockGeminiClient.extractClauses.mockResolvedValue(cleanExtractedClauses);

      const leaseFile = new File([cleanLeaseText], 'clean-lease.pdf', { type: 'application/pdf' });
      const leaseId = 'clean-lease-test-123';

      extractTextSpy = jest.spyOn(documentProcessor as any, 'extractTextFromPDF')
        .mockResolvedValue(cleanLeaseText);

      const result = await documentProcessor.processDocument(leaseFile, leaseId);

      // Verify no violations detected
      expect(result.violations).toHaveLength(0);
      expect(result.summary.flaggedClauses).toBe(0);
      expect(result.summary.criticalViolations).toBe(0);

      // Guard against a vacuous pass: without this, an empty clause list would
      // satisfy the per-clause loop below without checking anything.
      expect(result.clauses).toHaveLength(cleanExtractedClauses.length);

      // Verify all clauses are marked as compliant
      result.clauses.forEach(clause => {
        expect(clause.metadata.flagged).toBe(false);
        expect(clause.metadata.confidence).toBe(0.0);
      });
    });

    it('should process image files with OCR', async () => {
      const imageLeaseText = 'This is a scanned lease document with rent terms.';
      const imageExtractedClauses = [
        { text: 'This is a scanned lease document with rent terms.', section: 'General' }
      ];

      mockGeminiClient.extractClauses.mockResolvedValue(imageExtractedClauses);

      const imageFile = new File(['image data'], 'lease.jpg', { type: 'image/jpeg' });
      const leaseId = 'image-lease-test-123';

      // An image MIME type is expected to route through the image extraction method.
      extractTextSpy = jest.spyOn(documentProcessor as any, 'extractTextFromImage')
        .mockResolvedValue(imageLeaseText);

      const result = await documentProcessor.processDocument(imageFile, leaseId);

      expect(result.leaseId).toBe(leaseId);
      expect(result.clauses).toHaveLength(1);
      expect(extractTextSpy).toHaveBeenCalledWith(imageFile);
    });

    it('should handle large documents with pagination', async () => {
      // Create a large document that would trigger pagination
      const largeLeaseText = 'Large lease document. '.repeat(1000); // 22,000 characters (22 per repetition)
      const largeExtractedClauses = Array.from({ length: 20 }, (_, i) => ({
        text: `Clause ${i + 1}: This is a sample clause in the large lease document.`,
        section: 'General'
      }));

      mockGeminiClient.extractClauses.mockResolvedValue(largeExtractedClauses);

      const largeFile = new File([largeLeaseText], 'large-lease.pdf', { type: 'application/pdf' });
      const leaseId = 'large-lease-test-123';

      extractTextSpy = jest.spyOn(documentProcessor as any, 'extractTextFromPDF')
        .mockResolvedValue(largeLeaseText);

      const result = await documentProcessor.processDocument(largeFile, leaseId);

      expect(result.clauses).toHaveLength(20);
      expect(result.summary.totalClauses).toBe(20);
      // Note: generateEmbedding is called for each clause during processing
      expect(mockGeminiClient.generateEmbedding).toHaveBeenCalled();
    });
  });

  // Checks that processDocument still returns a result when a mocked
  // collaborator (embedding generation, Redis storage) rejects.
  describe('Error Handling and Resilience', () => {
    // Spy installed by the running test. It is restored in afterEach rather than
    // at the end of the test body, so a failing assertion cannot leave the
    // text-extraction stub in place for later tests.
    let extractTextSpy: jest.SpyInstance | undefined;

    afterEach(() => {
      extractTextSpy?.mockRestore();
      extractTextSpy = undefined;
    });

    it('should continue processing when some clauses fail embedding generation', async () => {
      const leaseFile = new File([sampleLeaseText], 'lease.pdf', { type: 'application/pdf' });
      const leaseId = 'error-test-lease-123';

      extractTextSpy = jest.spyOn(documentProcessor as any, 'extractTextFromPDF')
        .mockResolvedValue(sampleLeaseText);

      // Mock embedding generation to fail for some clauses. Once these three
      // queued outcomes are consumed, further calls fall back to the default
      // resolved embedding configured in beforeEach.
      mockGeminiClient.generateEmbedding
        .mockResolvedValueOnce([0.1, 0.2, 0.3]) // First call succeeds
        .mockRejectedValueOnce(new Error('Embedding failed')) // Second call fails
        .mockResolvedValueOnce([0.4, 0.5, 0.6]); // Third call succeeds

      const result = await documentProcessor.processDocument(leaseFile, leaseId);

      // Should still process successfully despite the rejected embedding call
      expect(result.leaseId).toBe(leaseId);
      expect(result.clauses.length).toBeGreaterThan(0);
    });

    it('should handle Redis storage failures gracefully', async () => {
      const leaseFile = new File([sampleLeaseText], 'lease.pdf', { type: 'application/pdf' });
      const leaseId = 'redis-error-test-123';

      extractTextSpy = jest.spyOn(documentProcessor as any, 'extractTextFromPDF')
        .mockResolvedValue(sampleLeaseText);

      // Mock Redis storage to fail. getClient() returns the same stub object on
      // every call, so this reconfigures the json.set the processor will use.
      mockRedisClient.getClient().json.set.mockRejectedValue(new Error('Redis storage failed'));

      // The current implementation handles Redis failures gracefully
      const result = await documentProcessor.processDocument(leaseFile, leaseId);

      // Should still return analysis results even if storage fails
      expect(result.leaseId).toBe(leaseId);
      expect(result.clauses).toHaveLength(5);
      expect(result.violations).toHaveLength(3);
    });
  });

  // Smoke check on processing latency. Every external dependency is mocked, so
  // this measures only the processor's own overhead, not real service calls.
  describe('Performance and Scalability', () => {
    // Spy installed by the running test. It is restored in afterEach rather than
    // at the end of the test body, so a failing assertion (e.g. the time budget)
    // cannot leave the text-extraction stub in place for later tests.
    let extractTextSpy: jest.SpyInstance | undefined;

    afterEach(() => {
      extractTextSpy?.mockRestore();
      extractTextSpy = undefined;
    });

    it('should process documents within reasonable time limits', async () => {
      const leaseFile = new File([sampleLeaseText], 'lease.pdf', { type: 'application/pdf' });
      const leaseId = 'performance-test-123';

      extractTextSpy = jest.spyOn(documentProcessor as any, 'extractTextFromPDF')
        .mockResolvedValue(sampleLeaseText);

      const startTime = Date.now();
      const result = await documentProcessor.processDocument(leaseFile, leaseId);
      const processingTime = Date.now() - startTime;

      // Should complete within 5 seconds (allowing for test overhead)
      expect(processingTime).toBeLessThan(5000);
      expect(result.leaseId).toBe(leaseId);
    });
  });
});