#!/usr/bin/env node

// Duplicate-prevention test for Word (WordprocessingML) form-field detection.
//
// NOTE(review): this file reached review collapsed onto two physical lines with
// every `<...>` span stripped out of its regex literals and its XML fixture.
// The form-type table, the priority table and the summary output below are
// taken from the surviving text. The fixture markup, the element names inside
// the detection patterns, the per-result fields and the opening log lines were
// rebuilt from those tables and messages -- confirm them against the
// production detector before relying on this script.

// Test document with overlapping form patterns that would cause duplicates:
// each paragraph holds a single legacy form field that several detection
// patterns recognise at once.
const testDocumentWithDuplicateForms = `
<w:document>
  <w:body>
    <w:p>
      <w:r><w:t>Form with multiple detectable patterns:</w:t></w:r>
      <w:r>
        <w:fldChar w:fldCharType="begin">
          <w:ffData><w:textInput/></w:ffData>
        </w:fldChar>
      </w:r>
      <w:r><w:instrText> FORMTEXT </w:instrText></w:r>
      <w:r><w:fldChar w:fldCharType="end"/></w:r>
    </w:p>
    <w:p>
      <w:r><w:t>Another form field:</w:t></w:r>
      <w:r>
        <w:fldChar w:fldCharType="begin">
          <w:ffData><w:checkBox/></w:ffData>
        </w:fldChar>
      </w:r>
      <w:r><w:instrText> FORMCHECKBOX </w:instrText></w:r>
      <w:r><w:fldChar w:fldCharType="end"/></w:r>
    </w:p>
  </w:body>
</w:document>
`;

// Form-type label for each detection pattern; FORM_TYPES[i] names FORM_PATTERNS[i].
const FORM_TYPES = [
  'text-field',
  'checkbox-field',
  'dropdown-field',
  'form-data-complete',
  'form-data',
  'checkbox-control',
  'dropdown-control',
  'text-input',
  'content-control',
  'content-control-data',
  'field-character',
  'formtext-simple',
  'formcheckbox-simple',
  'formdropdown-simple',
];

// Detection patterns, index-aligned with FORM_TYPES.
// NOTE(review): only the `[^>]*FORMTEXT`-style tails and the three bare keyword
// patterns survived extraction; the element names are reconstructed.
// `\b` after an element name stops `<w:sdt` from also matching `<w:sdtContent`.
const FORM_PATTERNS = [
  /<w:fldSimple[^>]*FORMTEXT/,
  /<w:fldSimple[^>]*FORMCHECKBOX/,
  /<w:fldSimple[^>]*FORMDROPDOWN/,
  /<w:ffData\b[^>]*>[\s\S]*?<\/w:ffData>/,
  /<w:ffData\b/,
  /<w:checkBox\b/,
  /<w:ddList\b/,
  /<w:textInput\b/,
  /<w:sdt\b/,
  /<w:sdtContent\b/,
  /<w:fldChar\b/,
  /FORMTEXT/,
  /FORMCHECKBOX/,
  /FORMDROPDOWN/,
];

// Relative priority of each form type; a higher number wins when several
// patterns match the same paragraph.
const FORM_TYPE_PRIORITY = {
  'form-data-complete': 10,
  'text-field': 9,
  'checkbox-field': 9,
  'dropdown-field': 9,
  'checkbox-control': 8,
  'dropdown-control': 8,
  'text-input': 8,
  'form-data': 7,
  'content-control': 6,
  'content-control-data': 5,
  'field-character': 4,
  'formtext-simple': 3,
  'formcheckbox-simple': 3,
  'formdropdown-simple': 3,
  'form-element': 1,
};

// One <w:p>...</w:p> element. `\b` keeps <w:pPr>, <w:pStyle>, ... from being
// mistaken for a paragraph start.
const PARAGRAPH_PATTERN = /<w:p\b[^>]*>[\s\S]*?<\/w:p>/g;

// A paragraph whose style id starts with "Heading".
const HEADING_STYLE_PATTERN = /<w:pStyle\b[^>]*w:val="Heading/;

// One text run: <w:t ...>text</w:t> (but not <w:tab/>, <w:textInput/>, ...).
const TEXT_RUN_PATTERN = /<w:t\b[^>]*>(.*?)<\/w:t>/g;

/**
 * Returns the visible text of a paragraph: the content of every <w:t> run,
 * with markup removed, joined by single spaces.
 *
 * @param {string} paragraph - XML of one <w:p> element.
 * @returns {string} The joined run text, or '' when there are no text runs.
 */
function extractTextFromParagraph(paragraph) {
  const textMatch = paragraph.match(TEXT_RUN_PATTERN);
  if (!textMatch) return '';
  return textMatch.map((match) => match.replace(/<[^>]*>/g, '')).join(' ');
}

/**
 * Maps a detection-pattern index to its form-type label.
 *
 * @param {number} formIndex - Index into the detection pattern list.
 * @returns {string} The label, or 'form-element' for an unknown index.
 */
function getFormType(formIndex) {
  return FORM_TYPES[formIndex] || 'form-element';
}

// Priority of a form type; unknown types rank lowest (1). The own-property
// check keeps inherited keys such as 'constructor' from being read as priorities.
function priorityOf(formType) {
  return Object.prototype.hasOwnProperty.call(FORM_TYPE_PRIORITY, formType)
    ? FORM_TYPE_PRIORITY[formType]
    : 1;
}

/**
 * Tells whether `newType` should replace `currentType` as the reported type.
 *
 * @param {string} newType - Candidate form type.
 * @param {string} currentType - Form type selected so far.
 * @returns {boolean} True only when `newType` has strictly higher priority,
 *   so on a tie the earlier selection is kept.
 */
function isPriorityFormType(newType, currentType) {
  return priorityOf(newType) > priorityOf(currentType);
}

// Splits document XML into its <w:p> elements; non-string input yields [].
function splitIntoParagraphs(documentXml) {
  if (typeof documentXml !== 'string') return [];
  return documentXml.match(PARAGRAPH_PATTERN) || [];
}

// Labels of every detection pattern that matches the paragraph, in pattern order.
function detectFormTypes(paragraph) {
  const detected = [];
  FORM_PATTERNS.forEach((regex, formIndex) => {
    if (regex.test(paragraph)) {
      detected.push(getFormType(formIndex));
    }
  });
  return detected;
}

/**
 * Detects form fields paragraph by paragraph and reports each paragraph at
 * most once, labelled with the highest-priority form type that matched.
 *
 * @param {string} documentXml - WordprocessingML document XML.
 * @returns {Array<{type: string, formType: string, paragraphNumber: number,
 *   approximatePage: number, heading: (string|null), context: string,
 *   detectedPatterns: string[]}>} One entry per paragraph containing a form
 *   field; `detectedPatterns` lists every type that matched (debug info).
 */
function testDuplicatePrevention(documentXml) {
  const results = [];
  let paragraphCount = 0;
  let currentHeading = null;
  let approximatePageNumber = 1;

  // Track unique form field locations to prevent duplicates
  const seenFormLocations = new Set();

  splitIntoParagraphs(documentXml).forEach((paragraph, index) => {
    paragraphCount++;
    // Rough pagination: assume a new page every 15 paragraphs.
    if (paragraphCount % 15 === 0) {
      approximatePageNumber++;
    }

    if (HEADING_STYLE_PATTERN.test(paragraph)) {
      currentHeading = extractTextFromParagraph(paragraph);
    }

    const detectedPatterns = detectFormTypes(paragraph);
    if (detectedPatterns.length === 0) return;

    // One report per paragraph, however many patterns matched it.
    const locationKey = `paragraph-${index}`;
    if (seenFormLocations.has(locationKey)) return;
    seenFormLocations.add(locationKey);

    const formType = detectedPatterns.reduce((best, candidate) =>
      (isPriorityFormType(candidate, best) ? candidate : best));

    results.push({
      type: 'form',
      formType,
      paragraphNumber: paragraphCount,
      approximatePage: approximatePageNumber,
      heading: currentHeading,
      context: extractTextFromParagraph(paragraph),
      detectedPatterns,
    });
  });

  return results;
}

// Runs the detector on the fixture and compares the deduplicated result count
// with the raw number of (paragraph, pattern) matches.
function runDuplicatePreventionTest() {
  const results = testDuplicatePrevention(testDocumentWithDuplicateForms);

  // NOTE(review): the per-result lines below are reconstructed output.
  console.log('Detected form fields:');
  results.forEach((result, position) => {
    console.log(` ${position + 1}. ${result.formType} (paragraph ${result.paragraphNumber})`);
    console.log(`    Detected patterns: ${result.detectedPatterns.join(', ')}`);
  });

  let totalPossibleMatches = 0;
  splitIntoParagraphs(testDocumentWithDuplicateForms).forEach((paragraph) => {
    totalPossibleMatches += detectFormTypes(paragraph).length;
  });

  console.log(`Total possible matches without deduplication: ${totalPossibleMatches}`);
  console.log(`Actual results after deduplication: ${results.length}`);
  console.log(`Duplicates prevented: ${totalPossibleMatches - results.length}`);

  if (results.length < totalPossibleMatches) {
    console.log('\n✅ SUCCESS: Duplicate prevention is working!');
    console.log(' Each paragraph with form fields is reported only once');
    console.log(' Higher priority form types are selected when multiple patterns match');
  } else {
    console.log('\n❌ ISSUE: Duplicate prevention may not be working properly');
  }

  console.log('\n🎯 Key Features:');
  console.log(' • One form detection per paragraph maximum');
  console.log(' • Priority-based form type selection');
  console.log(' • Location-based deduplication using Set()');
  console.log(' • Debug info showing all detected patterns');
}

runDuplicatePreventionTest();