#!/usr/bin/env node
// Test document with overlapping form patterns that would cause duplicates
// Fixture text: two labelled form-field markers (FORMTEXT, FORMCHECKBOX), one
// item per line, wrapped in a leading and a trailing newline.
// NOTE(review): the fixture holds no XML markup, yet the paragraph regex used
// further down looks for a closing `</w:p>` tag — the markup may have been
// stripped from this string; confirm against the original file.
const testDocumentWithDuplicateForms = [
  '',
  'Form with multiple detectable patterns:',
  'FORMTEXT',
  'Another form field:',
  'FORMCHECKBOX',
  '',
].join('\n');
// Duplicate prevention test functions
// Returns the text carried by every <w:t> element inside `paragraph`, in
// document order, joined with single spaces. Returns '' when the paragraph
// contains no <w:t> element.
//
// paragraph: string holding the XML of one paragraph.
function extractTextFromParagraph(paragraph) {
  // The opening tag has to be spelled out in full. Without the `<w:t` anchor
  // the pattern starts at the first `>` of any preceding tag and a stray `>`
  // leaks into the result. `(?:\s[^>]*)?` admits attributes such as
  // xml:space="preserve" while rejecting look-alikes like <w:tab/> or <w:tbl>.
  // `[\s\S]` lets a text run span a line break.
  const textRuns = paragraph.match(/<w:t(?:\s[^>]*)?>[\s\S]*?<\/w:t>/g);
  if (!textRuns) return '';
  // Drop the surrounding tags, keeping only the character data of each run.
  return textRuns.map((run) => run.replace(/<[^>]*>/g, '')).join(' ');
}
// Translates a pattern position into its form-type label.
//
// formIndex: position to look up in the label list below.
// Returns the label stored at that position, or 'form-element' when the
// lookup yields nothing usable (for example an index past the end).
function getFormType(formIndex) {
  const labelsByPosition = [
    'text-field',
    'checkbox-field',
    'dropdown-field',
    'form-data-complete',
    'form-data',
    'checkbox-control',
    'dropdown-control',
    'text-input',
    'content-control',
    'content-control-data',
    'field-character',
    'formtext-simple',
    'formcheckbox-simple',
    'formdropdown-simple',
  ];
  const label = labelsByPosition[formIndex];
  return label ? label : 'form-element';
}
// Decides whether `newType` should replace `currentType` as the reported form
// type for a location.
//
// Each known type has a numeric rank; a type that is not in the table is
// treated as rank 1. Returns true only when newType ranks strictly higher
// than currentType, so equal ranks never displace the current type.
function isPriorityFormType(newType, currentType) {
  const rankByType = {
    'form-data-complete': 10,

    'text-field': 9,
    'checkbox-field': 9,
    'dropdown-field': 9,

    'checkbox-control': 8,
    'dropdown-control': 8,
    'text-input': 8,

    'form-data': 7,
    'content-control': 6,
    'content-control-data': 5,
    'field-character': 4,

    'formtext-simple': 3,
    'formcheckbox-simple': 3,
    'formdropdown-simple': 3,

    'form-element': 1,
  };
  const rankOf = (type) => rankByType[type] || 1;

  const candidateRank = rankOf(newType);
  const incumbentRank = rankOf(currentType);
  return candidateRank > incumbentRank;
}
// NOTE(review): this region does not parse as written. The `if (` that follows
// the page-counter block is never closed and runs straight into a second
// `paragraphRegex`-style assignment, and no closing brace for
// testDuplicatePrevention or for its forEach callback is visible. Text appears
// to have been lost (the regex literals also begin with a bare `]*`, as if a
// leading `<w:...[^>` had been stripped) — restore from the original file
// before relying on anything in this span.
//
// Visible part of testDuplicatePrevention: sets up its counters, splits
// `documentXml` into paragraph matches, and starts iterating over them.
function testDuplicatePrevention(documentXml) {
// Result accumulator; nothing in the visible lines adds to it.
const results = [];
let paragraphCount = 0;
// Not read or written again in the visible lines.
let currentHeading = null;
let approximatePageNumber = 1;
// Track unique form field locations to prevent duplicates
const seenFormLocations = new Set();
// As written, the five `//,` lines below are line comments, so this array
// holds six regexes. NOTE(review): presumably those lines were regex literals
// whose contents were stripped — confirm.
const formElements = [
/]*FORMTEXT/,
/]*FORMCHECKBOX/,
/]*FORMDROPDOWN/,
//,
//,
//,
//,
//,
/FORMTEXT/,
/FORMCHECKBOX/,
/FORMDROPDOWN/
];
// Each match ends at a closing </w:p> tag; a document without one yields [].
const paragraphRegex = /]*>[\s\S]*?<\/w:p>/g;
const paragraphs = documentXml.match(paragraphRegex) || [];
paragraphs.forEach((paragraph, index) => {
paragraphCount++;
// Rough page estimate: bump the page number on every 15th paragraph.
if (paragraphCount % 15 === 0) {
approximatePageNumber++;
}
// NOTE(review): the next line is cut off mid-condition; everything between
// this `if (` and the statements below is missing from view.
if (/]*>[\s\S]*?<\/w:p>/g;
// From here the code reads the module-level fixture rather than documentXml.
const paragraphs = testDocumentWithDuplicateForms.match(paragraphRegex) || [];
// As written, the second line of this literal is entirely a line comment, so
// only the three regexes on the first line are in the array.
const formElements = [
/]*FORMTEXT/, /]*FORMCHECKBOX/, /]*FORMDROPDOWN/,
//, //, //, //, //, /FORMTEXT/, /FORMCHECKBOX/, /FORMDROPDOWN/
];
// Count every (paragraph, pattern) pair that matches, with no deduplication.
// `totalPossibleMatches` is not declared anywhere in the visible lines.
paragraphs.forEach(paragraph => {
formElements.forEach(regex => {
if (paragraph.match(regex)) {
totalPossibleMatches++;
}
});
});
// Print the raw match count, the result count, and their difference.
console.log(`Total possible matches without deduplication: ${totalPossibleMatches}`);
console.log(`Actual results after deduplication: ${results.length}`);
console.log(`Duplicates prevented: ${totalPossibleMatches - results.length}`);
// Verdict and feature summary. Fewer results than raw pattern matches is
// reported as success; otherwise a warning is printed. `results` and
// `totalPossibleMatches` come from the code that precedes this span.
//
// The marker glyphs had been mis-decoded (UTF-8 bytes read as a single-byte
// Baltic code page), which also split the first message across two lines and
// left its string literal unterminated. They are restored here.
if (results.length < totalPossibleMatches) {
  console.log('\n✅ SUCCESS: Duplicate prevention is working!');
  console.log(' Each paragraph with form fields is reported only once');
  console.log(' Higher priority form types are selected when multiple patterns match');
} else {
  console.log('\n❌ ISSUE: Duplicate prevention may not be working properly');
}
console.log('\n🎯 Key Features:');
console.log(' • One form detection per paragraph maximum');
console.log(' • Priority-based form type selection');
console.log(' • Location-based deduplication using Set()');
console.log(' • Debug info showing all detected patterns');