| const fs = require('fs');
|
| const JSZip = require('jszip');
|
|
|
|
|
/**
 * Self-check for duplicate handling in the link analysis.
 *
 * Builds a sample WordprocessingML body with eight hyperlinks that carry only
 * four distinct link texts, runs `testLinkAnalysis` over it, prints every
 * reported issue, and then verifies that each link text was reported exactly
 * once and that exactly four issues were reported in total.
 *
 * Exceptions thrown inside the check are caught and logged, not rethrown.
 *
 * @returns {Promise<void>} Resolves once the report has been printed.
 */
async function testDuplicateLinkHandling() {
  console.log('=== Testing Duplicate Link Handling ===\n');

  try {
    // Eight hyperlinks, four distinct texts (compared case-insensitively by
    // the analysis): "click here" x2, "Click this link:" x1,
    // "read more" x3, "www.example.com" x2.
    const testContent = `
      <w:document>
        <w:body>
          <w:p>
            <w:hyperlink r:id="rId1">
              <w:r><w:t>click here</w:t></w:r>
            </w:hyperlink>
          </w:p>
          <w:p>
            <w:hyperlink r:id="rId2">
              <w:r><w:t>click here</w:t></w:r>
            </w:hyperlink>
          </w:p>
          <w:p>
            <w:hyperlink r:id="rId3">
              <w:r><w:t>Click this link:</w:t></w:r>
            </w:hyperlink>
          </w:p>
          <w:p>
            <w:hyperlink r:id="rId4">
              <w:r><w:t>read more</w:t></w:r>
            </w:hyperlink>
          </w:p>
          <w:p>
            <w:hyperlink r:id="rId5">
              <w:r><w:t>read more</w:t></w:r>
            </w:hyperlink>
          </w:p>
          <w:p>
            <w:hyperlink r:id="rId6">
              <w:r><w:t>read more</w:t></w:r>
            </w:hyperlink>
          </w:p>
          <w:p>
            <w:hyperlink r:id="rId7">
              <w:r><w:t>www.example.com</w:t></w:r>
            </w:hyperlink>
          </w:p>
          <w:p>
            <w:hyperlink r:id="rId8">
              <w:r><w:t>www.example.com</w:t></w:r>
            </w:hyperlink>
          </w:p>
        </w:body>
      </w:document>
    `;

    console.log('🧪 Testing with Content Containing Duplicates:');
    console.log(' - "click here" appears 2 times');
    console.log(' - "Click this link:" appears 1 time');
    console.log(' - "read more" appears 3 times');
    console.log(' - "www.example.com" appears 2 times');
    console.log(' Total link elements: 8');
    console.log(' Expected unique issues: 4\n');

    const results = testLinkAnalysis(testContent);

    console.log('📊 Results:');
    console.log(` Total non-descriptive links found: ${results.nonDescriptiveLinks.length}`);
    console.log(` Should be 4 (no duplicates)\n`);

    // Tally how many times each link text shows up in the reported issues;
    // a Map keeps arbitrary link texts from colliding with Object.prototype keys.
    const linkTextCounts = new Map();
    results.nonDescriptiveLinks.forEach((link, index) => {
      console.log(` ${index + 1}. "${link.linkText}" (${link.type})`);
      console.log(` Location: ${link.location}`);
      console.log(` Recommendation: ${link.recommendation}`);
      console.log('');

      linkTextCounts.set(link.linkText, (linkTextCounts.get(link.linkText) || 0) + 1);
    });

    console.log('🔍 Verification:');
    let hasDuplicates = false;
    for (const [linkText, count] of linkTextCounts) {
      if (count > 1) {
        console.log(` ❌ DUPLICATE: "${linkText}" appears ${count} times`);
        hasDuplicates = true;
      } else {
        console.log(` ✅ UNIQUE: "${linkText}" appears ${count} time`);
      }
    }

    // Pass only when nothing was reported twice AND the total matches the
    // four distinct texts planted in the sample document.
    if (!hasDuplicates && results.nonDescriptiveLinks.length === 4) {
      console.log('\n✅ Duplicate handling test PASSED!');
      console.log(' All duplicate link texts were properly deduplicated.');
    } else {
      console.log('\n❌ Duplicate handling test FAILED!');
      console.log(' Expected 4 unique issues, got:', results.nonDescriptiveLinks.length);
    }
  } catch (error) {
    console.error('❌ Test failed:', error.message);
  }
}
|
|
|
|
|
/**
 * Finds hyperlinks in WordprocessingML markup whose visible text is not
 * descriptive (generic phrases such as "click here", or a raw URL).
 *
 * Link texts are compared lower-cased and trimmed, and each distinct flagged
 * text is reported only once (the first hyperlink that carries it).
 *
 * @param {string} documentXml - Markup containing `<w:hyperlink>` elements.
 * @returns {{nonDescriptiveLinks: Array<{type: string, linkText: string,
 *   location: string, approximatePage: number, context: string,
 *   recommendation: string}>}} One entry per distinct flagged link text.
 *   `type` is `'generic'` or `'url-as-text'`; `location` is
 *   `Paragraph <n>` where n is the 1-based position of the hyperlink among
 *   all hyperlinks found.
 */
function testLinkAnalysis(documentXml) {
  const results = { nonDescriptiveLinks: [] };
  const seenLinkTexts = new Set();

  // Exact (whole-text) matches that say nothing about the link target.
  const genericPhrases = [
    'click here', 'here', 'read more', 'more', 'link', 'this link',
    'see more', 'learn more', 'find out more', 'more info', 'more information'
  ];

  // Looser shapes of the same problem, tolerating a trailing colon/whitespace.
  const genericPatterns = [
    /^click\s+/i,
    /\bclick\s+\w+\s*:?\s*$/i,
    /^(here|there)\s*:?\s*$/i,
    /^(this|that)\s+link\s*:?\s*$/i,
    /^read\s+(more|on)\s*:?\s*$/i,
    /^see\s+(more|here|this)\s*:?\s*$/i,
    /^(more|info|information)\s*:?\s*$/i,
    /^(download|view|open)\s*:?\s*$/i
  ];

  // Returns the lower-cased, trimmed text of all <w:t> runs in one hyperlink,
  // or null when the hyperlink has no <w:t> element at all.
  const extractLinkText = (hyperlinkXml) => {
    // `<w:t` must be followed by `>` or whitespace so that elements such as
    // <w:tab/> are not mistaken for a text run. The literal is evaluated on
    // every call, so the /g lastIndex state never leaks between hyperlinks.
    const textRunPattern = /<w:t(?:\s[^>]*)?>([\s\S]*?)<\/w:t>/g;
    const pieces = [];
    let run = textRunPattern.exec(hyperlinkXml);
    while (run !== null) {
      pieces.push(run[1]);
      run = textRunPattern.exec(hyperlinkXml);
    }
    if (pieces.length === 0) {
      return null;
    }
    // Word may split one visible link text across several runs; join them
    // so the whole text is classified, not just the first fragment.
    return pieces.join('').toLowerCase().trim();
  };

  const hyperlinkMatches = documentXml.match(/<w:hyperlink[^>]*>[\s\S]*?<\/w:hyperlink>/g) || [];

  hyperlinkMatches.forEach((link, index) => {
    const linkText = extractLinkText(link);
    if (linkText === null || seenLinkTexts.has(linkText)) {
      return;
    }

    const isGeneric = genericPhrases.includes(linkText);
    const isGenericPattern = genericPatterns.some((pattern) => pattern.test(linkText));
    const isUrl = linkText.includes('www.') || linkText.includes('http');

    // A URL classification takes precedence over a generic one.
    let issueType = null;
    if (isGeneric || isGenericPattern) issueType = 'generic';
    if (isUrl) issueType = 'url-as-text';

    if (!issueType) {
      return;
    }

    // Only flagged texts are remembered, so a text is reported at most once.
    seenLinkTexts.add(linkText);
    results.nonDescriptiveLinks.push({
      type: issueType,
      linkText: linkText,
      location: `Paragraph ${index + 1}`,
      approximatePage: 1,
      context: 'Document body',
      recommendation: generateRecommendation(linkText, issueType)
    });
  });

  return results;
}
|
|
|
/**
 * Picks the remediation advice for a flagged link.
 *
 * @param {string} linkText - The flagged link text (currently not used when
 *   choosing the message).
 * @param {string} issueType - `'generic'` or `'url-as-text'`; any other value
 *   yields the fallback advice.
 * @returns {string} Human-readable recommendation.
 */
function generateRecommendation(linkText, issueType) {
  if (issueType === 'generic') {
    return 'Replace with descriptive text that explains where the link goes';
  }
  if (issueType === 'url-as-text') {
    return 'Replace URL with descriptive text like "Visit our website"';
  }
  return 'Use clear, descriptive language';
}
|
|
|
// Entry point: run the duplicate-handling check as soon as the script is loaded.
testDuplicateLinkHandling();