File size: 5,146 Bytes
bbfde3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
const fs = require('fs');
const JSZip = require('jszip');

/** Document inspected by default; a fallback is picked from REPORTS_DIR when it is missing. */
const DEFAULT_TEST_FILE = 'reports/Protected_remediated_by_agent.docx';
const REPORTS_DIR = 'reports';

/** `w:sz` values are expressed in half-points, so 22 corresponds to 11pt. */
const SMALL_FONT_THRESHOLD = 22;

/** `w:line` values below this are flagged (360 is 1.5 lines when `w:lineRule` is "auto"). */
const LINE_SPACING_THRESHOLD = 360;

/** Serif font names; the lookbehind keeps "sans-serif" / "Sans Serif" from counting as serif. */
const SERIF_PATTERN = /Times|Georgia|Garamond|(?<!sans[\s-]?)serif/gi;

const FONT_SIZE_PATTERN = /<w:sz w:val="(\d+)"/g;

/** Shadow markup variants; all global so every occurrence is counted, not just the first. */
const SHADOW_PATTERNS = [
  /<w:shadow\s*\/>/g,
  /<w:shadow[^>]*>/g,
  /<a:outerShdw[^>]*>/g,
  /<w14:shadow[^>]*>/g,
];

/**
 * Picks the file to inspect: the preferred file when it exists, otherwise the
 * first `.docx` found in the reports folder.
 *
 * @param {string} preferredFile - Path tried first.
 * @returns {string|null} Path to inspect, or null when nothing usable exists.
 * @throws {Error} Any directory read error other than a missing reports folder.
 */
function resolveTestFile(preferredFile) {
  if (fs.existsSync(preferredFile)) {
    return preferredFile;
  }

  console.log('Test file not found, trying other files...');
  let entries;
  try {
    entries = fs.readdirSync(REPORTS_DIR);
  } catch (error) {
    if (error.code !== 'ENOENT') {
      throw error;
    }
    console.log('No reports folder found');
    return null;
  }

  const docxFiles = entries.filter((name) => name.endsWith('.docx'));
  if (docxFiles.length === 0) {
    console.log('No .docx files found in reports folder');
    return null;
  }

  console.log(`Using ${docxFiles[0]} instead`);
  return `${REPORTS_DIR}/${docxFiles[0]}`;
}

/**
 * Collects the first capture group of every match as a base-10 integer.
 *
 * @param {string} xml - Markup to scan.
 * @param {RegExp} pattern - Global pattern whose first group is a number.
 * @returns {number[]} Parsed values in document order.
 */
function extractNumbers(xml, pattern) {
  return Array.from(xml.matchAll(pattern), (match) => Number.parseInt(match[1], 10));
}

/**
 * Counts paragraphs and how many of them declare `<w:spacing>` in their own
 * paragraph properties.
 *
 * Each paragraph is examined on its own, so a paragraph without spacing can
 * never be credited with a later paragraph's spacing element.
 *
 * @param {string} xml - Contents of word/document.xml.
 * @returns {{total: number, withSpacing: number}} Paragraph counts.
 */
function countParagraphSpacing(xml) {
  // Requires whitespace, "/" or ">" right after "<w:p" so <w:pPr>, <w:pStyle>,
  // <w:pgSz> and similar elements are not mistaken for paragraphs.
  const paragraphOpen = /<w:p(?:\s[^>]*)?\/?>/g;
  // Sticky: only matches a <w:pPr> that directly follows the opening tag.
  const paragraphProps = /\s*<w:pPr\s*>(.*?)<\/w:pPr>/sy;

  let total = 0;
  let withSpacing = 0;
  for (const open of xml.matchAll(paragraphOpen)) {
    total += 1;
    if (open[0].endsWith('/>')) {
      continue; // Self-closing paragraph: no properties at all.
    }

    paragraphProps.lastIndex = open.index + open[0].length;
    const props = paragraphProps.exec(xml);
    if (!props) {
      continue;
    }

    // <w:spacing> inside <w:rPr> is character spacing, not paragraph spacing.
    const ownProps = props[1].replace(/<w:rPr\s*>.*?<\/w:rPr>/gs, '');
    if (/<w:spacing[\s/>]/.test(ownProps)) {
      withSpacing += 1;
    }
  }

  return { total, withSpacing };
}

/**
 * Prints shadow, font, font-size and line-spacing findings for document.xml.
 *
 * @param {string} documentXml - Contents of word/document.xml.
 * @returns {void}
 */
function inspectDocumentXml(documentXml) {
  console.log(`Document XML length: ${documentXml.length}`);

  console.log('\nShadow detection:');
  SHADOW_PATTERNS.forEach((pattern, index) => {
    const matches = documentXml.match(pattern);
    console.log(`  Test ${index + 1}: ${matches ? `${matches.length} matches` : 'no matches'}`);
    if (matches) {
      console.log(`    First match: ${matches[0].slice(0, 100)}`);
    }
  });

  console.log('\nFont detection:');
  const serifMatches = documentXml.match(SERIF_PATTERN);
  console.log(`  Serif fonts: ${serifMatches ? `${serifMatches.length} matches` : 'none found'}`);
  if (serifMatches) {
    console.log(`    Found: ${[...new Set(serifMatches)].join(', ')}`);
  }

  const declaredFonts = Array.from(documentXml.matchAll(/w:ascii="([^"]*)"/g), (match) => match[1]);
  if (declaredFonts.length > 0) {
    console.log(`  Font declarations: ${declaredFonts.length}`);
    console.log(`    Fonts found: ${[...new Set(declaredFonts)].join(', ')}`);
  }

  console.log('\nFont size detection:');
  const sizes = extractNumbers(documentXml, FONT_SIZE_PATTERN);
  if (sizes.length > 0) {
    const smallSizes = sizes.filter((size) => size < SMALL_FONT_THRESHOLD);
    const uniqueSizes = [...new Set(sizes)].sort((a, b) => a - b);
    console.log(`  Size declarations: ${sizes.length}`);
    console.log(`    Sizes found: ${uniqueSizes.join(', ')}`);
    console.log(`    Small sizes (< ${SMALL_FONT_THRESHOLD}): ${smallSizes.length > 0 ? smallSizes.join(', ') : 'none'}`);
  } else {
    console.log('  No size declarations found');
  }

  console.log('\nLine spacing detection:');
  const spacingMatches = [...documentXml.matchAll(/<w:spacing[^>]*w:line="(\d+)"[^>]*\/>/g)];
  if (spacingMatches.length > 0) {
    console.log(`  Spacing declarations: ${spacingMatches.length}`);
    for (const [element, rawValue] of spacingMatches) {
      const lineValue = Number.parseInt(rawValue, 10);
      console.log(`    ${element} -> ${lineValue} ${lineValue < LINE_SPACING_THRESHOLD ? '(NEEDS FIX)' : '(OK)'}`);
    }
  } else {
    console.log('  No explicit spacing declarations found');
  }

  if (documentXml.includes('w:lineRule="exact"')) {
    console.log('  Found exact line spacing rule (NEEDS FIX)');
  }

  const { total, withSpacing } = countParagraphSpacing(documentXml);
  const withoutSpacing = total - withSpacing;
  console.log(`  Total paragraphs: ${total}`);
  console.log(`  Paragraphs with spacing: ${withSpacing}`);
  console.log(`  Paragraphs without spacing: ${withoutSpacing} ${withoutSpacing > 0 ? '(NEEDS FIX)' : '(OK)'}`);
}

/**
 * Prints serif-font and small-font-size findings for styles.xml.
 *
 * @param {string} stylesXml - Contents of word/styles.xml.
 * @returns {void}
 */
function inspectStylesXml(stylesXml) {
  console.log(`Styles XML length: ${stylesXml.length}`);

  const serifMatches = stylesXml.match(SERIF_PATTERN);
  console.log(`Serif fonts in styles: ${serifMatches ? serifMatches.length : 0}`);

  const sizes = extractNumbers(stylesXml, FONT_SIZE_PATTERN);
  if (sizes.length > 0) {
    const smallSizes = sizes.filter((size) => size < SMALL_FONT_THRESHOLD);
    console.log(`Small font sizes in styles: ${smallSizes.length > 0 ? smallSizes.join(', ') : 'none'}`);
  }
}

/**
 * Opens a .docx archive and prints what the shadow, font, font-size and
 * line-spacing checks find in word/document.xml and word/styles.xml.
 *
 * Failures while locating, reading or parsing the file are logged as
 * "Debug failed: ..." rather than thrown.
 *
 * @param {string} [testFile] - Document to inspect. When it does not exist,
 *   the first `.docx` in the reports folder is used instead.
 * @returns {Promise<void>} Resolves once the report has been printed.
 */
async function debugDetection(testFile = DEFAULT_TEST_FILE) {
  console.log('=== Debugging Detection Issues ===\n');

  try {
    const targetFile = resolveTestFile(testFile);
    if (targetFile === null) {
      return;
    }

    const fileData = fs.readFileSync(targetFile);
    const zip = await JSZip.loadAsync(fileData);

    console.log('1. CHECKING DOCUMENT.XML');
    const documentXml = await zip.file('word/document.xml')?.async('string');
    if (documentXml) {
      inspectDocumentXml(documentXml);
    }

    console.log('\n2. CHECKING STYLES.XML');
    const stylesXml = await zip.file('word/styles.xml')?.async('string');
    if (stylesXml) {
      inspectStylesXml(stylesXml);
    }
  } catch (error) {
    console.error('Debug failed:', error.message);
  }
}

// Script entry point. The promise is not left floating: anything that escapes
// debugDetection is logged and the process is marked as failed.
debugDetection().catch((error) => {
  console.error('Debug failed:', error.message);
  process.exitCode = 1;
});