const Busboy = require('busboy'); const JSZip = require('jszip'); const { applyCorsHeaders, handleCorsPreflight } = require('../lib/cors-middleware'); // Helper function to send JSON with proper headers function sendJson(res, status, data) { res.setHeader('Content-Type', 'application/json'); res.status(status).end(JSON.stringify(data)); } module.exports = async (req, res) => { if (handleCorsPreflight(req, res, { allowedMethods: 'POST, OPTIONS' })) { return; } applyCorsHeaders(req, res, { allowedMethods: 'POST, OPTIONS' }); if (req.method !== 'POST') { sendJson(res, 405, { error: 'Method not allowed' }); return; } try { const busboy = Busboy({ headers: req.headers }); let fileData = null; let filename = null; busboy.on('file', (fieldname, file, info) => { filename = info.filename; const chunks = []; file.on('data', (chunk) => { chunks.push(chunk); }); file.on('end', () => { fileData = Buffer.concat(chunks); }); }); busboy.on('finish', async () => { if (!fileData || !filename) { res.status(400).json({ error: 'No file uploaded' }); return; } if (!filename.toLowerCase().endsWith('.docx')) { res.status(400).json({ error: 'Please upload a .docx file' }); return; } try { const remediatedFile = await remediateDocx(fileData, filename); // Always fix filename: replace underscores with hyphens and add -remediated suffix let suggestedName = filename .replace(/_/g, '-') // Replace all underscores with hyphens .replace(/\.docx$/i, '-remediated.docx'); // Add -remediated before extension res.setHeader('Content-Type', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'); res.setHeader('Content-Disposition', `attachment; filename="${suggestedName}"`); res.status(200).send(remediatedFile); } catch (error) { res.status(500).json({ error: error.message }); } }); req.pipe(busboy); } catch (error) { res.status(500).json({ error: error.message }); } }; async function remediateDocx(fileData, filename) { try { const zip = await JSZip.loadAsync(fileData); // Helper function to write only if content changed const writeIfChanged = (filename, original, modified) => { if (original !== modified && modified !== null) { zip.file(filename, modified); return true; } return false; }; // Process document.xml const docFile = zip.file('word/document.xml'); if (docFile) { const origDocXml = await docFile.async('string'); const afterShadows = removeShadowsOnly(origDocXml); const afterInlineContent = applyInlineContentFixes(afterShadows || origDocXml); writeIfChanged('word/document.xml', origDocXml, afterInlineContent); } // Process styles.xml const stylesFile = zip.file('word/styles.xml'); if (stylesFile) { const origStylesXml = await stylesFile.async('string'); const afterStylesShadows = removeShadowsOnly(origStylesXml); writeIfChanged('word/styles.xml', origStylesXml, afterStylesShadows); } // Process theme files const themeFile = zip.file('word/theme/theme1.xml'); if (themeFile) { const origThemeXml = await themeFile.async('string'); const afterTheme = removeShadowsOnly(origThemeXml); writeIfChanged('word/theme/theme1.xml', origThemeXml, afterTheme); } // Protection removal try { const settingsFile = zip.file('word/settings.xml'); if (settingsFile) { const origSettings = await settingsFile.async('string'); const hasAnyProt = /]*\/>/g, ''); cleaned = cleaned.replace(/]*>[\s\S]*?<\/w:(?:documentProtection|writeProtection|readOnlyRecommended|editRestrictions|formProtection|protection|docProtection)>/g, ''); cleaned = cleaned.replace(/]*\/>/g, ''); cleaned = cleaned.replace(/]*>[\s\S]*?<\/w:(?:enforcement|locked|trackRevisions)>/g, ''); cleaned = cleaned.replace(/]*\/>/g, ''); cleaned = cleaned.replace(/]*>[\s\S]*?<\/w:crypt[^>]*>/g, ''); cleaned = cleaned.replace(/\s?w:(?:locked|trackRevisions|enforcement)="[^"]*"/g, ''); writeIfChanged('word/settings.xml', origSettings, cleaned); } } } catch (e) { console.warn('[remediateDocx] Protection removal failed:', e.message); } // Generate with proper compression const remediatedBuffer = await zip.generateAsync({ type: 'nodebuffer', compression: 'DEFLATE', compressionOptions: { level: 6 } }); return remediatedBuffer; } catch (error) { throw new Error(`Failed to remediate document: ${error.message}`); } } function applyInlineContentFixes(xmlContent) { if (!xmlContent) return null; const original = xmlContent; let fixedXml = xmlContent; // Apply the same patterns as in the analysis function const floatingPatterns = [ // DrawingML anchor patterns (modern Word drawings) { pattern: /]*>([\s\S]*?)<\/wp:anchor>/g, replacement: function(match, content) { // Convert anchor (floating) to inline return `${content}`; } }, // Text wrapping patterns { pattern: /]*\/>/g, replacement: '' }, { pattern: /]*>[\s\S]*?<\/wp:wrapTight>/g, replacement: '' }, { pattern: /]*>[\s\S]*?<\/wp:wrapThrough>/g, replacement: '' }, { pattern: /]*\/>/g, replacement: '' }, { pattern: /]*\/>/g, replacement: '' }, // Position and alignment patterns { pattern: /]*>[\s\S]*?<\/wp:positionH>/g, replacement: '' }, { pattern: /]*>[\s\S]*?<\/wp:positionV>/g, replacement: '' }, // VML patterns for legacy compatibility { pattern: /mso-position-horizontal:[^;]*;?/g, replacement: '' }, { pattern: /mso-position-vertical:[^;]*;?/g, replacement: '' }, { pattern: /mso-wrap-style:[^;]*;?/g, replacement: '' }, { pattern: /left:\s*[^;]*;?/g, replacement: '' }, { pattern: /top:\s*[^;]*;?/g, replacement: '' } ]; // Apply fixes for floating elements floatingPatterns.forEach(patternObj => { const { pattern, replacement } = patternObj; if (typeof replacement === 'function') { fixedXml = fixedXml.replace(pattern, replacement); } else { fixedXml = fixedXml.replace(pattern, replacement); } }); // Special handling for drawing elements - ensure they are inline const drawingPattern = /]*>[\s\S]*?<\/w:drawing>/g; const drawingMatches = fixedXml.match(drawingPattern); if (drawingMatches) { drawingMatches.forEach(drawing => { // Check if this drawing contains floating elements if (drawing.includes('wp:anchor') && !drawing.includes('wp:inline')) { // Convert anchor to inline within the drawing let fixedDrawing = drawing.replace(/]*>/g, ''); fixedDrawing = fixedDrawing.replace(/<\/wp:anchor>/g, ''); if (fixedDrawing !== drawing) { fixedXml = fixedXml.replace(drawing, fixedDrawing); } } }); } // If nothing changed, return null if (fixedXml === original) return null; return fixedXml; } function removeShadowsOnly(xmlContent) { const original = xmlContent; let fixedXml = xmlContent; // 1. Remove basic Word text shadows fixedXml = fixedXml.replace(//g, ''); fixedXml = fixedXml.replace(/]*>.*?<\/w:shadow>/g, ''); fixedXml = fixedXml.replace(/\s+\w*shadow\w*\s*=\s*"[^"]*"/g, ''); // 2. Remove advanced DrawingML shadow effects fixedXml = fixedXml.replace(/]*\/>/g, ''); fixedXml = fixedXml.replace(/]*>.*?<\/a:outerShdw>/g, ''); fixedXml = fixedXml.replace(/]*\/>/g, ''); fixedXml = fixedXml.replace(/]*>.*?<\/a:innerShdw>/g, ''); fixedXml = fixedXml.replace(/]*\/>/g, ''); fixedXml = fixedXml.replace(/]*>.*?<\/a:prstShdw>/g, ''); // 3. Remove Office 2010+ shadow effects fixedXml = fixedXml.replace(/]*\/>/g, ''); fixedXml = fixedXml.replace(/]*>.*?<\/w14:shadow>/g, ''); fixedXml = fixedXml.replace(/]*\/>/g, ''); fixedXml = fixedXml.replace(/]*>.*?<\/w15:shadow>/g, ''); // 4. Remove shadow-related text effects and 3D properties fixedXml = fixedXml.replace(/]*\/>/g, ''); fixedXml = fixedXml.replace(/]*>.*?<\/w14:glow>/g, ''); fixedXml = fixedXml.replace(/]*\/>/g, ''); fixedXml = fixedXml.replace(/]*>.*?<\/w14:reflection>/g, ''); fixedXml = fixedXml.replace(/]*\/>/g, ''); fixedXml = fixedXml.replace(/]*>.*?<\/w14:props3d>/g, ''); // 5. Remove shadow properties and attributes (safely) // Remove only within attribute values, not entire element names fixedXml = fixedXml.replace(/\s+\w*shdw\w*\s*=\s*"[^"]*"/g, ''); // NOTE: Font normalization, font size fixes, and line spacing fixes have been // removed - these are now flagged for user attention instead of auto-fixed // If nothing changed, return null so callers can avoid rewriting the part if (fixedXml === original) return null; return fixedXml; }