const JSZip = require('jszip'); // Main PowerPoint analysis function async function analyzePowerPoint(fileData, filename) { const report = { fileName: filename, suggestedFileName: filename, summary: { fixed: 0, flagged: 0 }, details: { listFormattingIssues: [], imagesMissingOrBadAlt: [], } }; try { const zip = await JSZip.loadAsync(fileData); // Get list of slides const slides = []; zip.forEach((relativePath, file) => { if (relativePath.match(/^ppt\/slides\/slide\d+\.xml$/)) { slides.push(relativePath); } }); // Sort slides by number slides.sort((a, b) => { const numA = parseInt(a.match(/slide(\d+)\.xml$/)?.[1] || '0'); const numB = parseInt(b.match(/slide(\d+)\.xml$/)?.[1] || '0'); return numA - numB; }); console.log(`[analyzePowerPoint] Found ${slides.length} slides`); // Analyze each slide for (let i = 0; i < slides.length; i++) { const slidePath = slides[i]; const slideNumber = i + 1; const slideXml = await zip.file(slidePath)?.async('string'); const slideRelsPath = slidePath.replace('ppt/slides/', 'ppt/slides/_rels/').replace('.xml', '.xml.rels'); const slideRels = await zip.file(slideRelsPath)?.async('string'); if (slideXml) { // Check for list formatting issues (hyphenated paragraphs) const listIssues = checkListFormatting(slideXml, slideNumber); if (listIssues.length > 0) { report.details.listFormattingIssues.push(...listIssues); report.summary.flagged += listIssues.length; } // Check images for alt text const imageIssues = await analyzeSlideImages(slideXml, slideRels, slideNumber); if (imageIssues.length > 0) { report.details.imagesMissingOrBadAlt.push(...imageIssues); report.summary.flagged += imageIssues.length; } } } console.log(`[analyzePowerPoint] Analysis complete. Fixed: ${report.summary.fixed}, Flagged: ${report.summary.flagged}`); return report; } catch (error) { console.error('[analyzePowerPoint] Error:', error); throw new Error(`Failed to analyze PowerPoint: ${error.message}`); } } // Check for list formatting issues (hyphenated paragraphs that should be lists) function checkListFormatting(slideXml, slideNumber) { const issues = []; // Find all text elements in the slide const textMatches = slideXml.matchAll(/]*>(.*?)<\/a:t>/g); for (const match of textMatches) { const text = match[1]; // Check for hyphenated paragraphs that look like lists // Pattern: line starting with "-", "•", "–", "—" followed by text if (/^[\s]*[-–—•]\s+.+/.test(text)) { issues.push({ slideNumber: slideNumber, location: `Slide ${slideNumber}`, issue: `Possible improperly formatted list: "${text.substring(0, 50)}..."`, type: 'listFormatting' }); } } return issues; } // Analyze images in a slide async function analyzeSlideImages(slideXml, slideRels, slideNumber) { const issues = []; // Find all picture elements const picMatches = slideXml.matchAll(//g); for (const picMatch of picMatches) { const picXml = picMatch[0]; // Check for alt text (descr attribute in ) const nvPicPr = picXml.match(/([\s\S]*?)<\/p:nvPicPr>/); if (nvPicPr) { const cNvPr = nvPicPr[1].match(/]*>/); if (cNvPr) { const descrMatch = cNvPr[0].match(/descr="([^"]*)"/); const altText = descrMatch ? descrMatch[1] : ''; if (!altText || altText.trim().length === 0) { issues.push({ slideNumber: slideNumber, location: `Slide ${slideNumber}`, issue: 'Image missing alt text', type: 'image' }); } else if (altText.length > 250) { issues.push({ slideNumber: slideNumber, location: `Slide ${slideNumber}`, issue: `Image alt text is too long (${altText.length} characters, max 250)`, type: 'image' }); } } } } return issues; } module.exports = { analyzePowerPoint };