File size: 4,458 Bytes
bbfde3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
const JSZip = require('jszip');

/**
 * Analyze a PowerPoint (.pptx) package for accessibility issues.
 *
 * Each slide is checked for hand-typed list markers (`checkListFormatting`)
 * and for pictures with missing or overly long alt text
 * (`analyzeSlideImages`). Nothing in the package is modified, so
 * `summary.fixed` is always 0.
 *
 * @param {*} fileData - Package contents, passed straight to
 *   `JSZip.loadAsync` (any input type JSZip accepts).
 * @param {string} filename - Original file name; echoed back in the report
 *   as both `fileName` and `suggestedFileName`.
 * @returns {Promise<object>} Report with `fileName`, `suggestedFileName`,
 *   `summary: { fixed, flagged }` and `details.listFormattingIssues` /
 *   `details.imagesMissingOrBadAlt`.
 * @throws {Error} "Failed to analyze PowerPoint: ..." when the package
 *   cannot be read or analyzed; the underlying error is attached as `cause`.
 */
async function analyzePowerPoint(fileData, filename) {
  const report = {
    fileName: filename,
    suggestedFileName: filename,
    summary: { fixed: 0, flagged: 0 },
    details: {
      listFormattingIssues: [],
      imagesMissingOrBadAlt: [],
    }
  };

  try {
    const zip = await JSZip.loadAsync(fileData);

    // Collect the slide parts contained in the package.
    const slideFiles = [];
    zip.forEach((relativePath) => {
      if (/^ppt\/slides\/slide\d+\.xml$/.test(relativePath)) {
        slideFiles.push(relativePath);
      }
    });

    // Numeric sort on the part name gives a stable fallback order
    // (slide2 before slide10).
    const partNumber = (path) => Number.parseInt(path.match(/slide(\d+)\.xml$/)?.[1] ?? '0', 10);
    slideFiles.sort((a, b) => partNumber(a) - partNumber(b));

    // Part names do not follow the on-screen order once slides have been
    // reordered or deleted, so number slides the way the presentation
    // lists them whenever that information is available.
    const slides = await orderSlidesByPresentation(zip, slideFiles);

    console.log(`[analyzePowerPoint] Found ${slides.length} slides`);

    for (const [index, slidePath] of slides.entries()) {
      const slideNumber = index + 1;
      const slideXml = await zip.file(slidePath)?.async('string');
      const slideRelsPath = slidePath.replace('ppt/slides/', 'ppt/slides/_rels/').replace('.xml', '.xml.rels');
      const slideRels = await zip.file(slideRelsPath)?.async('string');

      if (!slideXml) {
        continue;
      }

      // Hyphen/bullet characters typed by hand instead of real list formatting.
      const listIssues = checkListFormatting(slideXml, slideNumber);
      if (listIssues.length > 0) {
        report.details.listFormattingIssues.push(...listIssues);
        report.summary.flagged += listIssues.length;
      }

      // Pictures whose alt text is missing or too long.
      const imageIssues = await analyzeSlideImages(slideXml, slideRels, slideNumber);
      if (imageIssues.length > 0) {
        report.details.imagesMissingOrBadAlt.push(...imageIssues);
        report.summary.flagged += imageIssues.length;
      }
    }

    console.log(`[analyzePowerPoint] Analysis complete. Fixed: ${report.summary.fixed}, Flagged: ${report.summary.flagged}`);
    return report;

  } catch (error) {
    console.error('[analyzePowerPoint] Error:', error);
    // Keep the original error reachable for callers that need the stack.
    throw new Error(`Failed to analyze PowerPoint: ${error.message}`, { cause: error });
  }
}

/**
 * Order slide part paths the way the presentation displays them.
 *
 * The display order is the sequence of `<p:sldId r:id="...">` entries in
 * `ppt/presentation.xml`; each relationship id is resolved to a part path
 * through `ppt/_rels/presentation.xml.rels`. Slide parts that are not
 * listed there keep their incoming order and are appended at the end.
 * If the order cannot be determined, `slidePaths` is returned unchanged.
 *
 * @param {object} zip - Loaded JSZip instance.
 * @param {string[]} slidePaths - Slide part paths in fallback order.
 * @returns {Promise<string[]>} Slide part paths in presentation order.
 */
async function orderSlidesByPresentation(zip, slidePaths) {
  const [presentationXml, presentationRels] = await Promise.all([
    zip.file('ppt/presentation.xml')?.async('string'),
    zip.file('ppt/_rels/presentation.xml.rels')?.async('string'),
  ]);
  if (!presentationXml || !presentationRels) {
    return slidePaths;
  }

  // Relationship id -> package path of the target part.
  const targetsById = new Map();
  for (const [relationshipTag] of presentationRels.matchAll(/<Relationship\b[^>]*>/g)) {
    const id = relationshipTag.match(/\sId="([^"]*)"/)?.[1];
    const target = relationshipTag.match(/\sTarget="([^"]*)"/)?.[1];
    if (!id || !target) {
      continue;
    }
    // Targets are relative to ppt/ unless written as an absolute part name.
    targetsById.set(id, target.startsWith('/') ? target.slice(1) : `ppt/${target}`);
  }

  const knownSlides = new Set(slidePaths);
  const ordered = new Set(); // Set keeps insertion order and drops duplicates.
  for (const [, relationshipId] of presentationXml.matchAll(/<p:sldId\b[^>]*\sr:id="([^"]*)"/g)) {
    const path = targetsById.get(relationshipId);
    if (knownSlides.has(path)) {
      ordered.add(path);
    }
  }

  if (ordered.size === 0) {
    return slidePaths;
  }
  return [...ordered, ...slidePaths.filter((path) => !ordered.has(path))];
}

/**
 * Flag text runs that look like hand-typed list items, i.e. text that starts
 * with "-", "–", "—" or "•" followed by whitespace and more text.
 *
 * @param {string} slideXml - Raw XML of one slide.
 * @param {number} slideNumber - 1-based slide number used in the report.
 * @returns {Array<{slideNumber: number, location: string, issue: string, type: string}>}
 *   One entry per suspicious text run; empty when none are found.
 */
function checkListFormatting(slideXml, slideNumber) {
  const PREVIEW_LENGTH = 50;
  const issues = [];

  // Match only the text element itself. A bare `<a:t[^>]*>` would also match
  // `<a:tbl>`, `<a:tr>`, `<a:tc>`, `<a:txBody>` and the self-closing `<a:t/>`,
  // capturing markup up to the next `</a:t>` and hiding the real text.
  // `[\s\S]` lets a run's text span line breaks.
  const textRunPattern = /<a:t(?:\s[^>]*)?(?<!\/)>([\s\S]*?)<\/a:t>/g;

  for (const [, text] of slideXml.matchAll(textRunPattern)) {
    // Optional leading whitespace, a list marker, whitespace, then content.
    if (!/^\s*[-–—•]\s+.+/.test(text)) {
      continue;
    }

    // Only add an ellipsis when the preview was actually shortened.
    const preview = text.length > PREVIEW_LENGTH
      ? `${text.substring(0, PREVIEW_LENGTH)}...`
      : text;

    issues.push({
      slideNumber,
      location: `Slide ${slideNumber}`,
      issue: `Possible improperly formatted list: "${preview}"`,
      type: 'listFormatting'
    });
  }

  return issues;
}

/**
 * Decode the predefined XML entities and numeric character references in an
 * attribute value so that its text can be measured as the reader sees it.
 * Unknown or out-of-range references are left as written.
 */
function decodeXmlAttribute(value) {
  const namedEntities = { lt: '<', gt: '>', amp: '&', quot: '"', apos: "'" };
  // Single pass, so text such as "&amp;lt;" decodes to "&lt;" and not "<".
  return value.replace(/&(#x[0-9a-fA-F]+|#\d+|lt|gt|amp|quot|apos);/g, (whole, ref) => {
    if (ref[0] !== '#') {
      return namedEntities[ref];
    }
    const codePoint = ref[1] === 'x'
      ? Number.parseInt(ref.slice(2), 16)
      : Number.parseInt(ref.slice(1), 10);
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
  });
}

/**
 * Check every picture (`<p:pic>`) on a slide for missing or overly long
 * alt text. Alt text is the `descr` attribute of the picture's `<p:cNvPr>`.
 *
 * Pictures marked as decorative are skipped: they are intentionally left
 * without alt text, so reporting them would be a false positive.
 *
 * @param {string} slideXml - Raw XML of one slide.
 * @param {string|undefined} slideRels - Slide relationships XML. Not used by
 *   the current checks; kept so existing callers keep working.
 * @param {number} slideNumber - 1-based slide number used in the report.
 * @returns {Promise<Array<{slideNumber: number, location: string, issue: string, type: string}>>}
 *   One entry per problematic picture; empty when none are found.
 */
async function analyzeSlideImages(slideXml, slideRels, slideNumber) {
  const MAX_ALT_TEXT_LENGTH = 250;
  const issues = [];
  const flag = (issue) => {
    issues.push({
      slideNumber,
      location: `Slide ${slideNumber}`,
      issue,
      type: 'image'
    });
  };

  for (const [picXml] of slideXml.matchAll(/<p:pic\b[\s\S]*?<\/p:pic>/g)) {
    // The non-visual properties hold the picture's name and description.
    const nvPicPr = picXml.match(/<p:nvPicPr>([\s\S]*?)<\/p:nvPicPr>/)?.[1];
    const cNvPrTag = nvPicPr?.match(/<p:cNvPr\b[^>]*>/)?.[0];
    if (!cNvPrTag) {
      continue;
    }

    // "Mark as decorative" is stored as a `decorative val="1"` extension
    // element (conventionally prefixed `adec:`) under the cNvPr element.
    if (/<(?:\w+:)?decorative\b[^>]*\sval="(?:1|true)"/.test(nvPicPr)) {
      continue;
    }

    // Decode before measuring: in the raw XML "&amp;" is five characters
    // but the reader sees one.
    const rawDescr = cNvPrTag.match(/\sdescr="([^"]*)"/)?.[1] ?? '';
    const altText = decodeXmlAttribute(rawDescr);

    if (altText.trim().length === 0) {
      flag('Image missing alt text');
    } else if (altText.length > MAX_ALT_TEXT_LENGTH) {
      flag(`Image alt text is too long (${altText.length} characters, max 250)`);
    }
  }

  return issues;
}

module.exports = { analyzePowerPoint };