Spaces:

accessibilitychecker
/

AccessibilityCheckerBackend

Running

File size: 8,761 Bytes

bbfde3f

const Busboy = require('busboy');
const JSZip = require('jszip');
const { applyCorsHeaders, handleCorsPreflight } = require('../lib/cors-middleware');

// The PowerPoint analyzer is loaded defensively: if the module cannot be
// required, the failure is logged and `analyzePowerPoint` stays undefined so
// the rest of this file can still be loaded.
let analyzePowerPoint;
try {
  ({ analyzePowerPoint } = require('../lib/pptx-analyzer'));
} catch (loadError) {
  console.error('Failed to load pptx-analyzer:', loadError);
}

/**
 * Writes a JSON response.
 *
 * Sets the `Content-Type` header to `application/json`, applies the status
 * code and ends the response with the serialized payload.
 *
 * @param {object} res - Response object exposing `setHeader` and a `status`
 *   method whose return value exposes `end`.
 * @param {number} status - HTTP status code to send.
 * @param {*} data - Value serialized with `JSON.stringify`.
 */
function sendJson(res, status, data) {
  res.setHeader('Content-Type', 'application/json');
  const statusApplied = res.status(status);
  statusApplied.end(JSON.stringify(data));
}

/**
 * Concatenates the text of every `<w:t>` element found in a paragraph's XML.
 *
 * Only real `<w:t>` elements are considered: the opening tag must be exactly
 * `w:t` (optionally followed by attributes) and must not be self-closing.
 * Elements whose names merely start with `w:t` (`<w:tab/>`, `<w:tabs>`,
 * `<w:tcPr>`, ...) are ignored so their markup cannot leak into the result.
 *
 * @param {string} paragraphXml - Raw XML of a single paragraph.
 * @returns {string} The joined run text with surrounding whitespace trimmed,
 *   or an empty string when there is no text (or no input).
 */
function extractTextFromParagraph(paragraphXml) {
  if (!paragraphXml) return '';

  // `(?:\s[^>]*)?` allows attributes only after whitespace, which rejects
  // longer tag names; `(?<!\/)` rejects self-closing `<w:t ... />`.
  const textRunPattern = /<w:t(?:\s[^>]*)?(?<!\/)>([\s\S]*?)<\/w:t>/g;

  let text = '';
  for (const [, runText] of paragraphXml.matchAll(textRunPattern)) {
    text += runText;
  }
  return text.trim();
}

module.exports = async (req, res) => {
  if (handleCorsPreflight(req, res, { allowedMethods: 'POST, OPTIONS' })) {
    return;
  }
  applyCorsHeaders(req, res, { allowedMethods: 'POST, OPTIONS' });

  if (req.method !== 'POST') {
    sendJson(res, 405, { error: 'Method not allowed' });
    return;
  }

  try {
    const busboy = Busboy({ headers: req.headers });
    let fileData = null;
    let filename = null;

    busboy.on('file', (fieldname, file, info) => {
      filename = info.filename;
      const chunks = [];
      
      file.on('data', (chunk) => {
        chunks.push(chunk);
      });
      
      file.on('end', () => {
        fileData = Buffer.concat(chunks);
      });
    });

    busboy.on('finish', async () => {
      if (!fileData || !filename) {
        sendJson(res, 400, { error: 'No file uploaded' });
        return;
      }

      const filenameLower = filename.toLowerCase();
      
      // Support both PowerPoint and Word documents
      const isPowerPoint = ['.pptx', '.ppt', '.pps', '.pot', '.potx', '.ppsx'].some(ext => filenameLower.endsWith(ext));
      const isWord = filenameLower.endsWith('.docx');

      if (!isPowerPoint && !isWord) {
        sendJson(res, 400, { error: 'Please upload a PowerPoint or Word document (.docx, .pptx)' });
        return;
      }

      try {
        let report;
        if (isPowerPoint) {
          // Route PowerPoint files to the PowerPoint analyzer
          if (!analyzePowerPoint) {
            throw new Error('PowerPoint analyzer not available');
          }
          report = await analyzePowerPoint(fileData, filename);
        } else {
          // Route Word documents to the Word analyzer
          report = await analyzeDocx(fileData, filename);
        }
        
        sendJson(res, 200, {
          fileName: filename,
          suggestedFileName: filename,
          report: report
        });
      } catch (error) {
        console.error('Analysis error:', error);
        sendJson(res, 500, { error: error.message });
      }
    });

    req.pipe(busboy);

  } catch (error) {
    console.error('Upload error:', error);
    sendJson(res, 500, { error: error.message });
  }
};
module.exports.analyzeDocx = analyzeDocx;
/**
 * Builds the empty report skeleton used by both the success and the failure
 * path of `analyzeDocx`, so consumers always see the same fields.
 *
 * @param {string} filename - Name of the uploaded file.
 * @returns {object} A report with zeroed counters and empty finding lists.
 */
function createEmptyDocxReport(filename) {
  return {
    fileName: filename,
    suggestedFileName: filename,
    summary: { fixed: 0, flagged: 0 },
    details: {
      // Requirement 1: Lists are formatted correctly
      hyphenatedParagraphsNeedingLists: [],
      formattedListsCount: 0,

      // Requirement 2: Images have alt text (max 250 chars)
      imagesMissingAltText: [],
      imagesWithAltTextOver250Chars: [],
      imagesWithValidAltText: 0,
    },
  };
}

/**
 * Analyzes a .docx archive for two accessibility requirements: paragraphs that
 * look like hand-typed list items, and images with missing or over-long
 * alt text.
 *
 * This function does not reject on analysis failure. Any error (unreadable
 * archive, missing main document part, ...) is logged and returned as a
 * report carrying an `error` message alongside the usual empty fields.
 *
 * @param {Buffer} fileData - Contents of the uploaded file.
 * @param {string} filename - Name of the uploaded file, echoed in the report.
 * @returns {Promise<object>} Report with `fileName`, `suggestedFileName`,
 *   `summary`, `details`, and `error` when analysis failed.
 */
async function analyzeDocx(fileData, filename) {
  const report = createEmptyDocxReport(filename);

  try {
    const zip = await JSZip.loadAsync(fileData);

    // The two parts are independent, so read them concurrently.
    const [documentXml, relsXml] = await Promise.all([
      zip.file('word/document.xml')?.async('string'),
      zip.file('word/_rels/document.xml.rels')?.async('string'),
    ]);

    // Without the main document part nothing was inspected; returning a clean
    // report would wrongly suggest the file has no issues.
    if (!documentXml) {
      throw new Error('word/document.xml was not found in the uploaded file');
    }

    // ===== REQUIREMENT 1: Check for lists formatted correctly =====
    const listIssues = analyzeListFormatting(documentXml);
    if (listIssues.hyphenatedParagraphs.length > 0) {
      report.details.hyphenatedParagraphsNeedingLists = listIssues.hyphenatedParagraphs;
      report.summary.flagged += listIssues.hyphenatedParagraphs.length;
    }
    report.details.formattedListsCount = listIssues.properlyFormattedLists;

    // ===== REQUIREMENT 2: Check for images with alt text =====
    if (relsXml) {
      const imageAnalysis = analyzeImageAltText(documentXml, relsXml);

      if (imageAnalysis.missingAltText.length > 0) {
        report.details.imagesMissingAltText = imageAnalysis.missingAltText;
        report.summary.flagged += imageAnalysis.missingAltText.length;
      }

      if (imageAnalysis.altTextOver250Chars.length > 0) {
        report.details.imagesWithAltTextOver250Chars = imageAnalysis.altTextOver250Chars;
        report.summary.flagged += imageAnalysis.altTextOver250Chars.length;
      }

      report.details.imagesWithValidAltText = imageAnalysis.validAltTextCount;
    }

    return report;

  } catch (error) {
    console.error('[analyzeDocx] Error analyzing document:', error);
    // Start from a fresh skeleton so partial findings collected before the
    // failure are not reported, while keeping every field consumers expect.
    return {
      ...createEmptyDocxReport(filename),
      error: error.message,
    };
  }
}

// ===== HELPER FUNCTIONS =====

/**
 * Analyze list formatting in the document.
 *
 * Flags paragraphs whose text starts with a hyphen, en dash or em dash
 * followed by whitespace (a hand-typed list item), and counts paragraphs that
 * carry list formatting (`ListParagraph` style or a `numPr` element).
 *
 * Paragraphs are numbered in document order starting at 1; empty self-closing
 * paragraphs (`<w:p .../>`) count as paragraphs of their own.
 * Paragraph boundaries are found with a non-nesting pattern, so a paragraph
 * nested inside another paragraph is not isolated as a separate entry.
 *
 * @param {string} documentXml - Contents of `word/document.xml`.
 * @returns {{hyphenatedParagraphs: Array<{index: number, text: string, message: string}>, properlyFormattedLists: number}}
 *   Findings; both parts are empty/zero when no XML is given.
 */
function analyzeListFormatting(documentXml) {
  const results = {
    hyphenatedParagraphs: [],
    properlyFormattedLists: 0
  };

  if (!documentXml) return results;

  // Tag names are matched exactly: attributes may only follow whitespace, so
  // `<w:pPr>`, `<w:pStyle>`, `<w:tab/>`, `<w:tabs>` etc. are not mistaken for
  // `<w:p>` / `<w:t>`. The first alternative keeps a self-closing paragraph
  // from swallowing the paragraph that follows it.
  const paragraphPattern = /<w:p(?:\s[^>]*)?\/>|<w:p(?:\s[^>]*)?>[\s\S]*?<\/w:p>/g;
  const textRunPattern = /<w:t(?:\s[^>]*)?(?<!\/)>([\s\S]*?)<\/w:t>/g;
  const leadingDashPattern = /^[-–—]\s+/;

  let paragraphNumber = 0;
  for (const [paragraph] of documentXml.matchAll(paragraphPattern)) {
    paragraphNumber += 1;

    // Extract text content from paragraph
    let text = '';
    for (const [, runText] of paragraph.matchAll(textRunPattern)) {
      text += runText;
    }
    text = text.trim();

    // Check if paragraph starts with hyphen/dash (indicates list formatting issue)
    if (text && leadingDashPattern.test(text)) {
      results.hyphenatedParagraphs.push({
        index: paragraphNumber,
        text: text.substring(0, 100), // First 100 chars
        message: 'This paragraph appears to be a list item but is formatted as a regular paragraph'
      });
    }

    // Count properly formatted lists (pPr contains pStyle with list references)
    if (paragraph.includes('pStyle w:val="ListParagraph"') || paragraph.includes('numPr')) {
      results.properlyFormattedLists++;
    }
  }

  return results;
}

/**
 * Decodes the five predefined XML entities and numeric character references
 * in an attribute value, in a single pass (so `&amp;lt;` becomes `&lt;`).
 * Unrecognized or out-of-range references are left untouched.
 *
 * @param {string} value - Attribute value as it appears in the XML source.
 * @returns {string} The value with entities replaced by their characters.
 */
function decodeXmlAttributeValue(value) {
  const namedEntities = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };

  return value.replace(/&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, (entity, body) => {
    if (body[0] !== '#') {
      return namedEntities[body];
    }
    const codePoint = body[1] === 'x'
      ? Number.parseInt(body.slice(2), 16)
      : Number.parseInt(body.slice(1), 10);
    // String.fromCodePoint throws for values above the Unicode range.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
  });
}

/**
 * Analyze image alt text requirements.
 *
 * Looks at every `wp:inline` / `wp:anchor` drawing that embeds a picture
 * (has an `r:embed` reference) and classifies it as missing alt text, having
 * alt text longer than 250 characters, or valid. Drawings without an
 * `r:embed` reference are skipped.
 *
 * The length is measured on the decoded text, so an escaped character such as
 * `&quot;` counts as one character. The `altText` preview in the result is
 * taken from the value as stored in the XML.
 *
 * @param {string} documentXml - Contents of `word/document.xml`.
 * @param {string} relsXml - Contents of the document relationships part; only
 *   its presence is checked.
 * @returns {{missingAltText: Array<object>, altTextOver250Chars: Array<object>, validAltTextCount: number}}
 *   Findings; all empty/zero when either input is missing.
 */
function analyzeImageAltText(documentXml, relsXml) {
  const results = {
    missingAltText: [],
    altTextOver250Chars: [],
    validAltTextCount: 0
  };

  if (!documentXml || !relsXml) return results;

  // Find all images/drawings
  const drawingMatches = documentXml.match(/<wp:inline[^>]*>[\s\S]*?<\/wp:inline>|<wp:anchor[^>]*>[\s\S]*?<\/wp:anchor>/g) || [];

  drawingMatches.forEach((drawing, index) => {
    // Relationship ids are arbitrary strings, not necessarily "rId<digits>",
    // so accept any non-empty value instead of silently skipping the image.
    const rIdMatch = drawing.match(/r:embed="([^"]+)"/);
    if (!rIdMatch) return;

    const rId = rIdMatch[1];

    // Extract alternate text (docProperties).
    // NOTE(review): the second pattern is kept as-is from the original code;
    // confirm which producers, if any, emit that structure.
    const altTextMatch = drawing.match(/<wp:docPr[^>]*descr="([^"]*)"/) || drawing.match(/<wp:cNvPicPr[^>]*>[\s\S]*?<a:picLocks[^>]*descr="([^"]*)"/);
    const altText = altTextMatch ? altTextMatch[1] : null;

    // Measure what a reader actually gets, not the XML-escaped form.
    const decodedAltText = altText === null ? '' : decodeXmlAttributeValue(altText);

    // Check if this image has proper alt text
    if (decodedAltText.trim() === '') {
      results.missingAltText.push({
        index: index + 1,
        rId: rId,
        message: 'Image is missing alt text description'
      });
    } else if (decodedAltText.length > 250) {
      results.altTextOver250Chars.push({
        index: index + 1,
        rId: rId,
        altText: altText.substring(0, 100) + '...',
        length: decodedAltText.length,
        message: `Alt text is ${decodedAltText.length} characters (max 250)`
      });
    } else {
      // Valid alt text
      results.validAltTextCount++;
    }
  });

  return results;
}