/** * Readability Analysis Script * * This script analyzes the readability of rendered documentation content using standard * readability metrics. It has a dual purpose: * * 1. **GitHub Actions Workflow**: Automatically runs on pull requests to analyze changed * content files and post readability reports as PR comments. * * 2. **Local Development Tool**: Can be run locally by writers to test readability * of specific files during content creation and editing. * * WORKFLOW USAGE: * - Triggered automatically on PRs with content changes * - Uses CHANGED_FILES environment variable from get-changed-files action * - Outputs report to console and saves to readability-report.md for PR commenting * - Requires local server to be running to analyze rendered content * * LOCAL USAGE: * 1. Start local development server: `npm start` * 2. Run analysis on specific files: * - Single file: `npm run readability-report -- --paths content/file.md` * - Multiple files: `npm run readability-report -- --paths content/file1.md content/file2.md` * - Get help: `npm run readability-report -- --help` * * FEATURES: * - Analyzes rendered content (not raw Markdown) to account for Liquid templating * - Calculates multiple readability metrics (Flesch Reading Ease, Gunning Fog, FORCAST, etc.) * - Provides detailed reports with improvement recommendations * - Filters out code blocks and non-prose elements for accurate analysis * * REQUIREMENTS: * - Local server running on localhost:4000 * - Content files must be in content/ directory * - Files must be accessible via rendered site URLs */ import fs from 'fs' import path from 'path' import cheerio from 'cheerio' import { fetchWithRetry } from '@/frame/lib/fetch-utils' interface ReadabilityMetrics { fleschReadingEase: number fleschKincaidGrade: number gunningFog: number colemanLiau: number automatedReadabilityIndex: number smogIndex: number forcastGrade: number } interface PageReadability { path: string url: string title: string metrics: ReadabilityMetrics wordCount: number sentenceCount: number estimatedReadingTime: string } async function main() { // Check for help flag const args = process.argv.slice(2) if (args.includes('--help') || args.includes('-h')) { console.log(` Usage: npm run readability-report [-- --paths ...] Examples: # Analyze specific files npm run readability-report -- --paths content/copilot/using-github-copilot.md content/get-started/quickstart.md # Analyze a single file npm run readability-report -- --paths content/copilot/using-github-copilot.md # Use environment variable (for CI) CHANGED_FILES="content/file1.md content/file2.md" npm run readability-report Note: Requires a local server running on localhost:4000 (npm start) `) return } console.log('Starting readability analysis...') // Get changed content files from environment variable or command line arguments const changedFiles = getChangedContentFiles() if (changedFiles.length === 0) { console.log('No content files to analyze. Use --help for usage information.') return } console.log(`Analyzing readability for ${changedFiles.length} changed files:`) for (const file of changedFiles) { console.log(` - ${file}`) } // Wait for server to be ready await waitForServer() // Analyze each changed file const results: PageReadability[] = [] for (const filePath of changedFiles) { try { const result = await analyzeFile(filePath) if (result) { results.push(result) console.log(`✓ Analyzed: ${result.path}`) } } catch (error) { console.error(`✗ Failed to analyze ${filePath}:`, (error as Error).message) } } // Generate and output report const report = generateReport(results) // Always output to console for local development console.log(`\n${report}`) // If running in CI, also save report for commenting on PR if (process.env.GITHUB_ACTIONS) { fs.writeFileSync('readability-report.md', report) console.log('\nReport saved to readability-report.md') } } function getChangedContentFiles(): string[] { // Check for command line arguments first const args = process.argv.slice(2) const pathsIndex = args.indexOf('--paths') if (pathsIndex !== -1 && pathsIndex + 1 < args.length) { // Get all arguments after --paths until we hit another flag or end const paths: string[] = [] for (let i = pathsIndex + 1; i < args.length; i++) { if (args[i].startsWith('--')) break paths.push(args[i]) } return paths.filter((filePath) => { // Filter for content files only (not data files, READMEs, etc.) return ( filePath.endsWith('.md') && filePath.split(path.sep)[0] === 'content' && path.basename(filePath) !== 'README.md' ) }) } // Fall back to environment variable (for CI) const spaceSeparatedList = process.env.CHANGED_FILES || '' return spaceSeparatedList.split(/\s+/g).filter((filePath) => { // Filter for content files only (not data files, READMEs, etc.) return ( filePath.endsWith('.md') && filePath.split(path.sep)[0] === 'content' && path.basename(filePath) !== 'README.md' ) }) } function makeURL(urlPath: string): string { return `http://localhost:4000${urlPath}` } async function waitForServer(): Promise { console.log('Waiting for server to be ready...') const maxAttempts = 30 const delayMs = 2000 for (let attempt = 1; attempt <= maxAttempts; attempt++) { try { const response = await fetchWithRetry(makeURL('/'), undefined, { timeout: 5000, }) if (!response.ok) { throw new Error(`HTTP ${response.status}: ${response.statusText}`) } console.log('Server is ready!') return } catch (error) { if (attempt === maxAttempts) { console.error('Server failed to start. Last error:', (error as Error).message) throw new Error(`Server failed to start after ${maxAttempts} attempts`) } console.log( `Attempt ${attempt}/${maxAttempts} failed (${(error as Error).message}), retrying in ${delayMs}ms...`, ) await new Promise((resolve) => setTimeout(resolve, delayMs)) } } } async function analyzeFile(filePath: string): Promise { // Convert file path to URL path // content/get-started/foo.md -> /get-started/foo const urlPath = `/${filePath .replace(/^content\//, '') .replace(/\.md$/, '') .replace(/\/index$/, '')}` try { // Fetch the rendered page const response = await fetchWithRetry(makeURL(urlPath), undefined, { timeout: 30000, throwHttpErrors: false, }) if (response.status !== 200) { console.warn(`Skipping ${urlPath}: HTTP ${response.status}`) return null } // Parse HTML and extract content const body = await response.text() const $ = cheerio.load(body) // Get page title const title = $('h1').first().text().trim() || $('title').text().trim() || 'Untitled' // Extract main content text (excluding navigation, sidebars, etc.) // Focus on the main article content with more specific selectors const contentSelectors = [ 'article .markdown-body', // Most specific - article content in docs '.markdown-body', // GitHub markdown content 'article', // Generic article 'main', // Main content area '[data-testid="lead"]', // Lead content '#article-contents', // Fallback ] let contentText = '' for (const selector of contentSelectors) { const element = $(selector) if (element.length > 0) { // Remove code blocks and other non-prose elements element.find('pre, code, .highlight').remove() element.find('nav, .breadcrumb, .pagination').remove() element.find('[data-testid="breadcrumbs"]').remove() element.find('.js-search-results').remove() element.find('aside, .sidebar').remove() element.find('.edit-this-page').remove() // Get the text content directly, which preserves natural spacing contentText = element.text() // Clean up the text to remove excessive whitespace contentText = contentText .replace(/\s+/g, ' ') // Replace multiple whitespace with single space .trim() break } } if (!contentText.trim()) { console.warn(`No content found for ${urlPath}`) return null } // Calculate readability metrics const metrics = await calculateReadability(contentText) // Calculate estimated reading time based on word count and complexity const estimatedReadingTime = estimateReadingTime( countWords(contentText), metrics.fleschReadingEase, ) return { path: filePath, url: urlPath, title, metrics, wordCount: countWords(contentText), sentenceCount: countSentences(contentText), estimatedReadingTime, } } catch (error) { throw new Error(`Failed to analyze ${urlPath}: ${(error as Error).message}`) } } async function calculateReadability(text: string): Promise { // Direct implementation of readability formulas const words = countWords(text) const sentences = countSentences(text) const syllables = countSyllables(text) const complexWords = countComplexWords(text) const singleSyllableWords = countSingleSyllableWords(text) if (sentences === 0 || words === 0) { return { fleschReadingEase: 0, fleschKincaidGrade: 0, gunningFog: 0, colemanLiau: 0, automatedReadabilityIndex: 0, smogIndex: 0, forcastGrade: 0, } } // Flesch Reading Ease: 206.835 - (1.015 × ASL) - (84.6 × ASW) const avgSentenceLength = words / sentences const avgSyllablesPerWord = syllables / words const fleschReadingEase = Math.max( 0, 206.835 - 1.015 * avgSentenceLength - 84.6 * avgSyllablesPerWord, ) // Flesch-Kincaid Grade Level: (0.39 × ASL) + (11.8 × ASW) - 15.59 const fleschKincaidGrade = 0.39 * avgSentenceLength + 11.8 * avgSyllablesPerWord - 15.59 // Gunning Fog Index: 0.4 × (ASL + percentage of complex words) const complexWordPercentage = (complexWords / words) * 100 const gunningFog = 0.4 * (avgSentenceLength + complexWordPercentage) // Coleman-Liau Index: 0.0588 × L - 0.296 × S - 15.8 const avgCharsPer100Words = (text.replace(/\s/g, '').length / words) * 100 const avgSentencesPer100Words = (sentences / words) * 100 const colemanLiau = 0.0588 * avgCharsPer100Words - 0.296 * avgSentencesPer100Words - 15.8 // Automated Readability Index: 4.71 × (characters/words) + 0.5 × (words/sentences) - 21.43 const avgCharsPerWord = text.replace(/\s/g, '').length / words const automatedReadabilityIndex = 4.71 * avgCharsPerWord + 0.5 * avgSentenceLength - 21.43 // SMOG Index: approximately 1.0430 × sqrt(complex words × 30/sentences) + 3.1291 const smogIndex = 1.043 * Math.sqrt((complexWords * 30) / sentences) + 3.1291 // FORCAST Grade Level: 20 - (N / 10), where N = single-syllable words per 150 words // Scale to 150-word sample proportion const singleSyllablePer150Words = (singleSyllableWords / words) * 150 const forcastGrade = Math.max(5, 20 - singleSyllablePer150Words / 10) // Can't go below 5th grade return { fleschReadingEase: Math.round(fleschReadingEase * 100) / 100, fleschKincaidGrade: Math.max(0, Math.round(fleschKincaidGrade * 100) / 100), gunningFog: Math.max(0, Math.round(gunningFog * 100) / 100), colemanLiau: Math.max(0, Math.round(colemanLiau * 100) / 100), automatedReadabilityIndex: Math.max(0, Math.round(automatedReadabilityIndex * 100) / 100), smogIndex: Math.max(0, Math.round(smogIndex * 100) / 100), forcastGrade: Math.max(5, Math.round(forcastGrade * 100) / 100), } } function countSyllablesInWord(word: string): number { // Remove non-alphabetic characters const cleanWord = word.toLowerCase().replace(/[^a-z]/g, '') if (cleanWord.length === 0) return 0 // Count vowel groups const vowelGroups = cleanWord.match(/[aeiouy]+/g) || [] let syllables = vowelGroups.length // Subtract silent 'e' at end if (cleanWord.endsWith('e') && syllables > 1) { syllables -= 1 } // Ensure at least 1 syllable per word return Math.max(1, syllables) } function countWords(text: string): number { // Clean the text and split into words const cleanText = text .replace(/\s+/g, ' ') // Replace multiple whitespace with single space .trim() if (!cleanText) return 0 // Split on whitespace and filter out empty strings const words = cleanText.split(/\s+/).filter((word) => { // Remove punctuation and check if there are actual letters/numbers const cleanWord = word.replace(/[^\w]/g, '') return cleanWord.length > 0 }) return words.length } function countSentences(text: string): number { // Clean and normalize the text first const cleanText = text .replace(/\s+/g, ' ') // Replace multiple whitespace with single space .trim() // Split on sentence-ending punctuation, being more conservative // Only count actual sentence-ending punctuation, not structural breaks const sentences = cleanText .split(/[.!?]+(?=\s|$)/) // Only split on punctuation followed by space or end .map((s) => s.trim()) .filter((s) => s.length > 0) return Math.max(1, sentences.length) } function countSyllables(text: string): number { // Simple syllable counting approximation const words = text.toLowerCase().split(/\s+/) let syllableCount = 0 for (const word of words) { if (word.length === 0) continue syllableCount += countSyllablesInWord(word) } return syllableCount } function countSingleSyllableWords(text: string): number { // Count words with exactly 1 syllable const words = text.toLowerCase().split(/\s+/) let singleSyllableCount = 0 for (const word of words) { if (word.length === 0) continue const syllables = countSyllablesInWord(word) if (syllables === 1) { singleSyllableCount += 1 } } return singleSyllableCount } function countComplexWords(text: string): number { // Count words with 3+ syllables (approximation) const words = text.toLowerCase().split(/\s+/) let complexCount = 0 for (const word of words) { if (word.length === 0) continue const syllables = countSyllablesInWord(word) if (syllables >= 3) { complexCount += 1 } } return complexCount } function estimateReadingTime(wordCount: number, fleschReadingEase: number): string { // Base reading speed in words per minute (WPM) // Average adult reading speed is around 200-250 WPM for normal text let baseWPM = 200 // Adjust reading speed based on text complexity (Flesch Reading Ease score) // Higher scores = easier text = faster reading // Lower scores = harder text = slower reading if (fleschReadingEase >= 90) { baseWPM = 250 // Very easy - fast reading } else if (fleschReadingEase >= 80) { baseWPM = 230 // Easy - slightly faster } else if (fleschReadingEase >= 70) { baseWPM = 210 // Fairly easy - normal speed } else if (fleschReadingEase >= 60) { baseWPM = 200 // Standard - average speed } else if (fleschReadingEase >= 50) { baseWPM = 180 // Fairly difficult - slower } else if (fleschReadingEase >= 30) { baseWPM = 160 // Difficult - much slower } else { baseWPM = 140 // Very difficult - very slow } // Calculate reading time in minutes const readingTimeMinutes = wordCount / baseWPM // Format the output if (readingTimeMinutes < 1) { return '< 1 min' } else if (readingTimeMinutes < 60) { return `${Math.round(readingTimeMinutes)} min` } else { const hours = Math.floor(readingTimeMinutes / 60) const minutes = Math.round(readingTimeMinutes % 60) return minutes > 0 ? `${hours}h ${minutes}m` : `${hours}h` } } function generateReport(results: PageReadability[]): string { if (results.length === 0) { return '## 📊 Readability Report\n\nNo content changes found to analyze.' } let report = '## 📊 Readability Report\n\n' report += `Analyzed ${results.length} changed documentation page${results.length === 1 ? '' : 's'}.\n\n` // Summary table report += '### Summary\n\n' report += '| Page | Flesch Reading Ease | Grade Level | Reading Time | Words |\n' report += '|------|:-------------------:|:-----------:|:------------:|:-----:|\n' for (const result of results) { const grade = result.metrics.fleschKincaidGrade const ease = result.metrics.fleschReadingEase const readingTime = result.estimatedReadingTime // Determine readability assessment let easeAssessment = '❓' if (ease >= 60) easeAssessment = '🟢' else if (ease >= 30) easeAssessment = '🟡' else easeAssessment = '🔴' report += `| [${result.title}](${result.url}) | ${ease} ${easeAssessment} | ${grade} | ${readingTime} | ${result.wordCount} |\n` } // Detailed metrics report += '\n### Detailed Metrics\n\n' for (const result of results) { report += `#### ${result.title}\n\n` report += `**File:** \`${result.path}\` \n` report += `**URL:** ${result.url} \n` report += `**Words:** ${result.wordCount} | **Sentences:** ${result.sentenceCount} | **Est. Reading Time:** ${result.estimatedReadingTime}\n\n` report += '| Metric | Score | Target | Assessment |\n' report += '|--------|:-----:|:------:|:----------:|\n' const metrics = [ { name: 'Flesch Reading Ease', score: result.metrics.fleschReadingEase, target: '60+', assessment: result.metrics.fleschReadingEase >= 60 ? '🟢 Good' : result.metrics.fleschReadingEase >= 30 ? '🟡 Fair' : '🔴 Difficult', }, { name: 'Flesch-Kincaid Grade', score: result.metrics.fleschKincaidGrade, target: '8 or less', assessment: result.metrics.fleschKincaidGrade <= 8 ? '🟢 Good' : result.metrics.fleschKincaidGrade <= 12 ? '🟡 Fair' : '🔴 High', }, { name: 'Gunning Fog Index', score: result.metrics.gunningFog, target: '8 or less', assessment: result.metrics.gunningFog <= 8 ? '🟢 Good' : result.metrics.gunningFog <= 12 ? '🟡 Fair' : '🔴 High', }, { name: 'Coleman-Liau Index', score: result.metrics.colemanLiau, target: '8 or less', assessment: result.metrics.colemanLiau <= 8 ? '🟢 Good' : result.metrics.colemanLiau <= 12 ? '🟡 Fair' : '🔴 High', }, { name: 'Automated Readability Index', score: result.metrics.automatedReadabilityIndex, target: '8 or less', assessment: result.metrics.automatedReadabilityIndex <= 8 ? '🟢 Good' : result.metrics.automatedReadabilityIndex <= 12 ? '🟡 Fair' : '🔴 High', }, { name: 'FORCAST Grade Level', score: result.metrics.forcastGrade, target: '9-10', assessment: result.metrics.forcastGrade <= 10 ? '🟢 Good' : result.metrics.forcastGrade <= 12 ? '🟡 Fair' : '🔴 High', }, ] for (const metric of metrics) { report += `| ${metric.name} | ${metric.score} | ${metric.target} | ${metric.assessment} |\n` } report += '\n' } // Guidelines report += '### 📖 Readability Guidelines\n\n' report += '**Target Audience:** Technical users (developers, administrators)\n\n' report += '**Reading Time Estimation:**\n' report += '- Based on complexity-adjusted reading speed (140-250 WPM)\n' report += '- Easier content (higher Flesch scores) = faster reading\n' report += '- More complex content = slower reading pace\n\n' report += '**General Tips for Improvement:**\n' report += '- **Sentences:** Aim for 15-20 words per sentence on average\n' report += '- **Word choice:** Choose simpler alternatives when possible\n' report += '- **Paragraphs:** Keep paragraphs under 75-100 words\n' report += '- **Voice:** Use active voice (e.g., "Click the button" vs "The button should be clicked")\n' report += '- **Technical terms:** Define acronyms and jargon on first use\n' report += '- **FORCAST improvement:** Use more single-syllable words (aim for 110+ per 150 words)\n\n' // Add overall recommendations based on results const avgFleschEase = results.reduce((sum, r) => sum + r.metrics.fleschReadingEase, 0) / results.length if (avgFleschEase < 60) { if (results.length === 1) { report += '**Priority Focus:** This page scored below 60 for Flesch Reading Ease. Consider breaking up complex sentences and using simpler vocabulary.\n\n' } else { report += '**Priority Focus:** Several pages scored below 60 for Flesch Reading Ease. Consider breaking up complex sentences and using simpler vocabulary.\n\n' } } return report } try { await main() } catch (error) { console.error('Readability analysis failed:', error) process.exit(1) }