| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | import fs from 'fs' |
| | import path from 'path' |
| |
|
| | import cheerio from 'cheerio' |
| | import { fetchWithRetry } from '@/frame/lib/fetch-utils' |
| |
|
/**
 * Readability scores computed for one page's extracted text.
 *
 * Values are produced by `calculateReadability`, which rounds each score to
 * two decimal places.
 */
interface ReadabilityMetrics {
  /** Flesch Reading Ease score; higher means easier to read. */
  fleschReadingEase: number
  /** Flesch-Kincaid grade level. */
  fleschKincaidGrade: number
  /** Gunning Fog index. */
  gunningFog: number
  /** Coleman-Liau index. */
  colemanLiau: number
  /** Automated Readability Index. */
  automatedReadabilityIndex: number
  /** SMOG index. */
  smogIndex: number
  /** FORCAST grade level. */
  forcastGrade: number
}
| |
|
/** Readability analysis result for a single rendered documentation page. */
interface PageReadability {
  /** Content file path exactly as it was passed in for analysis. */
  path: string
  /** Site-relative URL path the page was fetched from. */
  url: string
  /** Page title taken from the rendered HTML. */
  title: string
  /** Readability scores for the page's extracted text. */
  metrics: ReadabilityMetrics
  /** Number of words in the extracted text. */
  wordCount: number
  /** Number of sentences in the extracted text. */
  sentenceCount: number
  /** Human-readable reading time estimate, e.g. "3 min". */
  estimatedReadingTime: string
}
| |
|
/**
 * Script entry point.
 *
 * Prints usage for `--help`/`-h`; otherwise resolves the content files to
 * analyze, waits for the local server, analyzes each file in turn, prints the
 * Markdown report and, when the GITHUB_ACTIONS environment variable is set,
 * also writes it to `readability-report.md`.
 *
 * A failure on one file is logged and does not stop the remaining files.
 *
 * NOTE(review): the success/failure glyphs in the per-file log lines replace
 * mis-encoded characters; the exact original glyphs could not be recovered —
 * TODO confirm.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2)
  if (args.includes('--help') || args.includes('-h')) {
    console.log(`
Usage: npm run readability-report [-- --paths <file1> <file2> ...]

Examples:
  # Analyze specific files
  npm run readability-report -- --paths content/copilot/using-github-copilot.md content/get-started/quickstart.md

  # Analyze a single file
  npm run readability-report -- --paths content/copilot/using-github-copilot.md

  # Use environment variable (for CI)
  CHANGED_FILES="content/file1.md content/file2.md" npm run readability-report

Note: Requires a local server running on localhost:4000 (npm start)
`)
    return
  }

  console.log('Starting readability analysis...')

  const changedFiles = getChangedContentFiles()
  if (changedFiles.length === 0) {
    console.log('No content files to analyze. Use --help for usage information.')
    return
  }

  console.log(`Analyzing readability for ${changedFiles.length} changed files:`)
  for (const file of changedFiles) {
    console.log(` - ${file}`)
  }

  // Pages are fetched from the local server, so it must be up before analysis.
  await waitForServer()

  const results: PageReadability[] = []
  for (const filePath of changedFiles) {
    try {
      const result = await analyzeFile(filePath)
      // A null result means the page was skipped (already reported by analyzeFile).
      if (result) {
        results.push(result)
        console.log(`✅ Analyzed: ${result.path}`)
      }
    } catch (error) {
      // Narrow instead of casting: a thrown value is not guaranteed to be an Error.
      const reason = error instanceof Error ? error.message : String(error)
      console.error(`❌ Failed to analyze ${filePath}:`, reason)
    }
  }

  const report = generateReport(results)
  console.log(`\n${report}`)

  // Persist the report only when running under GitHub Actions.
  if (process.env.GITHUB_ACTIONS) {
    fs.writeFileSync('readability-report.md', report)
    console.log('\nReport saved to readability-report.md')
  }
}
| |
|
/**
 * Returns true for Markdown files directly under the top-level `content`
 * directory, excluding any file named `README.md`.
 *
 * The first path segment is checked with both the platform separator and `/`,
 * so forward-slash paths (as typed on the command line or listed in
 * CHANGED_FILES) are also accepted where `path.sep` is a backslash.
 */
function isAnalyzableContentFile(filePath: string): boolean {
  if (!filePath.endsWith('.md')) return false
  if (path.basename(filePath) === 'README.md') return false
  return filePath.split(path.sep)[0] === 'content' || filePath.split('/')[0] === 'content'
}

/**
 * Resolves the list of content files to analyze.
 *
 * Source of the list, in priority order:
 *  1. the arguments following `--paths` on the command line, up to the next
 *     argument that starts with `--`;
 *  2. the whitespace-separated CHANGED_FILES environment variable.
 *
 * If `--paths` is the last argument, the environment variable is used instead.
 *
 * @returns The candidate paths that pass the content-file filter.
 */
function getChangedContentFiles(): string[] {
  const args = process.argv.slice(2)
  const pathsIndex = args.indexOf('--paths')

  if (pathsIndex !== -1 && pathsIndex + 1 < args.length) {
    const candidates: string[] = []
    for (const arg of args.slice(pathsIndex + 1)) {
      // The next flag ends the list of paths.
      if (arg.startsWith('--')) break
      candidates.push(arg)
    }
    return candidates.filter(isAnalyzableContentFile)
  }

  const spaceSeparatedList = process.env.CHANGED_FILES || ''
  return spaceSeparatedList.split(/\s+/).filter(isAnalyzableContentFile)
}
| |
|
/**
 * Builds the absolute URL for a site path on the local server.
 *
 * @param urlPath Site-relative path, expected to start with `/`.
 * @returns `urlPath` prefixed with the local server origin.
 */
function makeURL(urlPath: string): string {
  const localOrigin = 'http://localhost:4000'
  return localOrigin + urlPath
}
| |
|
/**
 * Polls the local server's root URL until it answers with an OK response.
 *
 * Makes up to 30 attempts, pausing 2000 ms after each failed one. A non-OK
 * HTTP response counts as a failed attempt.
 *
 * @throws Error when the final attempt also fails.
 */
async function waitForServer(): Promise<void> {
  console.log('Waiting for server to be ready...')

  const attemptLimit = 30
  const pauseMs = 2000
  const pause = () => new Promise((resolve) => setTimeout(resolve, pauseMs))

  let tries = 0
  while (tries < attemptLimit) {
    tries += 1
    try {
      const res = await fetchWithRetry(makeURL('/'), undefined, {
        timeout: 5000,
      })
      if (!res.ok) {
        // Route non-OK responses through the same retry path as network errors.
        throw new Error(`HTTP ${res.status}: ${res.statusText}`)
      }
      console.log('Server is ready!')
      return
    } catch (err) {
      const reason = (err as Error).message
      if (tries === attemptLimit) {
        console.error('Server failed to start. Last error:', reason)
        throw new Error(`Server failed to start after ${attemptLimit} attempts`)
      }
      console.log(
        `Attempt ${tries}/${attemptLimit} failed (${reason}), retrying in ${pauseMs}ms...`,
      )
      await pause()
    }
  }
}
| |
|
/**
 * Maps a content file path to the site URL path of its page: drops the
 * leading `content/`, the `.md` extension and a trailing `index` segment.
 * The root `content/index.md` maps to `/`.
 */
function contentPathToUrlPath(filePath: string): string {
  const withoutRootAndExt = filePath.replace(/^content\//, '').replace(/\.md$/, '')
  // `(^|\/)` also matches a bare "index", so the root index page becomes "/".
  return `/${withoutRootAndExt.replace(/(^|\/)index$/, '')}`
}

/**
 * Fetches the rendered page for a content file from the local server and
 * computes its readability metrics.
 *
 * @param filePath Content file path, e.g. `content/get-started/quickstart.md`.
 * @returns The analysis result, or `null` when the page does not answer with
 *   HTTP 200 or no prose text can be extracted from it.
 * @throws Error prefixed with "Failed to analyze <url path>" when fetching or
 *   analysis fails.
 */
async function analyzeFile(filePath: string): Promise<PageReadability | null> {
  const urlPath = contentPathToUrlPath(filePath)

  try {
    // NOTE(review): `throwHttpErrors: false` presumably makes fetchWithRetry
    // return non-2xx responses instead of throwing — confirm against its docs.
    const response = await fetchWithRetry(makeURL(urlPath), undefined, {
      timeout: 30000,
      throwHttpErrors: false,
    })

    if (response.status !== 200) {
      console.warn(`Skipping ${urlPath}: HTTP ${response.status}`)
      return null
    }

    const $ = cheerio.load(await response.text())

    // Read the title before any elements are stripped from the document.
    const title = $('h1').first().text().trim() || $('title').text().trim() || 'Untitled'

    // Candidate containers for the article body, most specific first.
    const contentSelectors = [
      'article .markdown-body',
      '.markdown-body',
      'article',
      'main',
      '[data-testid="lead"]',
      '#article-contents',
    ]

    // Elements removed from a candidate container before its text is read.
    const nonProseSelectors = [
      'pre, code, .highlight',
      'nav, .breadcrumb, .pagination',
      '[data-testid="breadcrumbs"]',
      '.js-search-results',
      'aside, .sidebar',
      '.edit-this-page',
    ]

    let contentText = ''
    for (const selector of contentSelectors) {
      const element = $(selector)
      if (element.length === 0) continue

      for (const nonProse of nonProseSelectors) {
        element.find(nonProse).remove()
      }

      contentText = element.text().replace(/\s+/g, ' ').trim()
      // Fall through to the next candidate when this one has no text left.
      if (contentText) break
    }

    if (!contentText) {
      console.warn(`No content found for ${urlPath}`)
      return null
    }

    const metrics = await calculateReadability(contentText)
    const wordCount = countWords(contentText)

    return {
      path: filePath,
      url: urlPath,
      title,
      metrics,
      wordCount,
      sentenceCount: countSentences(contentText),
      estimatedReadingTime: estimateReadingTime(wordCount, metrics.fleschReadingEase),
    }
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error)
    throw new Error(`Failed to analyze ${urlPath}: ${reason}`)
  }
}
| |
|
/**
 * Computes the readability metrics for a block of plain text.
 *
 * Word, sentence and syllable counts come from the counting helpers in this
 * file. Character-based formulas count only the characters they are defined
 * on, so punctuation no longer inflates the result:
 *  - Coleman-Liau uses letters per 100 words;
 *  - Automated Readability Index uses letters and digits per word.
 *
 * All scores are rounded to two decimals. Flesch Reading Ease and the grade
 * scores are floored at 0; FORCAST is floored at 5.
 *
 * @param text Plain text to score.
 * @returns The metrics; all zeros when the text has no words or sentences.
 */
async function calculateReadability(text: string): Promise<ReadabilityMetrics> {
  const words = countWords(text)
  const sentences = countSentences(text)

  if (sentences === 0 || words === 0) {
    return {
      fleschReadingEase: 0,
      fleschKincaidGrade: 0,
      gunningFog: 0,
      colemanLiau: 0,
      automatedReadabilityIndex: 0,
      smogIndex: 0,
      forcastGrade: 0,
    }
  }

  const syllables = countSyllables(text)
  const complexWords = countComplexWords(text)
  const singleSyllableWords = countSingleSyllableWords(text)
  const letters = (text.match(/\p{L}/gu) ?? []).length
  const lettersAndDigits = (text.match(/[\p{L}\p{N}]/gu) ?? []).length

  const avgSentenceLength = words / sentences
  const avgSyllablesPerWord = syllables / words

  // Flesch Reading Ease
  const fleschReadingEase = Math.max(
    0,
    206.835 - 1.015 * avgSentenceLength - 84.6 * avgSyllablesPerWord,
  )

  // Flesch-Kincaid Grade Level
  const fleschKincaidGrade = 0.39 * avgSentenceLength + 11.8 * avgSyllablesPerWord - 15.59

  // Gunning Fog: sentence length plus percentage of words with 3+ syllables
  const complexWordPercentage = (complexWords / words) * 100
  const gunningFog = 0.4 * (avgSentenceLength + complexWordPercentage)

  // Coleman-Liau: letters and sentences per 100 words
  const lettersPer100Words = (letters / words) * 100
  const sentencesPer100Words = (sentences / words) * 100
  const colemanLiau = 0.0588 * lettersPer100Words - 0.296 * sentencesPer100Words - 15.8

  // Automated Readability Index: letters and digits per word
  const charsPerWord = lettersAndDigits / words
  const automatedReadabilityIndex = 4.71 * charsPerWord + 0.5 * avgSentenceLength - 21.43

  // SMOG: words with 3+ syllables, normalized to 30 sentences
  const smogIndex = 1.043 * Math.sqrt((complexWords * 30) / sentences) + 3.1291

  // FORCAST: single-syllable words, normalized to a 150-word sample
  const singleSyllablePer150Words = (singleSyllableWords / words) * 150
  const forcastGrade = 20 - singleSyllablePer150Words / 10

  const roundTo2 = (value: number): number => Math.round(value * 100) / 100

  return {
    fleschReadingEase: roundTo2(fleschReadingEase),
    fleschKincaidGrade: Math.max(0, roundTo2(fleschKincaidGrade)),
    gunningFog: Math.max(0, roundTo2(gunningFog)),
    colemanLiau: Math.max(0, roundTo2(colemanLiau)),
    automatedReadabilityIndex: Math.max(0, roundTo2(automatedReadabilityIndex)),
    smogIndex: Math.max(0, roundTo2(smogIndex)),
    forcastGrade: Math.max(5, roundTo2(forcastGrade)),
  }
}
| |
|
/**
 * Estimates the number of syllables in a single word.
 *
 * Heuristic: count runs of consecutive vowels (a, e, i, o, u, y) among the
 * word's ASCII letters, then drop one for a trailing silent "e". A final
 * consonant + "le" (as in "table") is a spoken syllable and is kept.
 *
 * @param word A token; case and any non-letter characters are ignored.
 * @returns 0 when the token has no ASCII letters, otherwise at least 1.
 */
function countSyllablesInWord(word: string): number {
  const letters = word.toLowerCase().replace(/[^a-z]/g, '')
  if (letters.length === 0) return 0

  const vowelGroups = letters.match(/[aeiouy]+/g) ?? []
  let syllables = vowelGroups.length

  // "make" ends in a silent e; "table" ends in a syllabic consonant + "le".
  const endsInSyllabicLe = /[^aeiouy]le$/.test(letters)
  if (letters.endsWith('e') && !endsInSyllabicLe && syllables > 1) {
    syllables -= 1
  }

  // A word made of letters always has at least one syllable.
  return Math.max(1, syllables)
}
| |
|
/**
 * Counts the words in a text.
 *
 * A word is a whitespace-delimited token containing at least one `\w`
 * character (letter, digit or underscore); tokens made only of punctuation
 * are not counted.
 *
 * @param text Text to count words in.
 * @returns The number of words; 0 for empty or whitespace-only text.
 */
function countWords(text: string): number {
  const normalized = text.replace(/\s+/g, ' ').trim()
  if (normalized === '') return 0

  let total = 0
  for (const token of normalized.split(/\s+/)) {
    if (/\w/.test(token)) {
      total += 1
    }
  }
  return total
}
| |
|
/**
 * Counts the sentences in a text.
 *
 * A sentence ends at a run of `.`, `!` or `?` that is followed by whitespace
 * or the end of the text, so a period inside "1.2" does not split.
 *
 * @param text Text to count sentences in.
 * @returns The number of non-empty sentences, never less than 1.
 */
function countSentences(text: string): number {
  const collapsed = text.replace(/\s+/g, ' ').trim()

  let found = 0
  for (const fragment of collapsed.split(/[.!?]+(?=\s|$)/)) {
    if (fragment.trim().length > 0) {
      found += 1
    }
  }

  return found > 0 ? found : 1
}
| |
|
/**
 * Sums the estimated syllable counts of every whitespace-delimited token in
 * the text.
 *
 * @param text Text to measure.
 * @returns Total syllables as estimated by `countSyllablesInWord`.
 */
function countSyllables(text: string): number {
  return text
    .toLowerCase()
    .split(/\s+/)
    .reduce(
      (total, token) => (token.length === 0 ? total : total + countSyllablesInWord(token)),
      0,
    )
}
| |
|
/**
 * Counts the whitespace-delimited tokens that `countSyllablesInWord`
 * estimates at exactly one syllable.
 *
 * @param text Text to measure.
 * @returns Number of single-syllable tokens.
 */
function countSingleSyllableWords(text: string): number {
  const tokens = text.toLowerCase().split(/\s+/)
  return tokens.filter((token) => token.length > 0 && countSyllablesInWord(token) === 1).length
}
| |
|
/**
 * Counts the "complex" words in a text: whitespace-delimited tokens that
 * `countSyllablesInWord` estimates at three or more syllables.
 *
 * @param text Text to measure.
 * @returns Number of tokens with at least three syllables.
 */
function countComplexWords(text: string): number {
  const tokens = text.toLowerCase().split(/\s+/)
  return tokens.filter((token) => token.length > 0 && countSyllablesInWord(token) >= 3).length
}
| |
|
/**
 * Estimates how long a text takes to read, using a reading speed adjusted
 * for difficulty: easier text (higher Flesch Reading Ease) is read faster.
 *
 * The estimate is rounded to whole minutes before it is formatted, so values
 * just under an hour boundary roll over cleanly ("1h", "2h") rather than
 * producing "60 min" or "1h 60m".
 *
 * @param wordCount Number of words in the text.
 * @param fleschReadingEase Flesch Reading Ease score of the text.
 * @returns "< 1 min", "N min", "Nh" or "Nh Mm".
 */
function estimateReadingTime(wordCount: number, fleschReadingEase: number): string {
  // [minimum Flesch Reading Ease, words per minute], highest tier first.
  const speedTiers: ReadonlyArray<readonly [number, number]> = [
    [90, 250],
    [80, 230],
    [70, 210],
    [60, 200],
    [50, 180],
    [30, 160],
  ]
  const tier = speedTiers.find(([minEase]) => fleschReadingEase >= minEase)
  // Anything below the lowest tier gets the slowest speed.
  const wordsPerMinute = tier ? tier[1] : 140

  const exactMinutes = wordCount / wordsPerMinute
  if (exactMinutes < 1) {
    return '< 1 min'
  }

  // Round first, then split into hours and minutes.
  const totalMinutes = Math.round(exactMinutes)
  if (totalMinutes < 60) {
    return `${totalMinutes} min`
  }

  const hours = Math.floor(totalMinutes / 60)
  const minutes = totalMinutes % 60
  return minutes > 0 ? `${hours}h ${minutes}m` : `${hours}h`
}
| |
|
/**
 * Renders the analysis results as a Markdown report: a summary table, a
 * detailed metrics table per page, general guidelines, and a "Priority Focus"
 * note when the average Flesch Reading Ease is below 60.
 *
 * Page titles are escaped before they are used as link text in the summary
 * table, so a `|`, `[` or `]` in a title cannot break the table or the link.
 * File and URL lines end with two spaces, which Markdown needs for a hard
 * line break.
 *
 * NOTE(review): the green/yellow/red indicators were recovered from
 * mis-encoded characters. The two heading emoji also replace mis-encoded
 * characters, but the exact originals could not be recovered — TODO confirm.
 *
 * @param results Analysis results, one per page.
 * @returns The report as Markdown text.
 */
function generateReport(results: PageReadability[]): string {
  if (results.length === 0) {
    return '## 📊 Readability Report\n\nNo content changes found to analyze.'
  }

  // Flesch Reading Ease: higher is better.
  const easeIndicator = (ease: number): string =>
    ease >= 60 ? '🟢' : ease >= 30 ? '🟡' : '🔴'
  const easeAssessment = (ease: number): string =>
    ease >= 60 ? '🟢 Good' : ease >= 30 ? '🟡 Fair' : '🔴 Difficult'
  // Grade-level metrics: lower is better; "Fair" extends up to grade 12.
  const gradeAssessment = (grade: number, goodMax: number): string =>
    grade <= goodMax ? '🟢 Good' : grade <= 12 ? '🟡 Fair' : '🔴 High'
  // Keep link text on one line and escape characters with Markdown meaning.
  const escapeLinkText = (value: string): string =>
    value.replace(/\s+/g, ' ').replace(/[\\|[\]]/g, '\\$&')

  let report = '## 📊 Readability Report\n\n'
  report += `Analyzed ${results.length} changed documentation page${results.length === 1 ? '' : 's'}.\n\n`

  report += '### Summary\n\n'
  report += '| Page | Flesch Reading Ease | Grade Level | Reading Time | Words |\n'
  report += '|------|:-------------------:|:-----------:|:------------:|:-----:|\n'

  for (const result of results) {
    const ease = result.metrics.fleschReadingEase
    const grade = result.metrics.fleschKincaidGrade
    report += `| [${escapeLinkText(result.title)}](${result.url}) | ${ease} ${easeIndicator(ease)} | ${grade} | ${result.estimatedReadingTime} | ${result.wordCount} |\n`
  }

  report += '\n### Detailed Metrics\n\n'

  for (const result of results) {
    const m = result.metrics

    report += `#### ${result.title}\n\n`
    report += `**File:** \`${result.path}\`  \n`
    report += `**URL:** ${result.url}  \n`
    report += `**Words:** ${result.wordCount} | **Sentences:** ${result.sentenceCount} | **Est. Reading Time:** ${result.estimatedReadingTime}\n\n`

    report += '| Metric | Score | Target | Assessment |\n'
    report += '|--------|:-----:|:------:|:----------:|\n'

    const rows = [
      {
        name: 'Flesch Reading Ease',
        score: m.fleschReadingEase,
        target: '60+',
        assessment: easeAssessment(m.fleschReadingEase),
      },
      {
        name: 'Flesch-Kincaid Grade',
        score: m.fleschKincaidGrade,
        target: '8 or less',
        assessment: gradeAssessment(m.fleschKincaidGrade, 8),
      },
      {
        name: 'Gunning Fog Index',
        score: m.gunningFog,
        target: '8 or less',
        assessment: gradeAssessment(m.gunningFog, 8),
      },
      {
        name: 'Coleman-Liau Index',
        score: m.colemanLiau,
        target: '8 or less',
        assessment: gradeAssessment(m.colemanLiau, 8),
      },
      {
        name: 'Automated Readability Index',
        score: m.automatedReadabilityIndex,
        target: '8 or less',
        assessment: gradeAssessment(m.automatedReadabilityIndex, 8),
      },
      {
        name: 'FORCAST Grade Level',
        score: m.forcastGrade,
        target: '9-10',
        assessment: gradeAssessment(m.forcastGrade, 10),
      },
    ]

    for (const row of rows) {
      report += `| ${row.name} | ${row.score} | ${row.target} | ${row.assessment} |\n`
    }

    report += '\n'
  }

  report += '### 📖 Readability Guidelines\n\n'
  report += '**Target Audience:** Technical users (developers, administrators)\n\n'
  report += '**Reading Time Estimation:**\n'
  report += '- Based on complexity-adjusted reading speed (140-250 WPM)\n'
  report += '- Easier content (higher Flesch scores) = faster reading\n'
  report += '- More complex content = slower reading pace\n\n'
  report += '**General Tips for Improvement:**\n'
  report += '- **Sentences:** Aim for 15-20 words per sentence on average\n'
  report += '- **Word choice:** Choose simpler alternatives when possible\n'
  report += '- **Paragraphs:** Keep paragraphs under 75-100 words\n'
  report +=
    '- **Voice:** Use active voice (e.g., "Click the button" vs "The button should be clicked")\n'
  report += '- **Technical terms:** Define acronyms and jargon on first use\n'
  report +=
    '- **FORCAST improvement:** Use more single-syllable words (aim for 110+ per 150 words)\n\n'

  const avgFleschEase =
    results.reduce((sum, r) => sum + r.metrics.fleschReadingEase, 0) / results.length

  if (avgFleschEase < 60) {
    report +=
      results.length === 1
        ? '**Priority Focus:** This page scored below 60 for Flesch Reading Ease. Consider breaking up complex sentences and using simpler vocabulary.\n\n'
        : '**Priority Focus:** Several pages scored below 60 for Flesch Reading Ease. Consider breaking up complex sentences and using simpler vocabulary.\n\n'
  }

  return report
}
| |
|
// Run the analysis; log any error that escapes main() and exit with a
// failing status code.
try {
  await main()
} catch (failure) {
  console.error('Readability analysis failed:', failure)
  process.exit(1)
}
| |
|