| | |
| |
|
| | import fs from 'fs' |
| | import path from 'path' |
| |
|
| | import cheerio from 'cheerio' |
| | import coreLib from '@actions/core' |
| | import { fetchWithRetry } from '@/frame/lib/fetch-utils' |
| | import chalk from 'chalk' |
| | import { JSONFilePreset } from 'lowdb/node' |
| | import { type Octokit } from '@octokit/rest' |
| | import type { Response } from 'express' |
| |
|
| | import type { ExtendedRequest, Page, Permalink, Context } from '@/types' |
| | import shortVersions from '@/versions/middleware/short-versions' |
| | import contextualize from '@/frame/middleware/context/context' |
| | import features from '@/versions/middleware/features' |
| | import getRedirect from '@/redirects/lib/get-redirect' |
| | import warmServer from '@/frame/lib/warm-server' |
| | import { liquid } from '@/content-render/index' |
| | import { deprecated } from '@/versions/lib/enterprise-server-releases' |
| | import excludedLinks from '@/links/lib/excluded-links' |
| | import { getEnvInputs, boolEnvVar } from '@/workflows/get-env-inputs' |
| | import { debugTimeEnd, debugTimeStart } from './debug-time-taken' |
| | import { uploadArtifact as uploadArtifactLib } from './upload-artifact' |
| | import github from '@/workflows/github' |
| | import { getActionContext } from '@/workflows/action-context' |
| | import { createMinimalProcessor } from '@/content-render/unified/processor' |
| | import { createReportIssue, linkReports } from '@/workflows/issue-report' |
| | import { type CoreInject } from '@/links/scripts/action-injections' |
| |
|
| | type Flaw = { |
| | WARNING?: string |
| | CRITICAL?: string |
| | isExternal?: boolean |
| | } |
| |
|
| | type LinkFlaw = { |
| | page: Page |
| | permalink: Permalink |
| | href?: string |
| | url?: string |
| | text?: string |
| | src: string |
| | flaw: Flaw |
| | } |
| |
|
| | type Redirects = Record<string, string> |
| | type PageMap = Record<string, Page> |
| |
|
| | type UploadArtifact = (name: string, message: string) => void |
| |
|
| | type Options = { |
| | level?: string |
| | files?: string[] |
| | random?: boolean |
| | language?: string | string[] |
| | filter?: string[] |
| | version?: string | string[] |
| | max?: number |
| | linkReports?: boolean |
| | actionUrl?: string |
| | verbose?: boolean |
| | checkExternalLinks?: boolean |
| | createReport?: boolean |
| | failOnFlaw?: boolean |
| | shouldComment?: boolean |
| | reportRepository?: string |
| | reportAuthor?: string |
| | reportLabel?: string |
| | checkAnchors?: boolean |
| | checkImages?: boolean |
| | patient?: boolean |
| | externalServerErrorsAsWarning?: string |
| | verboseUrl?: string |
| | bail?: boolean |
| | commentLimitToExternalLinks?: boolean |
| | actionContext?: any |
| | concurrency?: number |
| | } |
| |
|
| | |
// Default number of pages/permalinks/links processed simultaneously.
const DEFAULT_CONCURRENCY_LIMIT = 3

// First URL path segment -> directory on disk, for URLs that are served as
// static files rather than rendered pages.
const STATIC_PREFIXES: Record<string, string> = {
  assets: path.resolve('assets'),
  public: path.resolve(path.join('src', 'graphql', 'data')),
}

// Sanity check at module load so a bad checkout fails loudly instead of
// reporting every static-file link as broken.
for (const [key, value] of Object.entries(STATIC_PREFIXES)) {
  if (!fs.existsSync(value)) {
    throw new Error(`Can't find static prefix (${key}): ${value}`)
  }
}
| |
|
| | |
| | |
| | |
// How long a cached external-link check result stays valid (default 7 days),
// overridable via EXTERNAL_LINK_CHECKER_MAX_AGE_DAYS.
const EXTERNAL_LINK_CHECKER_MAX_AGE_MS =
  parseInt(process.env.EXTERNAL_LINK_CHECKER_MAX_AGE_DAYS || '7') * 24 * 60 * 60 * 1000
// Location of the on-disk (lowdb JSON) cache of external link check results.
const EXTERNAL_LINK_CHECKER_DB =
  process.env.EXTERNAL_LINK_CHECKER_DB || 'external-link-checker-db.json'

// Shape of the cache file: URL -> when it was checked and what the check returned.
type Data = {
  urls: {
    [url: string]: {
      timestamp: number
      result: {
        ok: boolean
        statusCode: number
      }
    }
  }
}
const defaultData: Data = { urls: {} }
// Top-level await: the cache file is opened once at module import time.
const externalLinkCheckerDB = await JSONFilePreset<Data>(EXTERNAL_LINK_CHECKER_DB, defaultData)

type DBType = typeof externalLinkCheckerDB
| |
|
| | |
| | async function limitConcurrency<T, R>( |
| | items: T[], |
| | asyncFn: (item: T) => Promise<R>, |
| | limit: number = 3, |
| | ): Promise<R[]> { |
| | const results: Promise<R>[] = [] |
| | const executing = new Set<Promise<R>>() |
| |
|
| | for (const item of items) { |
| | const createPromise = async () => { |
| | const result = await asyncFn(item) |
| | executing.delete(promise) |
| | return result |
| | } |
| | const promise = createPromise() |
| |
|
| | results.push(promise) |
| | executing.add(promise) |
| |
|
| | if (executing.size >= limit) { |
| | await Promise.race(executing) |
| | } |
| | } |
| |
|
| | return Promise.all(results) |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | function jitter(base: number, percentage: number) { |
| | const r = percentage / 100 |
| | const negative = Math.random() > 0.5 ? -1 : 1 |
| | return base + base * Math.random() * r * negative |
| | } |
| | |
| | |
| | |
| | |
| | function linksToSkipFactory() { |
| | const set = new Set(excludedLinks.map(({ is }) => is).filter(Boolean)) |
| | const arr = excludedLinks.map(({ startsWith }) => startsWith).filter(Boolean) |
| | return (href: string) => set.has(href) || arr.some((v) => v && href.startsWith(v)) |
| | } |
| |
|
// Predicate built once from the exclusion list: true when an external href
// should not be checked at all.
const linksToSkip = linksToSkipFactory()

const CONTENT_ROOT = path.resolve('content')

// Matches URLs that point into a deprecated GitHub Enterprise Server release,
// e.g. "enterprise-server@2.19/..." or "enterprise/2.19/...".
const deprecatedVersionPrefixesRegex = new RegExp(
  `enterprise(-server@|/)(${deprecated.join('|')})(/|$)`,
)
| |
|
| | |
// Only run when this file is executed directly as a script, not when it is
// imported for its functions.
if (import.meta.url.endsWith(process.argv[1])) {
  const {
    ACTION_RUN_URL,
    LEVEL,
    FILES_CHANGED,
    REPORT_REPOSITORY,
    REPORT_AUTHOR,
    REPORT_LABEL,
    EXTERNAL_SERVER_ERRORS_AS_WARNINGS,
    CHECK_ANCHORS,
    CONCURRENCY,
  } = process.env

  const octokit = github()

  // FILES_CHANGED, when present, is a JSON-encoded array of changed file
  // paths from the PR. An empty list means there is nothing to check.
  let files
  if (FILES_CHANGED) {
    const fileList = JSON.parse(FILES_CHANGED)
    if (Array.isArray(fileList) && fileList.length > 0) {
      files = fileList
    } else {
      console.warn(`No changed files found in PR: ${FILES_CHANGED}. Exiting...`)
      process.exit(0)
    }
  }

  // Assemble main()'s options from the workflow's environment variables.
  const opts: Options = {
    level: LEVEL,
    files,
    verbose: true,
    linkReports: true,
    checkImages: true,
    checkAnchors: Boolean(CHECK_ANCHORS),
    patient: boolEnvVar('PATIENT'),
    random: false,
    language: 'en',
    actionUrl: ACTION_RUN_URL,
    checkExternalLinks: boolEnvVar('CHECK_EXTERNAL_LINKS'),
    shouldComment: boolEnvVar('SHOULD_COMMENT'),
    commentLimitToExternalLinks: boolEnvVar('COMMENT_LIMIT_TO_EXTERNAL_LINKS'),
    failOnFlaw: boolEnvVar('FAIL_ON_FLAW'),
    createReport: boolEnvVar('CREATE_REPORT'),
    reportRepository: REPORT_REPOSITORY,
    reportLabel: REPORT_LABEL,
    reportAuthor: REPORT_AUTHOR,
    actionContext: getActionContext(),
    externalServerErrorsAsWarning: EXTERNAL_SERVER_ERRORS_AS_WARNINGS,
    concurrency: CONCURRENCY ? parseInt(CONCURRENCY, 10) : DEFAULT_CONCURRENCY_LIMIT,
  }

  if (opts.shouldComment || opts.createReport) {
    // Commenting on the PR or filing an issue requires a token; fail fast
    // when it is missing rather than halfway through the run.
    getEnvInputs(['GITHUB_TOKEN'])
  }

  // NOTE(review): fire-and-forget — a rejection from main() surfaces as an
  // unhandled promise rejection rather than an explicit setFailed; confirm
  // that is the intended failure mode for this workflow.
  main(coreLib, octokit, uploadArtifactLib, opts)
}
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | async function main( |
| | core: any, |
| | octokit: Octokit, |
| | uploadArtifact: UploadArtifact, |
| | opts: Options = {}, |
| | ) { |
| | const { |
| | level = 'warning', |
| | files = [], |
| | random, |
| | language = 'en', |
| | filter, |
| | version, |
| | max, |
| | verbose, |
| | checkExternalLinks = false, |
| | createReport = false, |
| | failOnFlaw = false, |
| | shouldComment = false, |
| | reportRepository = 'github/docs-content', |
| | reportAuthor = 'docs-bot', |
| | reportLabel = 'broken link report', |
| | concurrency = DEFAULT_CONCURRENCY_LIMIT, |
| | } = opts |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | const { redirects, pages: pageMap, pageList } = await warmServer([]) |
| |
|
| | if (files.length) { |
| | core.debug(`Limitting to files list: ${files.join(', ')}`) |
| | } |
| |
|
| | let languages = language |
| | if (!Array.isArray(languages)) { |
| | languages = [languages] |
| | } |
| |
|
| | const filters = filter || [] |
| | if (filters && !Array.isArray(filters)) { |
| | throw new Error(`filters, ${filters} is not an array`) |
| | } |
| |
|
| | let versions = version || [] |
| | if (versions && typeof versions === 'string') { |
| | versions = [versions] |
| | } else if (!Array.isArray(versions)) { |
| | throw new Error(`versions, '${version}' is not an array`) |
| | } |
| |
|
| | if (random) { |
| | shuffle(pageList) |
| | } |
| |
|
| | debugTimeStart(core, 'getPages') |
| | const pages = getPages(pageList, languages, filters, files, max) |
| | debugTimeEnd(core, 'getPages') |
| |
|
| | if (checkExternalLinks && pages.length >= 100) { |
| | core.warning( |
| | `Warning! Checking external URLs can be time costly. You're testing ${pages.length} pages.`, |
| | ) |
| | } |
| |
|
| | await externalLinkCheckerDB.read() |
| |
|
| | if (verbose && checkExternalLinks) { |
| | core.info(`Checking of external links is is cached to ${EXTERNAL_LINK_CHECKER_DB}`) |
| | core.info( |
| | `External link cache max age is ${ |
| | EXTERNAL_LINK_CHECKER_MAX_AGE_MS / 1000 / 60 / 60 / 24 |
| | } days`, |
| | ) |
| | let countNotTooOld = 0 |
| | let countTooOld = 0 |
| | for (const { timestamp } of Object.values(externalLinkCheckerDB.data.urls || {})) { |
| | const age = Date.now() - timestamp |
| | if (age > EXTERNAL_LINK_CHECKER_MAX_AGE_MS) { |
| | countTooOld++ |
| | } else { |
| | countNotTooOld++ |
| | } |
| | } |
| | core.info( |
| | `External link cache: ${countNotTooOld.toLocaleString()} are still fresh, ${countTooOld.toLocaleString()} links too old`, |
| | ) |
| | } |
| |
|
| | debugTimeStart(core, 'processPages') |
| | const t0 = new Date().getTime() |
| | const flawsGroups = await limitConcurrency( |
| | pages, |
| | (page: Page) => |
| | processPage( |
| | core, |
| | page, |
| | pageMap, |
| | redirects, |
| | opts, |
| | externalLinkCheckerDB, |
| | versions as string[], |
| | ), |
| | concurrency, |
| | ) |
| | const t1 = new Date().getTime() |
| | debugTimeEnd(core, 'processPages') |
| |
|
| | await externalLinkCheckerDB.write() |
| |
|
| | const flaws = flawsGroups.flat() |
| |
|
| | printGlobalCacheHitRatio(core) |
| |
|
| | if (verbose) { |
| | summarizeCounts(core, pages, (t1 - t0) / 1000) |
| | core.info(`Checked ${(globalCacheHitCount + globalCacheMissCount).toLocaleString()} links`) |
| | } |
| |
|
| | summarizeFlaws(core, flaws) |
| |
|
| | const uniqueHrefs = new Set(flaws.map((flaw) => flaw.href)) |
| |
|
| | if (flaws.length > 0) { |
| | await uploadJsonFlawsArtifact(uploadArtifact, flaws, { |
| | verboseUrl: opts.verboseUrl, |
| | }) |
| | core.info(`All flaws written to artifact log.`) |
| | if (createReport) { |
| | core.info(`Creating issue for flaws...`) |
| | const reportProps = { |
| | core, |
| | octokit, |
| | reportTitle: `${uniqueHrefs.size} broken links found`, |
| | reportBody: flawIssueDisplay(flaws, opts), |
| | reportRepository, |
| | reportLabel, |
| | } |
| | const newReport = await createReportIssue(reportProps) |
| |
|
| | if (linkReports) { |
| | const linkProps = { |
| | core, |
| | octokit, |
| | newReport, |
| | reportRepository, |
| | reportAuthor, |
| | reportLabel, |
| | } |
| | await linkReports(linkProps) |
| | } |
| | } |
| | if (shouldComment) { |
| | await commentOnPR(core, octokit, flaws, opts) |
| | } |
| |
|
| | const flawsInLevel = flaws.filter((flaw) => { |
| | if (level === 'critical') { |
| | return flaw?.flaw?.CRITICAL |
| | } |
| | |
| | return true |
| | }) |
| |
|
| | if (flawsInLevel.length > 0) { |
| | core.setOutput('has_flaws_at_level', flawsInLevel.length > 0) |
| | if (failOnFlaw) { |
| | core.setFailed( |
| | `${flaws.length} broken links found. See action artifact uploads for details`, |
| | ) |
| | process.exit(1) |
| | } |
| | } |
| | } else { |
| | |
| | |
| | if (shouldComment) { |
| | await commentOnPR(core, octokit, flaws, opts) |
| | } |
| | } |
| | } |
| |
|
/**
 * Post (or update) a PR comment summarizing the link flaws.
 *
 * Uses a hidden HTML marker to find our own previous comment so repeated
 * runs update one comment in place instead of spamming the PR. When there
 * are no displayable flaws, any previous comment is neutralized instead.
 * No-op (with a warning) outside a PR action-runner context.
 */
async function commentOnPR(core: CoreInject, octokit: Octokit, flaws: LinkFlaw[], opts: Options) {
  const { actionContext = {} } = opts
  const { owner, repo } = actionContext
  const pullNumber = actionContext?.pull_request?.number
  if (!owner || !repo || !pullNumber) {
    core.warning(`commentOnPR called outside of PR action runner context. Not creating comment.`)
    return
  }

  // Hidden marker embedded in every comment we create, so later runs can
  // find and update it.
  const findAgainSymbol = '<!-- rendered-content-link-checker-comment-finder -->'

  // Empty string means nothing qualifies for display.
  const body = flawIssueDisplay(flaws, opts, false)

  // Look for a previous comment of ours (identified by the hidden marker).
  const { data } = await octokit.rest.issues.listComments({
    owner,
    repo,
    issue_number: pullNumber,
  })
  let previousCommentId
  for (const { body: commentBody, id } of data) {
    if (commentBody && commentBody.includes(findAgainSymbol)) {
      previousCommentId = id
    }
  }

  // Nothing to report: if we commented before, soften that comment.
  if (!body) {
    core.info('No flaws qualify for comment')

    if (previousCommentId) {
      const nothingComment = 'Previous broken links comment now moot. 👌😙'
      await octokit.rest.issues.updateComment({
        owner,
        repo,
        comment_id: previousCommentId,
        body: `${nothingComment}\n\n${findAgainSymbol}`,
      })
      core.info(`Updated comment on PR: ${pullNumber} (${previousCommentId})`)
    }
    return
  }

  // Update the existing comment in place rather than posting a duplicate.
  if (previousCommentId) {
    const noteComment = '(*The original automated comment was updated*)'
    await octokit.rest.issues.updateComment({
      owner,
      repo,
      comment_id: previousCommentId,
      body: `${body}\n\n${noteComment}\n\n${findAgainSymbol}`,
    })
    core.info(`Updated comment on PR: ${pullNumber} (${previousCommentId})`)
    return
  }

  // First run on this PR: create a fresh comment.
  try {
    await octokit.rest.issues.createComment({
      owner,
      repo,
      issue_number: pullNumber,
      body: `${body}\n\n${findAgainSymbol}`,
    })
    core.info(`Created comment on PR: ${pullNumber}`)
  } catch (error) {
    core.setFailed(`Error commenting on PR when there are flaws`)
    throw error
  }
}
| |
|
| | function flawIssueDisplay(flaws: LinkFlaw[], opts: Options, mentionExternalExclusionList = true) { |
| | let output = '' |
| | let flawsToDisplay = 0 |
| |
|
| | type LinkFlawWithPermalink = { |
| | |
| | |
| | href?: string |
| | url?: string |
| | text?: string |
| | src: string |
| | flaw: Flaw |
| | permalinkHrefs: string[] |
| | } |
| | |
| | const hrefsOnPageGroup: Record<string, Record<string, LinkFlawWithPermalink>> = {} |
| | for (const { page, permalink, href, text, src, flaw } of flaws) { |
| | |
| | if (opts.commentLimitToExternalLinks && !flaw.isExternal) { |
| | continue |
| | } |
| |
|
| | flawsToDisplay++ |
| |
|
| | const pageKey = page.fullPath |
| | if (!hrefsOnPageGroup[pageKey]) { |
| | hrefsOnPageGroup[pageKey] = {} |
| | } |
| |
|
| | const linkKey = href || src |
| | if (!hrefsOnPageGroup[pageKey][linkKey]) { |
| | hrefsOnPageGroup[page.fullPath][linkKey] = { href, text, src, flaw, permalinkHrefs: [] } |
| | } |
| |
|
| | if (!hrefsOnPageGroup[pageKey][linkKey].permalinkHrefs.includes(permalink.href)) { |
| | hrefsOnPageGroup[pageKey][linkKey].permalinkHrefs.push(permalink.href) |
| | } |
| | } |
| |
|
| | |
| | if (!flawsToDisplay) { |
| | return '' |
| | } |
| |
|
| | |
| | for (const [pagePath, pageHrefs] of Object.entries(hrefsOnPageGroup)) { |
| | const fullPath = prettyFullPath(pagePath) |
| | output += `\n\n### In \`${fullPath}\`\n` |
| |
|
| | for (const [, hrefObj] of Object.entries(pageHrefs)) { |
| | if (hrefObj.href) { |
| | output += `\n\n - Href: [${hrefObj.href}](${hrefObj.href})` |
| | output += `\n - Text: ${hrefObj.text}` |
| | } else if (hrefObj.src) { |
| | output += `\n\n - Image src: [${hrefObj.src}](${hrefObj.src})` |
| | } else { |
| | output += `\n\n - WORKFLOW ERROR: Flaw has neither 'href' nor 'src'` |
| | } |
| | output += `\n - Flaw: \`${ |
| | hrefObj.flaw.CRITICAL ? hrefObj.flaw.CRITICAL : hrefObj.flaw.WARNING |
| | }\`` |
| | output += `\n - On permalinks` |
| | for (const permalinkHref of hrefObj.permalinkHrefs) { |
| | output += `\n - \`${permalinkHref}\`` |
| | } |
| | } |
| | } |
| |
|
| | if (mentionExternalExclusionList) { |
| | output += |
| | '\n\n---\n\nIf any link reported in this issue is not actually broken ' + |
| | 'and repeatedly shows up on reports, consider making a PR that adds it as an exception to `src/links/lib/excluded-links.ts`. ' + |
| | 'For more information, see [Fixing broken links in GitHub user docs](https://github.com/github/docs/blob/main/src/links/lib/README.md).' |
| | } |
| |
|
| | output = `${flawsToDisplay} broken${ |
| | opts.commentLimitToExternalLinks ? ' **external** ' : ' ' |
| | }links found in [this](${opts.actionUrl}) workflow.\n${output}` |
| |
|
| | |
| | if (output.length > 60000) { |
| | output = `${output.slice(0, 60000)}\n\n---\n\nOUTPUT TRUNCATED` |
| | } |
| |
|
| | return output |
| | } |
| |
|
| | function printGlobalCacheHitRatio(core: CoreInject) { |
| | const hits = globalCacheHitCount |
| | const misses = globalCacheMissCount |
| | |
| | |
| | |
| | if (misses + hits) { |
| | core.debug( |
| | `Cache hit ratio: ${hits.toLocaleString()} of ${(misses + hits).toLocaleString()} (${( |
| | (100 * hits) / |
| | (misses + hits) |
| | ).toFixed(1)}%)`, |
| | ) |
| | } |
| | } |
| |
|
| | function getPages( |
| | pageList: Page[], |
| | languages: string[], |
| | filters: string[], |
| | files: string[], |
| | max: number | undefined, |
| | ) { |
| | return pageList |
| | .filter((page: Page) => { |
| | if (languages.length && !languages.includes(page.languageCode)) { |
| | return false |
| | } |
| |
|
| | if (filters.length && !filters.find((filter) => page.relativePath.includes(filter))) { |
| | return false |
| | } |
| |
|
| | if ( |
| | files.length && |
| | |
| | |
| | !files.find((file) => { |
| | if (page.relativePath === file) return true |
| | if (page.fullPath === file) return true |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | if (path.join(CONTENT_ROOT, page.relativePath) === path.resolve(file)) return true |
| | return false |
| | }) |
| | ) { |
| | return false |
| | } |
| |
|
| | return true |
| | }) |
| | .slice(0, max ? Math.min(max, pageList.length) : pageList.length) |
| | } |
| |
|
| | async function processPage( |
| | core: CoreInject, |
| | page: Page, |
| | pageMap: PageMap, |
| | redirects: Redirects, |
| | opts: Options, |
| | db: DBType, |
| | versions: string[], |
| | ) { |
| | const { verbose, verboseUrl, bail } = opts |
| | const filteredPermalinks = page.permalinks.filter((permalink) => { |
| | return !versions.length || versions.includes(permalink.pageVersion) |
| | }) |
| | const allFlawsEach = await limitConcurrency( |
| | filteredPermalinks, |
| | (permalink) => processPermalink(core, permalink, page, pageMap, redirects, opts, db), |
| | opts.concurrency || DEFAULT_CONCURRENCY_LIMIT, |
| | ) |
| |
|
| | const allFlaws = allFlawsEach.flat() |
| |
|
| | if (allFlaws.length > 0) { |
| | if (verbose) { |
| | printFlaws(core, allFlaws, { verboseUrl }) |
| | } |
| |
|
| | if (bail) { |
| | if (!verbose) { |
| | console.warn('Use --verbose to see the flaws before it exits') |
| | } |
| | throw new Error(`More than one flaw in ${page.relativePath}`) |
| | } |
| | } |
| |
|
| | return allFlaws |
| | } |
| |
|
| | async function processPermalink( |
| | core: any, |
| | permalink: Permalink, |
| | page: Page, |
| | pageMap: PageMap, |
| | redirects: Redirects, |
| | opts: Options, |
| | db: DBType, |
| | ) { |
| | const { |
| | level = 'critical', |
| | checkAnchors, |
| | checkImages, |
| | checkExternalLinks, |
| | verbose, |
| | patient, |
| | externalServerErrorsAsWarning, |
| | } = opts |
| | let html = '' |
| | try { |
| | html = await renderInnerHTML(page, permalink) |
| | } catch (error) { |
| | console.warn( |
| | `The error happened trying to render ${page.relativePath} (permalink: ${permalink.href})`, |
| | ) |
| | throw error |
| | } |
| | const $ = cheerio.load(html, { xmlMode: true }) |
| | const flaws: LinkFlaw[] = [] |
| | const links: cheerio.Element[] = [] |
| | $('a[href]').each((i, link) => { |
| | links.push(link) |
| | }) |
| | const newFlaws: LinkFlaw[] = await limitConcurrency( |
| | links, |
| | async (link) => { |
| | const { href } = (link as cheerio.TagElement).attribs |
| |
|
| | |
| | |
| | if (!href.startsWith('#')) { |
| | if (globalHrefCheckCache.has(href)) { |
| | globalCacheHitCount++ |
| | return globalHrefCheckCache.get(href) |
| | } |
| | globalCacheMissCount++ |
| | } |
| |
|
| | const flaw = await checkHrefLink( |
| | core, |
| | href, |
| | $, |
| | redirects, |
| | pageMap, |
| | checkAnchors, |
| | checkExternalLinks, |
| | externalServerErrorsAsWarning, |
| | permalink, |
| | { verbose, patient }, |
| | db, |
| | ) |
| |
|
| | if (flaw) { |
| | if (level === 'critical' && !flaw.CRITICAL) { |
| | return |
| | } |
| | const text = $(link).text() |
| | if (!href.startsWith('#')) { |
| | globalHrefCheckCache.set(href, { href, flaw, text }) |
| | } |
| | return { href, flaw, text } |
| | } else { |
| | if (!href.startsWith('#')) { |
| | globalHrefCheckCache.set(href, flaw) |
| | } |
| | } |
| | }, |
| | opts.concurrency || DEFAULT_CONCURRENCY_LIMIT, |
| | ) |
| |
|
| | for (const flaw of newFlaws) { |
| | if (flaw) { |
| | flaws.push(Object.assign(flaw, { page, permalink })) |
| | } |
| | } |
| |
|
| | if (checkImages) { |
| | $('img[src]').each((i, img) => { |
| | let { src } = (img as cheerio.TagElement).attribs |
| |
|
| | |
| | |
| | |
| | |
| | src = src.replace(/\/cb-\d+\//, '/') |
| |
|
| | if (globalImageSrcCheckCache.has(src)) { |
| | globalCacheHitCount++ |
| | return globalImageSrcCheckCache.get(src) |
| | } |
| |
|
| | const flaw = checkImageSrc(src) |
| |
|
| | globalImageSrcCheckCache.set(src, flaw) |
| |
|
| | if (flaw) { |
| | if (level === 'critical' && !flaw.CRITICAL) { |
| | return |
| | } |
| | flaws.push({ permalink, page, src, flaw }) |
| | } |
| | }) |
| | } |
| |
|
| | return flaws |
| | } |
| |
|
| | async function uploadJsonFlawsArtifact( |
| | uploadArtifact: UploadArtifact, |
| | flaws: LinkFlaw[], |
| | { verboseUrl = null }: { verboseUrl?: string | null } = {}, |
| | artifactName = 'all-rendered-link-flaws.json', |
| | ) { |
| | type PrintableLinkFlaw = { |
| | href?: string |
| | url?: string |
| | text?: string |
| | src?: string |
| | flaw?: Flaw |
| | } |
| | const printableFlaws: Record<string, PrintableLinkFlaw[]> = {} |
| | for (const { page, permalink, href, text, src, flaw } of flaws) { |
| | const fullPath = prettyFullPath(page.fullPath) |
| |
|
| | if (!(fullPath in printableFlaws)) { |
| | printableFlaws[fullPath] = [] |
| | } |
| | if (href) { |
| | printableFlaws[fullPath].push({ |
| | href, |
| | url: verboseUrl ? new URL(permalink.href, verboseUrl).toString() : permalink.href, |
| | text, |
| | flaw, |
| | }) |
| | } else if (src) { |
| | printableFlaws[fullPath].push({ |
| | src, |
| | }) |
| | } |
| | } |
| | const message = JSON.stringify(printableFlaws, undefined, 2) |
| | return uploadArtifact(artifactName, message) |
| | } |
| |
|
| | function printFlaws( |
| | core: CoreInject, |
| | flaws: LinkFlaw[], |
| | { verboseUrl }: { verboseUrl?: string | undefined } = {}, |
| | ) { |
| | let previousPage = null |
| | let previousPermalink = null |
| |
|
| | for (const { page, permalink, href, text, src, flaw } of flaws) { |
| | const fullPath = prettyFullPath(page.fullPath) |
| | if (page !== previousPage) { |
| | core.info(`PAGE: ${chalk.bold(fullPath)}`) |
| | } |
| | previousPage = page |
| |
|
| | if (href) { |
| | if (previousPermalink !== permalink.href) { |
| | if (verboseUrl) { |
| | core.info(` URL: ${new URL(permalink.href, verboseUrl).toString()}`) |
| | } else { |
| | core.info(` PERMALINK: ${permalink.href}`) |
| | } |
| | } |
| | previousPermalink = permalink.href |
| |
|
| | core.info(` HREF: ${chalk.bold(href)}`) |
| | core.info(` TEXT: ${text}`) |
| | } else if (src) { |
| | core.info(` IMG SRC: ${chalk.bold(src)}`) |
| | } else { |
| | throw new Error("Flaw has neither 'href' nor 'src'") |
| | } |
| |
|
| | core.info(` FLAW: ${flaw.CRITICAL ? chalk.red(flaw.CRITICAL) : chalk.yellow(flaw.WARNING)}`) |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | function prettyFullPath(fullPath: string) { |
| | return path.relative(process.cwd(), fullPath) |
| | } |
| |
|
// Process-wide memoization shared across all pages/permalinks this run.
// href -> { href, flaw, text } for flawed links, or undefined for links that
// checked out fine (the key's presence alone marks "already checked").
const globalHrefCheckCache = new Map()
// image src (with any /cb-<digits>/ segment removed) -> flaw or undefined,
// same convention as above.
const globalImageSrcCheckCache = new Map()
let globalCacheHitCount = 0
let globalCacheMissCount = 0
| |
|
/**
 * Validate one href found in a page's rendered HTML.
 *
 * Returns a Flaw ({ WARNING: ... } or { CRITICAL: ... }) when the link is
 * broken or suspicious, or undefined when it is fine or deliberately skipped.
 *
 * Three cases are handled, in order:
 *   1. same-page anchors (only when `checkAnchors` is true)
 *   2. site-internal links (href starts with '/')
 *   3. external links (only when `checkExternalLinks` is true)
 */
async function checkHrefLink(
  core: any,
  href: string,
  $: cheerio.Root,
  redirects: Redirects,
  pageMap: PageMap,
  checkAnchors = false,
  checkExternalLinks = false,
  externalServerErrorsAsWarning: string | undefined | null = null,
  permalink: Permalink,
  { verbose = false, patient = false }: { verbose?: boolean; patient?: boolean } = {},
  db: DBType | null = null,
): Promise<Flaw | undefined> {
  // Split the href into its path part and its '#fragment' part.
  const [pathFragment, hashFragment] = href.split('#')
  const hash = `#${hashFragment}`

  // Case 1: same-page anchor — a bare '#...' href, or a link whose path
  // part is this very permalink.
  if (checkAnchors && (!pathFragment || pathFragment === permalink.href)) {
    if (hash === '#') {
      return { WARNING: 'Link is just an empty `#`' }
    }
    // Non-empty fragment: verify it resolves to exactly one element.
    else {
      // Pages excluded from anchor validation — presumably because their
      // anchors are not present in the server-rendered HTML we inspect
      // (REST/GraphQL reference, webhook payloads, etc.) — TODO confirm.
      const avoid =
        permalink &&
        ((permalink.href.includes('/rest/') && !permalink.href.includes('/rest/guides/')) ||
          permalink.href.includes('/webhooks-and-events/webhooks/webhook-events-and-payloads') ||
          permalink.href.includes('/graphql/reference') ||
          permalink.href.includes('/code-security/codeql-cli/codeql-cli-manual/') ||
          permalink.href.includes(
            '/apps/maintaining-github-apps/modifying-a-github-app-registration',
          ) ||
          permalink.href.includes(
            '/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning',
          ) ||
          permalink.href.includes(
            '/site-policy/github-company-policies/github-statement-against-modern-slavery-and-child-labor',
          ) ||
          permalink.href.includes('/site-policy/content-removal-policies/dmca-takedown-policy') ||
          permalink.href.includes('/early-access/'))

      // '#top' is excluded from the DOM check below (browsers treat it as
      // "scroll to top" even without a matching element).
      if (hash !== '#top' && !avoid) {
        // Count matches by element id and by legacy <a name="..."> anchors.
        const countDOMItems = $(hash).length + $(`a[name="${hash.slice(1)}"]`).length
        if (countDOMItems === 0) {
          return { CRITICAL: `Anchor on the same page can't be found by ID` }
        } else if (countDOMItems > 1) {
          return { CRITICAL: `Matches multiple points in the page` }
        }
      }
    }
  }
  // Cases 2 and 3: a link to some other page, internal or external.
  else {
    // Case 2: site-internal links are root-relative.
    if (href.startsWith('/')) {
      const pathname = new URL(href, 'http://example.com').pathname

      // Trailing-slash URLs always redirect, so they're a flaw in themselves;
      // additionally check whether the slash-less path even exists.
      if (pathname.endsWith('/')) {
        const whatifPathname = pathname.slice(0, -1)
        if (getRedirect(whatifPathname, { redirects, pages: pageMap })) {
          return {
            WARNING: `Redirect to ${getRedirect(whatifPathname, { redirects, pages: pageMap })}`,
          }
        } else if (!pageMap[whatifPathname]) {
          // Links into deprecated Enterprise Server releases are tolerated
          // (those pages are archived elsewhere).
          if (!deprecatedVersionPrefixesRegex.test(whatifPathname)) {
            return { CRITICAL: 'Broken link' }
          }
        }
        return { WARNING: 'Links with a trailing / will always redirect' }
      } else {
        const firstPart = pathname.split('/')[1]
        // Static assets (/assets/..., /public/...) map to files on disk.
        if (STATIC_PREFIXES[firstPart]) {
          const staticFilePath = path.join(
            STATIC_PREFIXES[firstPart],
            pathname.split(path.sep).slice(2).join(path.sep),
          )
          if (!fs.existsSync(staticFilePath)) {
            return { CRITICAL: `Static file not found ${staticFilePath} (${pathname})` }
          }
        } else if (getRedirect(pathname, { redirects, pages: pageMap })) {
          return { WARNING: `Redirect to ${getRedirect(pathname, { redirects, pages: pageMap })}` }
        } else if (!pageMap[pathname]) {
          // See the deprecated-version note above.
          if (deprecatedVersionPrefixesRegex.test(pathname)) {
            return
          }

          return { CRITICAL: 'Broken link' }
        }
      }
    }
    // Case 3: external link (only checked when explicitly enabled).
    else if (checkExternalLinks) {
      if (!href.startsWith('https://')) {
        return { WARNING: `Will not check external URLs that are not HTTPS (${href})` }
      }
      if (linksToSkip(href)) {
        return
      }
      const { ok, ...info } = await checkExternalURLCached(core, href, { verbose, patient }, db)
      if (!ok) {
        // Optionally downgrade server-side (5xx) failures and transient
        // network errors to warnings instead of criticals.
        let problem = 'CRITICAL'
        if (externalServerErrorsAsWarning) {
          if (
            (info.statusCode && info.statusCode >= 500) ||
            (info.requestError && isTemporaryRequestError(info.requestError))
          ) {
            problem = 'WARNING'
          }
        }
        return { [problem]: `Broken external link (${JSON.stringify(info)})`, isExternal: true }
      }
    }
  }
}
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | function isTemporaryRequestError(requestError: string | undefined) { |
| | if (typeof requestError === 'string') { |
| | |
| | |
| | const errorEnums = ['ECONNRESET', 'ECONNREFUSED', 'ETIMEDOUT', 'ECONNABORTED'] |
| | return errorEnums.some((enum_) => requestError.includes(enum_)) |
| | } |
| | return false |
| | } |
| |
|
| | |
| | |
| | |
/**
 * Check an external URL, consulting the in-memory rate-limited-domains map
 * and the on-disk (lowdb) result cache before actually fetching.
 *
 * Successful results are written back to the cache; failures are NOT cached,
 * so they get retried on the next run.
 */
async function checkExternalURLCached(
  core: CoreInject,
  href: string,
  { verbose, patient }: { verbose?: boolean; patient?: boolean },
  db: DBType | null,
) {
  const cacheMaxAge = EXTERNAL_LINK_CHECKER_MAX_AGE_MS
  const now = new Date().getTime()
  // Cache key is the URL without any fragment.
  const url = href.split('#')[0]

  // If this domain answered 429 within the last hour, skip it entirely.
  const { hostname } = new URL(url)
  const rateLimitTime = _rateLimitedDomains.get(hostname)
  if (rateLimitTime) {
    const oneHourAgo = Date.now() - 60 * 60 * 1000
    if (rateLimitTime > oneHourAgo) {
      if (verbose) core.info(`Skipping ${url} - domain ${hostname} is rate limited`)
      return { ok: false, statusCode: 429, skipReason: 'Domain rate limited' }
    } else {
      // The hour has passed; forget the rate limit.
      _rateLimitedDomains.delete(hostname)
    }
  }

  // Cache lookup. The max age is jittered ±10% so an entire generation of
  // cached entries doesn't expire (and get re-fetched) at the same moment.
  if (cacheMaxAge) {
    const tooOld = now - Math.floor(jitter(cacheMaxAge, 10))
    if (db && db.data.urls[url]) {
      if (db.data.urls[url].timestamp > tooOld) {
        if (verbose) {
          core.info(`External URL ${url} in cache`)
        }
        return db.data.urls[url].result
      } else if (verbose) {
        core.info(`External URL ${url} in cache but too old`)
        // NOTE(review): this eviction only runs in verbose mode. Harmless in
        // practice because a fresh successful check overwrites the entry
        // below, but the placement looks unintended — confirm.
        delete db.data.urls[url]
      }
    }
  }

  const result = await checkExternalURL(core, href, {
    verbose,
    patient,
  })

  if (cacheMaxAge) {
    // Only cache successes; failed lookups are retried next run.
    if (db && result.ok) {
      db.data.urls[url] = {
        timestamp: now,
        result,
      }
    }
  }

  return result
}
| |
|
| | const _fetchCache = new Map() |
| | async function checkExternalURL( |
| | core: CoreInject, |
| | url: string, |
| | { verbose = false, patient = false } = {}, |
| | ) { |
| | if (!url.startsWith('https://')) throw new Error('Invalid URL') |
| | const cleanURL = url.split('#')[0] |
| | if (!_fetchCache.has(cleanURL)) { |
| | _fetchCache.set(cleanURL, innerFetch(core, cleanURL, { verbose, patient })) |
| | } |
| | return _fetchCache.get(cleanURL) |
| | } |
| |
|
| | |
| | |
// hostname -> epoch-ms timestamp of when that domain last answered HTTP 429.
// checkExternalURLCached() skips a listed domain for one hour.
const _rateLimitedDomains = new Map<string, number>()
| |
|
/**
 * Perform the actual HTTP check of one external URL.
 *
 * Tries a cheap HEAD request first and falls back to GET when the server
 * answers 405/404/403 (some servers mishandle HEAD). Returns
 * { ok, statusCode } for an HTTP response, or { ok: false, requestError }
 * when the request itself failed (DNS, timeout, reset, ...).
 */
async function innerFetch(
  core: CoreInject,
  url: string,
  config: { verbose?: boolean; patient?: boolean; retries?: number } = {},
) {
  const { verbose, patient } = config

  // Present a browser-like User-Agent; some sites reject obvious bots.
  const headers = {
    'User-Agent':
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36',
  }

  // "Patient" mode retries more and waits longer before timing out.
  const retries = patient ? 3 : 2
  const timeout = patient ? 10000 : 5000

  if (verbose) core.info(`External URL HEAD: ${url}`)

  try {
    // HEAD first: cheaper than GET when the server supports it.
    let r = await fetchWithRetry(
      url,
      {
        method: 'HEAD',
        headers,
      },
      {
        retries,
        timeout,
        throwHttpErrors: false,
      },
    )

    // Fall back to GET for servers that reject or mishandle HEAD.
    if (r.status === 405 || r.status === 404 || r.status === 403) {
      if (verbose) core.info(`External URL GET: ${url} (HEAD failed with ${r.status})`)
      r = await fetchWithRetry(
        url,
        {
          method: 'GET',
          headers,
        },
        {
          retries,
          timeout,
          throwHttpErrors: false,
        },
      )
    }

    if (verbose) {
      core.info(`External URL ${url}: ${r.status}`)
    }

    // A 429 marks the whole domain as rate limited; checkExternalURLCached
    // will then skip that domain for an hour.
    const { hostname } = new URL(url)
    if (r.status === 429) {
      _rateLimitedDomains.set(hostname, Date.now())
      if (verbose) core.info(`Domain ${hostname} is now rate limited for 1 hour`)
    }

    if (verbose) {
      core.info((r.ok ? chalk.green : chalk.red)(`${r.status} on ${url}`))
    }
    return { ok: r.ok, statusCode: r.status }
  } catch (err) {
    if (err instanceof Error) {
      if (verbose) {
        core.info(chalk.yellow(`Request Error (${err.message}) on ${url}`))
      }
      return { ok: false, requestError: err.message }
    }
    throw err
  }
}
| |
|
| | function checkImageSrc(src: string) { |
| | if (!src.startsWith('/') && !src.startsWith('http')) { |
| | return { CRITICAL: 'Image path is not absolute. Should start with a /' } |
| | } |
| | const pathname = new URL(src, 'http://example.com').pathname |
| | if (pathname.startsWith('http://')) { |
| | return { CRITICAL: "Don't use insecure HTTP:// for external images" } |
| | } |
| | if (!pathname.startsWith('/')) { |
| | return { WARNING: "External images can't not be checked" } |
| | } |
| | const prefix = pathname.split('/')[1] |
| | if (prefix in STATIC_PREFIXES) { |
| | const staticFilePath = path.join( |
| | STATIC_PREFIXES[prefix], |
| | pathname.split(path.sep).slice(2).join(path.sep), |
| | ) |
| | if (!fs.existsSync(staticFilePath)) { |
| | return { CRITICAL: `Static file not found (${pathname})` } |
| | } |
| | } else { |
| | return { WARNING: `Unrecognized image src prefix (${prefix})` } |
| | } |
| | } |
| |
|
| | function summarizeFlaws(core: CoreInject, flaws: LinkFlaw[]) { |
| | if (flaws.length) { |
| | core.info( |
| | chalk.bold( |
| | `Found ${flaws.length.toLocaleString()} flaw${flaws.length === 1 ? '' : 's'} in total.`, |
| | ), |
| | ) |
| | } else { |
| | core.info(chalk.green('No flaws found! 💖')) |
| | } |
| | } |
| |
|
| | function summarizeCounts(core: CoreInject, pages: Page[], tookSeconds: number) { |
| | const count = pages.map((page) => page.permalinks.length).reduce((a, b) => a + b, 0) |
| | core.info( |
| | `Tested ${count.toLocaleString()} permalinks across ${pages.length.toLocaleString()} pages`, |
| | ) |
| | core.info(`Took ${Math.floor(tookSeconds)} seconds. (~${(tookSeconds / 60).toFixed(1)} minutes)`) |
| | const permalinksPerSecond = count / tookSeconds |
| | core.info(`~${permalinksPerSecond.toFixed(1)} permalinks per second.`) |
| | const pagesPerSecond = pages.length / tookSeconds |
| | core.info(`~${pagesPerSecond.toFixed(1)} pages per second.`) |
| | } |
| |
|
| | function shuffle(array: any[]) { |
| | let currentIndex = array.length |
| | let randomIndex |
| |
|
| | |
| | while (currentIndex !== 0) { |
| | |
| | randomIndex = Math.floor(Math.random() * currentIndex) |
| | currentIndex-- |
| |
|
| | |
| | ;[array[currentIndex], array[randomIndex]] = [array[randomIndex], array[currentIndex]] |
| | } |
| |
|
| | return array |
| | } |
| |
|
/**
 * Render one page's body to an HTML string for a single permalink, without
 * going through the real Express server: builds a minimal fake request,
 * runs it through the same context middleware chain the server uses, then
 * Liquid-renders the page source and converts it with the minimal unified
 * processor.
 */
async function renderInnerHTML(page: Page, permalink: Permalink) {
  // Stub Express plumbing. NOTE(review): `res` is an empty object cast to
  // Response below — assumes none of this middleware writes a response or
  // branches on `next`; confirm before adding middleware here.
  const next = () => {}
  const res = {}

  const pagePath = permalink.href
  const context: Context = {}
  const req = {
    path: pagePath,
    language: permalink.languageCode,
    pagePath,
    cookies: {},
    context,
  }
  // Populate req.context in the same order the server would; `page` must be
  // set before features() runs.
  await contextualize(req as ExtendedRequest, res as Response, next)
  await shortVersions(req as ExtendedRequest, res as Response, next)
  req.context.page = page
  features(req as ExtendedRequest, res as Response, next)

  req.context.relativePath = page.relativePath

  // Combine the page's source parts (intro, permissions callout, body).
  const guts = [page.rawIntro, page.rawPermissions, page.markdown].filter(Boolean).join('\n').trim()

  // Liquid first (version conditionals, reusables), then markdown -> HTML
  // via the minimal unified processor.
  const markdown = await liquid.parseAndRender(guts, req.context)
  const processor = createMinimalProcessor(req.context)
  const vFile = await processor.process(markdown)
  return vFile.toString()
}
| |
|
| | export default main |
| |
|