// src/workflows/content-changes-table-comment.ts
/**
* Hi there! 👋
* To test this code locally, outside of Actions, you need to run
* the script src/workflows/content-changes-table-comment-cli.ts
*
* See the instructions in the doc string comment at the
* top of src/workflows/content-changes-table-comment-cli.ts
*/
import fs from 'node:fs'
import path from 'node:path'
import github from '@actions/github'
import core from '@actions/core'
import walk from 'walk-sync'
import { Octokit } from '@octokit/rest'
import { retry } from '@octokit/plugin-retry'
import { getContents } from './git-utils'
import getApplicableVersions from '@/versions/lib/get-applicable-versions'
import nonEnterpriseDefaultVersion from '@/versions/lib/non-enterprise-default-version'
import { allVersionShortnames } from '@/versions/lib/all-versions'
import readFrontmatter from '@/frame/lib/read-frontmatter'
import { inLiquid } from './lib/in-liquid'
// Environment-driven configuration. BASE_SHA/HEAD_SHA are optional overrides
// that take precedence over the PR payload SHAs — presumably set by the CLI
// wrapper mentioned in the header comment; verify against that script.
const { GITHUB_TOKEN, APP_URL, BASE_SHA, HEAD_SHA } = process.env
// Workflow event context from @actions/github (carries the pull_request payload).
const context = github.context
// the max size of the comment (in bytes)
// the action we use to post the comment caps out at about 144kb
// see docs-engineering#1849 and peter-evans/create-or-update-comment#271 for more info.
// The max size the action allows is 2^16, but our table calculates near the end
// of its rendering before we add a key, so playing it safe with 2^15.
const MAX_COMMENT_SIZE = 32768
// Base URL used for the "Production" links in the generated table.
const PROD_URL = 'https://docs.github.com'
// When this file is invoked directly from action as opposed to being imported
if (import.meta.url.endsWith(process.argv[1])) {
  // NOTE(review): the non-null assertions assume this only runs on
  // pull_request-triggered workflows where `payload.pull_request` is set;
  // any other event type would throw here — confirm with the workflow YAML.
  const baseOwner = context.payload.pull_request!.base.repo.owner.login
  const baseRepo = context.payload.pull_request!.base.repo.name
  // Env var overrides (if present) win over the payload SHAs.
  const baseSHA = BASE_SHA || context.payload.pull_request!.base.sha
  const headSHA = HEAD_SHA || context.payload.pull_request!.head.sha
  // Top-level await — this module runs as ESM.
  const markdownTable = await main(baseOwner, baseRepo, baseSHA, headSHA)
  // Expose the rendered table as a step output for the comment-posting action.
  core.setOutput('changesTable', markdownTable)
}
/**
 * Builds a markdown table summarizing the content files changed between
 * `baseSHA` and `headSHA`: one row per changed article (plus one row per
 * content file indirectly affected via a changed reusable), with links to
 * the source blob, the review (staging) deployment, and production for each
 * applicable docs version.
 *
 * Returns '' when the PR contains no content changes. The table is truncated
 * with a note once it approaches MAX_COMMENT_SIZE so the comment-posting
 * action does not reject it.
 *
 * @throws Error if GITHUB_TOKEN or APP_URL is not set in the environment.
 */
async function main(owner: string, repo: string, baseSHA: string, headSHA: string) {
  if (!GITHUB_TOKEN) {
    throw new Error(`GITHUB_TOKEN environment variable not set`)
  }
  if (!APP_URL) {
    throw new Error(`APP_URL environment variable not set`)
  }
  // Retry-wrapped Octokit so transient API failures don't fail the workflow.
  const RetryingOctokit = Octokit.plugin(retry)
  const octokit = new RetryingOctokit({
    auth: `token ${GITHUB_TOKEN}`,
  })
  // get the list of file changes from the PR
  // this works even if the head commit is from a fork
  const response = await octokit.rest.repos.compareCommitsWithBasehead({
    owner,
    repo,
    basehead: `${baseSHA}...${headSHA}`,
  })
  const { files } = response.data
  const pathPrefix = 'content/'
  const reusablesPrefix = 'data/reusables/'
  // `filename` from the compare API is the full repo-relative path
  // (e.g. 'content/foo/README.md'), so the README exclusion must compare the
  // *basename*. (Comparing the whole path against 'readme.md' never matched,
  // so READMEs were previously not excluded at all.)
  const isReadme = (filename: string) => path.basename(filename).toLowerCase() === 'readme.md'
  const articleFiles = (files || []).filter(
    ({ filename }) => filename.startsWith(pathPrefix) && !isReadme(filename),
  )
  const reusablesFiles = (files || []).filter(
    ({ filename }) => filename.startsWith(reusablesPrefix) && !isReadme(filename),
  )
  // Content files not directly changed in the PR, but which render one of the
  // changed reusables and are therefore affected too.
  const filesUsingReusables: File[] = []
  if (reusablesFiles.length) {
    const contentFilePaths = new Set<string>()
    const allContentFiles = getAllContentFiles()
    for (const reusablesFile of reusablesFiles) {
      // data/reusables/foo/bar.md is referenced in Liquid as `reusables.foo.bar`
      const relativePath = path
        .relative('data/reusables', reusablesFile.filename)
        .replace(/\.md$/, '')
      const needle = `reusables.${relativePath.split('/').join('.')}`
      for (const [contentFilePath, contentFileContents] of allContentFiles) {
        if (inLiquid(contentFilePath, contentFileContents, needle)) {
          contentFilePaths.add(contentFilePath)
        }
      }
    }
    // Skip files already listed as directly-changed articles (Set for O(1) lookups).
    const articleFilesAlready = new Set(articleFiles.map((file) => file.filename))
    for (const contentFilePath of contentFilePaths) {
      if (articleFilesAlready.has(contentFilePath)) continue
      filesUsingReusables.push({
        filename: contentFilePath,
        blob_url: makeBlobUrl(owner, repo, headSHA, contentFilePath),
        status: 'changed',
      })
    }
  }
  // One markdown table row (or undefined, when frontmatter can't be parsed)
  // per directly-changed article. Fetched in parallel.
  const lines = await Promise.all(
    articleFiles.map(async (file) => {
      const sourceUrl = file.blob_url
      const fileName = file.filename.slice(pathPrefix.length)
      const fileUrl = fileName.replace('/index.md', '').replace(/\.md$/, '')
      // get the file contents and decode them
      // this script is called from the main branch, so we need the API call to get the contents from the branch, instead
      const fileContents = await getContents(
        owner,
        repo,
        // Can't get its content if it no longer exists.
        // Meaning, you'd get a 404 on the `getContents()` utility function.
        // So, to be able to get necessary meta data about what it *was*,
        // if it was removed, fall back to the 'base'.
        file.status === 'removed' ? baseSHA : headSHA,
        file.filename,
      )
      const { data } = readFrontmatter(fileContents)
      if (!data) {
        console.warn(`Unable to extract frontmatter from ${file.filename}`)
        return
      }
      return makeRow({ file, fileName, sourceUrl, fileUrl, data })
    }),
  )
  // Rows for content files affected indirectly via changed reusables. These
  // are read from the local checkout, synchronously, so no Promise.all needed.
  lines.push(
    ...filesUsingReusables.map((file) => {
      const { data } = readFrontmatter(fs.readFileSync(file.filename, 'utf-8'))
      const fileName = file.filename.slice(pathPrefix.length)
      const fileUrl = fileName.replace('/index.md', '').replace(/\.md$/, '')
      return makeRow({
        file,
        fileName,
        sourceUrl: file.blob_url,
        fileUrl,
        data,
        fromReusable: true,
      })
    }),
  )
  const filteredLines = lines.filter(Boolean) as string[]
  if (!filteredLines.length) {
    console.warn(
      "No found files to generate a comment from. This PR doesn't contain any content changes.",
    )
    return ''
  }
  const headings = ['Source', 'Review', 'Production', 'What Changed']
  const markdownTableHead = [
    `| ${headings.map((heading) => `**${heading}**`).join(' | ')} |`,
    `| ${headings.map(() => ':---').join(' | ')} |`,
  ]
  let markdownTable = `${markdownTableHead.join('\n')}\n`
  for (const filteredLine of filteredLines) {
    // The cap is in *bytes* (see MAX_COMMENT_SIZE); use Buffer.byteLength
    // rather than String#length, which counts UTF-16 code units and badly
    // undercounts non-ASCII (e.g. Arabic) content in UTF-8.
    if (Buffer.byteLength(markdownTable + filteredLine, 'utf8') > MAX_COMMENT_SIZE) {
      markdownTable += '\n**Note** There are more changes in this PR than we can show.'
      break
    }
    markdownTable += `${filteredLine}\n`
  }
  return markdownTable
}
/**
 * Builds a github.com blob URL for a file at a given commit SHA.
 *
 * Each path segment is percent-encoded individually so that special
 * characters (spaces, '#', '?', …) are escaped while the '/' separators are
 * preserved. Encoding the whole path with a single encodeURIComponent call
 * also encoded the slashes ('content%2Ffoo.md'), producing broken blob URLs.
 */
function makeBlobUrl(owner: string, repo: string, sha: string, filePath: string) {
  const encodedPath = filePath.split('/').map(encodeURIComponent).join('/')
  return `https://github.com/${owner}/${repo}/blob/${sha}/${encodedPath}`
}
/**
 * Minimal shape of a changed file: structurally compatible with the entries
 * of the compare API's `files` array, and with the synthetic entries this
 * script builds for content files affected via reusables (status: 'changed').
 */
type File = {
  filename: string
  status: 'added' | 'removed' | 'modified' | 'renamed' | 'copied' | 'changed' | 'unchanged'
  blob_url: string
}
/**
 * Renders a single markdown table row for one changed content file:
 * | source link | review links | production links | note |
 *
 * Review/production cells get one link per applicable plan (fpt, ghec, …),
 * with ghes releases expanded to one link per release number. For removed
 * files the review cell is 'n/a'; rows that came in via a changed reusable
 * are annotated 'from reusable'.
 */
function makeRow({
  file,
  fileUrl,
  fileName,
  sourceUrl,
  data,
  fromReusable,
}: {
  file: File
  fileUrl: string
  fileName: string
  sourceUrl: string
  data: any
  fromReusable?: boolean
}) {
  const statusPrefix =
    file.status === 'added' ? 'New file: ' : file.status === 'removed' ? 'Removed: ' : ''
  let contentCell = `${statusPrefix}[\`${fileName}\`](${sourceUrl})`
  let reviewCell = ''
  let prodCell = ''
  try {
    // the try/catch is needed because getApplicableVersions() returns either [] or throws an error when it can't parse the versions frontmatter
    // try/catch can be removed if docs-engineering#1821 is resolved
    // i.e. for feature based versioning, like ghec: 'issue-6337'
    const fileVersions: string[] = getApplicableVersions(data.versions)
    for (const plan of Object.keys(allVersionShortnames)) {
      // plan is the shortName (i.e., fpt)
      // allVersionShortnames[plan] is the planName (i.e., free-pro-team)
      // walk by the plan names since we generate links differently for most plans
      const planName = allVersionShortnames[plan]
      const matching = fileVersions.filter((fileVersion) => fileVersion.includes(planName))
      if (matching.length === 1) {
        // single-version plans: fpt and ghec
        const only = matching[0]
        if (only === nonEnterpriseDefaultVersion) {
          // fpt URLs omit the version segment entirely
          reviewCell += `[${plan}](${APP_URL}/${fileUrl})<br>`
          prodCell += `[${plan}](${PROD_URL}/${fileUrl})<br>`
        } else {
          // non-numbered releases (ghec) use the fully versioned url
          reviewCell += `[${plan}](${APP_URL}/${only}/${fileUrl})<br>`
          prodCell += `[${plan}](${PROD_URL}/${only}/${fileUrl})<br>`
        }
      } else if (matching.length > 1) {
        // numbered releases (ghes): link each release individually
        reviewCell += `${plan}@ `
        prodCell += `${plan}@ `
        for (const version of matching) {
          const release = version.split('@')[1]
          reviewCell += `[${release}](${APP_URL}/${version}/${fileUrl}) `
          prodCell += `[${release}](${PROD_URL}/${version}/${fileUrl}) `
        }
        reviewCell += '<br>'
        prodCell += '<br>'
      }
    }
  } catch {
    console.error(
      `Version information for ${file.filename} couldn't be determined from its frontmatter.`,
    )
  }
  let note = ''
  if (file.status === 'removed') {
    note = 'removed'
    // If the file was removed, the `reviewCell` no longer makes sense
    // since it was based on looking at the base sha.
    reviewCell = 'n/a'
  } else if (fromReusable) {
    note = 'from reusable'
  }
  return `| ${contentCell} | ${reviewCell} | ${prodCell} | ${note} |`
}
/**
 * Reads every Markdown file under content/ from the local checkout.
 *
 * @returns a Map of file path (base-path included) -> raw file contents,
 *          used to scan for Liquid references to changed reusables.
 */
function getAllContentFiles(): Map<string, string> {
  const contentFiles = new Map<string, string>()
  const walkOptions = {
    globs: ['**/*.md'],
    includeBasePath: true,
  }
  for (const filePath of walk('content', walkOptions)) {
    contentFiles.set(filePath, fs.readFileSync(filePath, 'utf-8'))
  }
  return contentFiles
}
export default main