AbdulElahGwaith's picture
Upload folder using huggingface_hub
88df9e4 verified
import { program } from 'commander'
import fs from 'fs'
import coreLib from '@actions/core'
import github from '@/workflows/github'
import { getEnvInputs } from '@/workflows/get-env-inputs'
import { createReportIssue, linkReports } from '@/workflows/issue-report'
import { getAllRuleNames } from '@/content-linter/lib/helpers/rule-utils'
/**
 * Upper bound, in characters, for the generated issue body. GitHub's issue
 * body size limit is ~65k characters, so 60k is used as a safe limit.
 */
const MAX_ISSUE_BODY_SIZE = 60_000
/**
 * When the number of reported warnings exceeds this value, the report is
 * prefixed with an alert so the warnings get attention.
 */
const MAX_WARNINGS_BEFORE_ALERT = 20
/**
 * Settings that are consulted only when building the automated weekly report.
 */
export const reportingConfig: { includeSeverities: string[]; includeRules: string[] } = {
  // Flaws whose severity appears in this list are reported
  includeSeverities: ['error', 'warning'],
  // Flaws raised by any of these rules are reported whatever their severity
  includeRules: ['expired-content'],
}
/**
 * A single lint finding, in the shape this script expects for each entry of
 * the per-file arrays in the parsed lint results JSON.
 */
interface LintFlaw {
  // Severity label; this file matches it against `reportingConfig.includeSeverities`
  // and counts entries equal to 'warning'
  severity: string
  // Names of the rule behind the finding — presumably the rule's aliases;
  // NOTE(review): confirm against the linter output
  ruleNames: string[]
  // Optional extra detail; not read anywhere in this file
  errorDetail?: string
}
/**
 * Decides whether a single lint flaw belongs in the automated report.
 *
 * A flaw qualifies when its severity is listed in
 * `reportingConfig.includeSeverities`, or when any name returned by
 * `getAllRuleNames` for it is listed in `reportingConfig.includeRules`.
 *
 * @param flaw - The lint flaw to evaluate.
 * @returns `true` when the flaw should be reported, `false` otherwise.
 */
function shouldIncludeInReport(flaw: LintFlaw): boolean {
  // Rule names are resolved up front, before either check runs.
  const ruleNames = getAllRuleNames(flaw)
  const { includeSeverities, includeRules } = reportingConfig

  return (
    // Severity alone is enough to include the flaw...
    includeSeverities.includes(flaw.severity) ||
    // ...otherwise an explicitly listed rule overrides the severity filter.
    ruleNames.some((name: string) => includeRules.includes(name))
  )
}
// [start-readme]
//
// This script runs once a week via a scheduled GitHub Action to lint
// the entire content and data directories based on our
// markdownlint.js rules.
//
// If errors or warnings are found, it will open up a new issue in the
// docs-content repo with the label "broken content markdown report".
//
// The Content FR will go through the issue and update the content and
// data files accordingly.
//
// [end-readme]
// Command-line interface: a single `--path` option pointing at the lint results JSON.
program
  .description(
    'Opens an issue for Content FR with the errors and warnings from the weekly content/data linter.',
  )
  .option(
    '-p, --path <path>',
    'provide a path to the errors and warnings output json file that will be in the issue body',
  )
  .parse(process.argv)
const { path } = program.opts()
// `main` is async: handle its rejection explicitly instead of leaving a
// floating promise, so the failure is logged with its stack trace and the
// run is marked as failed.
main().catch((error: unknown) => {
  console.error(error)
  coreLib.setFailed(error instanceof Error ? error.message : String(error))
})
/**
 * Reads the lint results JSON given via `--path`, keeps the flaws selected by
 * `shouldIncludeInReport`, posts them as a report issue with
 * `createReportIssue`, and then hands the new report to `linkReports`.
 *
 * Nothing is posted when no flaw remains after filtering.
 *
 * @throws Error when `--path` is missing or when any of REPORT_REPOSITORY,
 *   REPORT_AUTHOR or REPORT_LABEL is unset. File-read and JSON-parse errors
 *   propagate unchanged.
 */
async function main(): Promise<void> {
  // Without this guard a missing option is interpolated into the literal file name "undefined".
  if (!path) {
    throw new Error('Missing required option: -p, --path <path> to the lint results JSON file')
  }
  const lintResults = fs.readFileSync(`${path}`, 'utf8')
  const core = coreLib
  const { REPORT_REPOSITORY, REPORT_AUTHOR, REPORT_LABEL } = process.env
  // Fail with a clear message here rather than passing `undefined` downstream.
  if (!REPORT_REPOSITORY || !REPORT_AUTHOR || !REPORT_LABEL) {
    throw new Error(
      'REPORT_REPOSITORY, REPORT_AUTHOR and REPORT_LABEL must all be set to post the lint report',
    )
  }
  const octokit = github()
  // `GITHUB_TOKEN` is optional. If you need the token to post a comment
  // or open an issue report, you might get cryptic error messages from Octokit.
  getEnvInputs(['GITHUB_TOKEN'])

  const parsedResults: Record<string, LintFlaw[]> = JSON.parse(lintResults)
  const { filteredResults, totalWarnings } = filterReportableResults(parsedResults)

  // An issue with an empty file list is noise: only report when something was found.
  if (Object.keys(filteredResults).length === 0) {
    core.info('No lint issues match the reporting configuration. Skipping issue creation.')
    return
  }

  core.info(`Creating issue for configured lint rules...`)
  const reportBody = buildReportBody(filteredResults, totalWarnings)

  const reportProps = {
    core,
    octokit,
    reportTitle: `Content linting issues requiring attention`,
    reportBody,
    reportRepository: REPORT_REPOSITORY,
    reportLabel: REPORT_LABEL,
  }
  const newReport = await createReportIssue(reportProps)

  const linkProps = {
    core,
    octokit,
    newReport,
    reportRepository: REPORT_REPOSITORY,
    reportAuthor: REPORT_AUTHOR,
    reportLabel: REPORT_LABEL,
  }
  await linkReports(linkProps)
}

/**
 * Applies `shouldIncludeInReport` to every flaw, drops files left with no
 * flaws, and counts the remaining warning-level flaws.
 */
function filterReportableResults(parsedResults: Record<string, LintFlaw[]>): {
  filteredResults: Record<string, LintFlaw[]>
  totalWarnings: number
} {
  const filteredResults: Record<string, LintFlaw[]> = {}
  // Tracked so an alert can be printed when warnings exceed a manageable number
  let totalWarnings = 0
  for (const [file, flaws] of Object.entries(parsedResults)) {
    const filteredFlaws = flaws.filter((flaw) => shouldIncludeInReport(flaw))
    // Only keep files that still have flaws after filtering
    if (filteredFlaws.length === 0) continue
    totalWarnings += filteredFlaws.filter((flaw) => flaw.severity === 'warning').length
    filteredResults[file] = filteredFlaws
  }
  return { filteredResults, totalWarnings }
}

/**
 * Renders the issue body: an optional too-many-warnings alert, one JSON code
 * block per file, and a truncation notice when the next file entry would push
 * the body past MAX_ISSUE_BODY_SIZE.
 */
function buildReportBody(filteredResults: Record<string, LintFlaw[]>, totalWarnings: number): string {
  const totalFiles = Object.keys(filteredResults).length
  const tooManyWarningsAlertText =
    totalWarnings > MAX_WARNINGS_BEFORE_ALERT
      ? `**Alert**: ${totalWarnings} warning-level issues have been found. These must be addressed ASAP so they do not continue to accumulate.\n\n`
      : ''
  let reportBody = `${tooManyWarningsAlertText}The following files have markdown lint issues that require attention:\n\n`
  let filesIncluded = 0
  let truncated = false
  for (const [file, flaws] of Object.entries(filteredResults)) {
    const fileEntry = `File: \`${file}\`:\n\`\`\`json\n${JSON.stringify(flaws, null, 2)}\n\`\`\`\n`
    // Stop at the first file that would exceed the size limit
    if (reportBody.length + fileEntry.length > MAX_ISSUE_BODY_SIZE) {
      truncated = true
      break
    }
    reportBody += fileEntry
    filesIncluded++
  }
  // Tell readers how much was left out and how to get the full output
  if (truncated) {
    const remaining = totalFiles - filesIncluded
    reportBody += `\n---\n\n⚠️ **Output truncated**: Showing ${filesIncluded} of ${totalFiles} files with lint issues. ${remaining} additional files have been omitted to stay within GitHub's issue size limits.\n`
    reportBody += `\nTo see all results, run the linter locally:\n\`\`\`bash\nnpm run lint-content -- --paths content data\n\`\`\`\n`
  }
  return reportBody
}