Upload folder using huggingface_hub

88df9e4 verified about 1 month ago

6.06 kB

	import { addError, filterTokens } from 'markdownlint-rule-helpers'
	import matter from '@gr2m/gray-matter'

	import type { RuleParams, RuleErrorCallback, MarkdownToken } from '@/content-linter/types'

	// Reports a lint error through the `onError` callback by delegating to
	// `addError` from markdownlint-rule-helpers. The expected and actual values
	// are formatted into the two message arguments passed to `addError`.
	//
	// onError    - callback the error is reported through
	// lineNumber - line number the error is attached to
	// expected   - what the rule expected; emitted as `Expected: <expected>`
	// actual     - what was found instead; emitted as ` Actual: <actual>`
	// range      - forwarded to `addError` unchanged (may be null)
	// fixInfo    - forwarded to `addError` unchanged
	export function addFixErrorDetail(
	  onError: RuleErrorCallback,
	  lineNumber: number,
	  expected: string,
	  actual: string,
	  // Using flexible type to accommodate different range formats from various linting rules
	  range: [number, number] | number[] | null,
	  // Using unknown for fixInfo as markdownlint-rule-helpers accepts various fix info structures
	  fixInfo: unknown,
	): void {
	  addError(onError, lineNumber, `Expected: ${expected}`, ` Actual: ${actual}`, range, fixInfo)
	}

	// Calls `handler` once for every child token of type `type` found inside
	// each 'inline' token that `filterTokens` yields for `params`.
	//
	// The handler receives the matching child and the 'inline' token owning it.
	// Its return value is ignored: a Promise returned by an async handler is
	// not awaited here.
	export function forEachInlineChild(
	  params: RuleParams,
	  type: string,
	  // Handler uses `any` for function parameter variance reasons. TypeScript's contravariance rules for function
	  // parameters mean that a function accepting a specific type cannot be assigned to a parameter of type `unknown`.
	  // Therefore, `unknown` cannot be used here, as different linting rules pass tokens with varying structures
	  // beyond the base MarkdownToken interface, and some handlers are async.
	  handler: (child: any, token?: any) => void | Promise<void>,
	): void {
	  filterTokens(params, 'inline', (token: MarkdownToken) => {
	    // Treat a missing children list as empty rather than throwing on it.
	    const matchingChildren = (token.children ?? []).filter((c) => c.type === type)
	    for (const child of matchingChildren) {
	      handler(child, token)
	    }
	  })
	}

	// Locates the first occurrence of `content` within `line` and returns it as
	// a `[column, length]` pair, where `column` is 1-based and `length` is
	// `content.length`. Returns null when `content` does not occur in `line`.
	//
	// Throws an Error when `content` is the empty string.
	export function getRange(line: string, content: string): [number, number] | null {
	  if (content.length === 0) {
	    // This function assumes that the content is something. If it's an
	    // empty string it can never produce a valid range.
	    throw new Error('invalid content (empty)')
	  }
	  const startColumnIndex = line.indexOf(content)
	  // indexOf is 0-based; the returned column is shifted to be 1-based.
	  return startColumnIndex !== -1 ? [startColumnIndex + 1, content.length] : null
	}

	// Tells whether `text` is wrapped in quotes.
	export function isStringQuoted(text: string): boolean {
	  // Anatomy of the pattern:
	  //   ^['"]   opening single or double quote
	  //   .*      any run of characters other than line breaks
	  //   ['"]    closing single or double quote (need not match the opener)
	  //   [?!]?$  optional trailing ? or !, because that punctuation
	  //           can sit outside of the quoted string
	  const quotedPattern = /^['"].*['"][?!]?$/
	  return quotedPattern.test(text)
	}

	// Tells whether `text` finishes with sentence punctuation.
	export function isStringPunctuated(text: string): boolean {
	  // The text must end in one of . ? ! which may be followed by a single
	  // closing quote (' or "). Because anything may precede the punctuation,
	  // a quote sitting just before it is accepted as well.
	  const match = /^.*[.?!]['"]?$/.exec(text)
	  return match !== null
	}

	// Tells whether `text` finishes with a period.
	export function doesStringEndWithPeriod(text: string): boolean {
	  // Only a period counts here; ? and ! do not. The period may be followed
	  // by a single closing quote (' or "), and since anything may precede
	  // it, a quote placed just before the period is accepted too.
	  const endsWithPeriodPattern = /^.*\.['"]?$/
	  return endsWithPeriodPattern.test(text)
	}

	// Tells whether `text` ends with a single or double quote character.
	// Returns false when `text` is undefined or empty.
	export function quotePrecedesLinkOpen(text: string | undefined): boolean {
	  if (!text) return false
	  return text.endsWith('"') || text.endsWith("'")
	}

	// Filters a list of tokens by token type only when they match
	// a specific token type order.
	// For example, if a list of tokens contains:
	//
	// [
	//   { type: 'inline'},
	//   { type: 'list_item_close'},
	//   { type: 'list_item_open'},
	//   { type: 'paragraph_open'},
	//   { type: 'inline'},
	//   { type: 'paragraph_close'},
	// ]
	//
	// And if the `tokenOrder` being looked for is:
	//
	// [
	//   'list_item_open',
	//   'paragraph_open',
	//   'inline'
	// ]
	//
	// Then the return value would be the items that match that sequence:
	// Index 2-4:
	// [
	//   { type: 'inline'},          <-- Index 0 - NOT INCLUDED
	//   { type: 'list_item_close'}, <-- Index 1 - NOT INCLUDED
	//   { type: 'list_item_open'},  <-- Index 2 - INCLUDED
	//   { type: 'paragraph_open'},  <-- Index 3 - INCLUDED
	//   { type: 'inline'},          <-- Index 4 - INCLUDED
	//   { type: 'paragraph_close'}, <-- Index 5 - NOT INCLUDED
	// ]
	//
	// Every position in `tokens` is tried as a possible start, so when matching
	// runs overlap, the shared tokens appear once per run in the result.
	// A `tokenOrder` with a single type returns every token of that type, and
	// an empty `tokenOrder` returns an empty array.
	export function filterTokensByOrder(
	  tokens: MarkdownToken[],
	  tokenOrder: string[],
	): MarkdownToken[] {
	  const matches: MarkdownToken[] = []
	  const sequenceLength = tokenOrder.length
	  // An empty order describes no sequence, so nothing can match it.
	  if (sequenceLength === 0) return matches

	  // Stop at the last index where a complete sequence still fits, so the
	  // comparison below never reads past the end of `tokens`.
	  const lastPossibleStart = tokens.length - sequenceLength
	  for (let start = 0; start <= lastPossibleStart; start++) {
	    const isMatch = tokenOrder.every(
	      (expectedType, offset) => tokens[start + offset].type === expectedType,
	    )
	    if (isMatch) {
	      matches.push(...tokens.slice(start, start + sequenceLength))
	    }
	  }
	  return matches
	}

	// Hostnames listed as docs domains.
	export const docsDomains: string[] = [
	  'docs.github.com',
	  'help.github.com',
	  'developer.github.com',
	]

	// `lines` is an array of strings read from a
	// Markdown file and split around new lines.
	// This is the format we get from Markdownlint.
	// The lines are re-joined with '\n' and parsed with gray-matter.
	// Returns null if the lines do not contain
	// frontmatter properties.
	// Returns frontmatter as a Record with unknown values since YAML can contain various types
	export function getFrontmatter(lines: string[]): Record<string, unknown> | null {
	  const fmString = lines.join('\n')
	  const { data } = matter(fmString)
	  // If there is no frontmatter or the frontmatter contains
	  // no keys, matter will return an empty object.
	  if (Object.keys(data).length === 0) return null
	  return data
	}

	// Extracts the lines from the first line that is exactly '---' through the
	// next such line, delimiters included. Returns an empty array when either
	// the opening or the closing '---' line is missing.
	export function getFrontmatterLines(lines: string[]): string[] {
	  const delimiter = '---'

	  const openingIndex = lines.indexOf(delimiter)
	  if (openingIndex < 0) {
	    return []
	  }

	  const closingIndex = lines.indexOf(delimiter, openingIndex + 1)
	  if (closingIndex < 0) {
	    // An opening delimiter without a closing one yields nothing.
	    return []
	  }

	  return lines.slice(openingIndex, closingIndex + 1)
	}