import walk from 'walk-sync'
import path from 'path'
import { fileURLToPath } from 'url'
import { TokenizationError, TokenKind } from 'liquidjs'
import type { TagToken } from 'liquidjs'

import { getLiquidTokens } from '@/content-linter/lib/helpers/liquid-utils'

// fileURLToPath decodes percent-escapes (for example spaces in a directory name) and
// handles Windows drive letters; reading `URL.pathname` directly does neither.
const __dirname = path.dirname(fileURLToPath(import.meta.url))
const repoRoot = path.resolve(__dirname, '../../../../')
// repoRoot is already absolute, so it is the only base these need.
const contentDirectory = path.resolve(repoRoot, 'content/')
const dataDirectory = path.resolve(repoRoot, 'data/')
const reusablesDirectory = path.resolve(dataDirectory, 'reusables/')

// Files in which a reusable is (or could be) used, with the matching line numbers.
export type FilesWithLineNumbers = {
  filePath: string
  lineNumbers: number[]
  reusableFile?: string
}[]

// Files whose contents resemble a reusable, with the computed similarity score.
export type FilesWithSimilarity = {
  filePath: string
  similarityScore: number
  reusableFile?: string
}[]

// Keep only Markdown (.md) and YAML (.yml) files, dropping any README.md.
export function filterFiles(files: string[]): string[] {
  return files.filter((filePath) => {
    const isContentFile = filePath.endsWith('.md') || filePath.endsWith('.yml')
    // The README check has to apply to the Markdown branch as well; without the
    // explicit grouping `&&` binds tighter than `||` and README.md files slip through.
    return isContentFile && !filePath.endsWith('README.md')
  })
}

// Get the absolute paths of every Markdown/YAML file under the content and data directories.
export function getAllContentFilePaths(): string[] {
  const allContentFiles = filterFiles(
    walk(contentDirectory, {
      includeBasePath: true,
      directories: false,
    }),
  )

  const allDataFiles = filterFiles(
    walk(dataDirectory, {
      includeBasePath: true,
      directories: false,
    }),
  )

  return [...allContentFiles, ...allDataFiles]
}

// Get the string that represents the reusable in the content files
// e.g. <reusablesDirectory>/foo/bar.md becomes `reusables.foo.bar`
export function getReusableLiquidString(reusablePath: string): string {
  const relativePath = path.relative(reusablesDirectory, reusablePath)
  // Strip whatever extension the file has (.md or .yml) rather than a fixed three characters.
  const extension = path.extname(relativePath)
  const withoutExtension = extension ? relativePath.slice(0, -extension.length) : relativePath
  // path.relative uses the platform separator, so split on that rather than a literal '/'.
  return `reusables.${withoutExtension.split(path.sep).join('.')}`
}

// Get the `begin` position liquidjs reports for every `{% data <liquidVariable> %}` tag
// in `fileContents`. Returns an empty array when the contents cannot be tokenized;
// any other error is rethrown.
export function getIndicesOfLiquidVariable(liquidVariable: string, fileContents: string): number[] {
  const indices: number[] = []
  try {
    const tokens = getLiquidTokens(fileContents).filter(
      (token): token is TagToken => token.kind === TokenKind.Tag,
    )
    for (const token of tokens) {
      if (token.name === 'data' && token.args.trim() === liquidVariable) {
        indices.push(token.begin)
      }
    }
  } catch (err) {
    if (err instanceof TokenizationError) return []
    throw err
  }
  return indices
}

//
// Find the path to a reusable file.
// Accepts a full path, a partial path or a bare file name; `.md` is assumed when the
// argument ends in neither `.md` nor `.yml`. Prints an error and exits the process with
// status 1 when nothing matches or when more than one reusable matches.
export function resolveReusablePath(reusablePath: string): string {
  // Try .md if extension is not provided
  const hasExtension = reusablePath.endsWith('.md') || reusablePath.endsWith('.yml')
  const searchPath = hasExtension ? reusablePath : `${reusablePath}.md`

  // Allow user to just pass the name of the file. If it's not ambiguous, we'll find it.
  const foundPaths: string[] = getAllReusablesFilePaths().filter((possiblePath) =>
    possiblePath.includes(searchPath),
  )

  if (foundPaths.length === 1) {
    return foundPaths[0]
  }

  if (foundPaths.length === 0) {
    console.error(`Reusables file not found: ${searchPath}`)
  } else {
    console.error(`Multiple reusables found by name: ${searchPath}`)
    foundPaths.forEach((foundPath, index) => {
      console.error(` ${index + 1}: ${getRelativeReusablesPath(foundPath)}`)
    })
    console.error('Please specify which reusable by passing the full path')
  }
  process.exit(1)
}

// Cache for getAllReusablesFilePaths; undefined until the first call populates it.
let paths: string[] | undefined

// Get the absolute paths of every reusable file, skipping README.md files and the
// enterprise_deprecation directory. The directory is walked once and the result is
// cached, so later calls return the same array.
export function getAllReusablesFilePaths(): string[] {
  if (paths) return paths
  paths = filterFiles(
    walk(reusablesDirectory, {
      includeBasePath: true,
      directories: false,
      ignore: ['**/README.md', 'enterprise_deprecation/**'],
    }),
  )
  return paths
}

// Find every index at which `substr` occurs in `str`, ignoring case. Overlapping
// occurrences are all reported. An empty `substr` yields no matches.
// Indices refer to the lower-cased copy of `str`.
export function findIndicesOfSubstringInString(substr: string, str: string): number[] {
  // An empty needle matches at every position, and indexOf clamps its start position to
  // the string length, so the search loop below would never reach -1.
  if (substr === '') return []

  // Lower-case both sides; lower-casing only the haystack would make any needle that
  // contains an upper-case character impossible to find.
  const needle = substr.toLowerCase()
  const haystack = str.toLowerCase()

  const result: number[] = []
  let idx = haystack.indexOf(needle)
  while (idx !== -1) {
    result.push(idx)
    idx = haystack.indexOf(needle, idx + 1)
  }
  return result
}

// Score how similar `substr` is to `str` based on the words their sentences share.
// Both inputs are split into sentences on '.', lower-cased and split into words on ' ';
// every sentence of `substr` is compared with every sentence of `str`.
// NOTE(review): the final step divides by the number of `substr` sentences and multiplies
// by the number of `str` sentences, so the score grows with the size of `str`. The scale
// is kept as is because callers may compare against thresholds tuned to it; confirm
// before changing.
export function findSimilarSubStringInString(substr: string, str: string): number {
  const toSentenceTokens = (text: string): string[][] =>
    text.split('.').map((sentence) => sentence.toLowerCase().split(' '))

  // Tokenise each side once up front instead of once per sentence pair.
  const substrSentences = toSentenceTokens(substr)
  const corpus = toSentenceTokens(str).map((tokens) => ({
    tokenCount: tokens.length,
    words: new Set(tokens),
  }))

  let similarityScore = 0

  // Find how similar every two strings are based on the words they share
  for (const substrTokens of substrSentences) {
    for (const { tokenCount, words } of corpus) {
      const sharedWords = substrTokens.filter((token) => words.has(token))
      similarityScore += sharedWords.length / (substrTokens.length + tokenCount)
    }
  }

  // Normalize the similarity score
  return Math.round((similarityScore / substrSentences.length) * corpus.length)
}

// Work out how a finding's file path and reusable path should be printed: unchanged
// when `absolute` is true, otherwise relative to the repository root.
function getPrintablePaths(absolute: boolean, filePath: string, reusableFile?: string) {
  if (absolute) {
    return { printFilePath: filePath, printReusablePath: reusableFile }
  }
  return {
    printFilePath: path.relative(repoRoot, filePath),
    printReusablePath: getRelativeReusablesPath(reusableFile),
  }
}

// Print the findings to stdout: first each file with the line numbers found in it, then,
// when any are given, the findings of the "similar" algorithm with their scores.
// A finding's reusable is only mentioned when its `reusableFile` is set.
export function printFindsWithLineNumbers(
  absolute: boolean,
  reusableFindings: { filePath: string; lineNumbers: number[]; reusableFile?: string }[],
  similarityFindings?: { filePath: string; similarityScore: number; reusableFile?: string }[],
): void {
  for (const { filePath, lineNumbers, reusableFile } of reusableFindings) {
    const { printFilePath, printReusablePath } = getPrintablePaths(absolute, filePath, reusableFile)
    if (reusableFile) {
      console.log(`\nReusable ${printReusablePath} can be used`)
      console.log(`In ${printFilePath} on:`)
    } else {
      console.log(`\nIn ${printFilePath} on:`)
    }
    for (const lineNumber of lineNumbers) {
      console.log(` Line ${lineNumber}`)
    }
  }

  if (similarityFindings?.length) {
    console.log('\nFindings using "similar" algorithm:')
    for (const { filePath, similarityScore, reusableFile } of similarityFindings) {
      const { printFilePath, printReusablePath } = getPrintablePaths(
        absolute,
        filePath,
        reusableFile,
      )
      if (reusableFile) {
        // NOTE(review): this message says "Reusables" while the one above says "Reusable";
        // left as is in case anything depends on the exact output.
        console.log(`\nReusables ${printReusablePath} can be used`)
        console.log(`In ${printFilePath} with similarity score: ${similarityScore}`)
      } else {
        console.log(`\nIn ${printFilePath} with similarity score: ${similarityScore}`)
      }
    }
  }
}

// Get the path of a reusable relative to the repository root.
// Returns an empty string when no path is given.
export function getRelativeReusablesPath(reusablePath: string | undefined): string {
  if (!reusablePath) {
    return ''
  }
  return path.relative(repoRoot, reusablePath)
}