AbdulElahGwaith's picture
Upload folder using huggingface_hub
88df9e4 verified
import path from 'path'
import { isEqual, uniqWith } from 'lodash-es'
import { describe, expect, test, vi } from 'vitest'
import patterns from '@/frame/lib/patterns'
import { getDataByLanguage, getDeepDataByLanguage } from '@/data-directory/lib/get-data'
// Given syntax like {% data foo.bar %} or {% indented_data_reference foo.bar spaces=3 %},
// the following regex captures just the dotted path (foo.bar) in its first group.
// The optional `spaces=` argument accepts one or two digits.
// Note this regex allows nonstandard whitespace between terms; it does not enforce a single space.
// In other words, it will allow {%data foo.bar %} or {% data    foo.bar %}.
// We should enforce a single space someday, but the content will need a lot of cleanup first, and
// we should have a more purpose-driven validation test for that instead of enforcing it here.
const getDataPathRegex =
/{%\s*?(?:data|indented_data_reference)\s+?(\S+?)\s*?(?:spaces=\d\d?\s*?)?%}/
// Matches an entire {% raw %} ... {% endraw %} block, tags included.
// `.*?` is lazy, so each match stops at the nearest {% endraw %}; the `s` flag lets `.`
// span newlines, and the `g` flag makes the pattern match every such block in the input.
const rawLiquidPattern = /{%\s*raw\s*%}.*?{%\s*endraw\s*%}/gs
/**
 * Collects the dotted data paths referenced by Liquid data tags in `content`.
 *
 * @param content - Text that may contain tags such as `{% data reusables.foo %}`.
 * @returns One dotted path per tag matched by `patterns.dataReference`, in match
 *   order (duplicates are kept); an empty array when nothing matches.
 */
const getDataReferences = (content: string): string[] => {
  // Anything wrapped in a raw block is literal text, not a live reference, e.g.
  //   {% raw %}
  //   Here's an example: {% data reusables.foo.bar %}
  //   {% endraw %}
  // so those blocks are dropped before searching for tags.
  const liquidOnly = content.replace(rawLiquidPattern, '')
  const tags: string[] = liquidOnly.match(patterns.dataReference) ?? []
  // Reduce each full tag to the dotted path captured by getDataPathRegex.
  return tags.map((tag: string) => tag.replace(getDataPathRegex, '$1'))
}
// Verifies that every Liquid data reference embedded in the English variable
// files points at data that exists and is a string.
describe('data references', () => {
  vi.setConfig({ testTimeout: 60 * 1000 })

  test('every data reference found in English variable files is defined and has a value', () => {
    // One entry per failing reference. `value` is whatever getDataByLanguage
    // returned for the key; it is recorded only when it is not a string.
    const errors: Array<{ key: string; value: unknown; variableFile: string }> = []
    const allVariables = getDeepDataByLanguage('variables', 'en')
    const variables = Object.values(allVariables)
    // Guard against vacuously passing when no variable data was found.
    expect(variables.length).toBeGreaterThan(0)

    // Every lookup here is synchronous, so a plain loop is used instead of
    // wrapping the work in Promise.all over async callbacks that never await.
    for (const variablesPerFile of variables) {
      // Serialize the file's data so references nested at any depth are searched.
      const variableRefs = getDataReferences(JSON.stringify(variablesPerFile))
      for (const key of variableRefs) {
        const value = getDataByLanguage(key, 'en')
        if (typeof value === 'string') continue

        // Identify the offending file by finding which key holds this exact object.
        const filename = getFilenameByValue(allVariables, variablesPerFile)
        const variableFile = path.join('data/variables', filename || '')
        errors.push({ key, value, variableFile })
      }
    }

    // The same broken reference can appear several times in one file; report it once.
    const uniqueErrors = uniqWith(errors, isEqual)
    expect(uniqueErrors.length, JSON.stringify(uniqueErrors, null, 2)).toBe(0)
  })
})
/**
 * Reverse lookup: finds the key under which `value` is stored in `object`.
 *
 * @param object - The allVariables object, with dynamic keys.
 * @param value - The nested object being searched for; compared with `===`.
 * @returns The first key (in `Object.keys` order) whose value is strictly equal
 *   to `value`, or `undefined` when there is none.
 */
function getFilenameByValue(object: Record<string, unknown>, value: unknown): string | undefined {
  for (const candidate of Object.keys(object)) {
    if (object[candidate] === value) {
      return candidate
    }
  }
  return undefined
}