import { program, Option, Command, InvalidArgumentError } from 'commander'
import { errors } from '@elastic/elasticsearch'
import dotenv from 'dotenv'
import { languageKeys } from '@/languages/lib/languages-server'
import { indexGeneralSearch } from './lib/index-general-search'
import {
  allIndexVersionKeys,
  supportedAutocompletePlanVersions,
  versionToIndexVersionMap,
} from '@/search/lib/elasticsearch-versions'
import { indexAISearchAutocomplete } from './lib/index-ai-search-autocomplete'

// Load a local .env file, if there is one, so that ELASTICSEARCH_URL can be
// supplied that way.
dotenv.config()

program.name('index').description('CLI scripts for indexing Docs data into Elasticsearch')

// Every index version key plus the catch-all choice 'all'.
const allVersionKeysWithAll = [...allIndexVersionKeys, 'all']

/**
 * Option-argument parser shared by all integer-valued options.
 *
 * @param value - Raw option argument as typed on the command line.
 * @returns The argument parsed as a base-10 integer.
 * @throws InvalidArgumentError when `parseInt` cannot extract a number, which
 *   makes commander report a usage error for the offending option.
 */
function parseIntegerOption(value: string): number {
  const parsed = Number.parseInt(value, 10)
  if (Number.isNaN(parsed)) {
    throw new InvalidArgumentError('Not a number.')
  }
  return parsed
}

/**
 * Prints an indexing failure to stderr and terminates the process with exit
 * code 1.
 *
 * @param commandName - Subcommand name, used as the prefix of the log line.
 * @param error - Whatever was thrown; it is not assumed to be an `Error`.
 */
function reportIndexingError(commandName: string, error: unknown): void {
  if (error instanceof errors.ElasticsearchClientError) {
    // Not every client error carries `meta`, so only print it when present.
    const { meta } = error as { meta?: unknown }
    if (meta) {
      console.error('Error meta: %O', meta)
    }
  }
  // Guard the `.message` access: a thrown non-Error (or null) must not make
  // the error reporter itself throw.
  const message = error instanceof Error ? error.message : String(error)
  console.error(`${commandName} indexing error:`, message)
  process.exit(1)
}

// NOTE(review): the `<...>` placeholders below are required. Commander treats
// an option declared without one as a boolean flag, in which case the value
// parsers, `.choices()` lists and defaults never apply. The placeholder names
// and their variadic (`...`) form were chosen from the option descriptions;
// confirm them against what indexGeneralSearch / indexAISearchAutocomplete
// expect to receive.
const generalSearchCommand = new Command('general-search')
  .description(
    'Indexes records for general search. Records should be pre-scraped by the scrape script.',
  )
  .option('-v, --verbose', 'Verbose outputs')
  .addOption(
    new Option('-V, --version [VERSION...]', 'Specific versions').choices(allVersionKeysWithAll),
  )
  .addOption(
    new Option('-l, --language <LANGUAGE...>', 'Which languages to focus on').choices(
      languageKeys,
    ),
  )
  .addOption(
    new Option('--not-language <LANGUAGE...>', 'Specific language to omit').choices(languageKeys),
  )
  .option('-u, --elasticsearch-url <url>', 'If different from $ELASTICSEARCH_URL')
  .option('-p, --index-prefix <prefix>', 'Index string to put before index name')
  .option(
    '-s, --stagger-seconds <seconds>',
    'Number of seconds to sleep between each bulk operation',
    parseIntegerOption,
  )
  .option(
    '-r, --retries <count>',
    'Number of retry attempts on recoverable network errors',
    parseIntegerOption,
  )
  .option(
    '--sleep-time <seconds>',
    'Number of seconds to sleep between each retry attempt (defaults to 30)',
    parseIntegerOption,
    30,
  )
  .argument('<source-directory>', 'where the indexable files are')
  .action(async (sourceDirectory, options) => {
    try {
      await indexGeneralSearch(sourceDirectory, options)
    } catch (error: unknown) {
      reportIndexingError('general-search', error)
    }
  })

const aiSearchAutocompleteCommand = new Command('ai-search-autocomplete')
  .description('Index for AI search autocomplete')
  .addOption(
    new Option(
      '-l, --language <LANGUAGE...>',
      'Specific languages(s). (NOTE: Only english, "en" is currently supported',
    ).choices(['en']),
  )
  .addOption(
    new Option('-v, --version <VERSION...>', 'Specific versions').choices(allVersionKeysWithAll),
  )
  .option('--verbose', 'Verbose output')
  .option('--index-prefix <prefix>', 'Prefix for the index names', '')
  .argument('<data-repo-root>', 'path to the docs-internal-data repo')
  .action(async (dataRepoRoot: string, options) => {
    // In the future, we may want to support multiple languages.
    // Currently (since this is an experiment), we only support English, so the
    // --language option is not consulted here.
    const languages = ['en']
    const indexPrefix = options.indexPrefix ?? ''

    // Normalize --version to an array: omitted means 'all', and a single
    // value is wrapped so the code below only deals with one shape.
    let requestedVersions: string[]
    if (Array.isArray(options.version)) {
      requestedVersions = options.version
    } else if (typeof options.version === 'undefined') {
      requestedVersions = ['all']
    } else {
      requestedVersions = [options.version]
    }

    // 'all' (or an empty list) selects every supported autocomplete version;
    // otherwise each requested key is translated to its index version.
    const indexEverything = !requestedVersions.length || requestedVersions[0] === 'all'
    const versions = indexEverything
      ? supportedAutocompletePlanVersions
      : requestedVersions.map((version: string) => versionToIndexVersionMap[version])

    try {
      await indexAISearchAutocomplete({
        dataRepoRoot,
        languages,
        versions,
        indexPrefix,
      })
    } catch (error: unknown) {
      reportIndexingError('ai-search-autocomplete', error)
    }
  })
program.addCommand(generalSearchCommand)
program.addCommand(aiSearchAutocompleteCommand)

// Both subcommands register async action handlers, so the program is parsed
// with parseAsync(): parse() would not wait for the handler's promise, and a
// rejection escaping a handler would surface as an unhandled rejection.
program.parseAsync(process.argv).catch((error: unknown) => {
  console.error(error)
  process.exit(1)
})