Buckets:

ktongue
/

docker_container

ktongue/docker_container / .cache /opencode /node_modules /ret /dist /tokenizer.js

14.7 kB

	"use strict";
	/**
	 * Module-interop helper: exposes property `k` of `m` on `o` under the name
	 * `k2` (defaults to `k`). An already-installed `this.__createBinding` is
	 * reused when present.
	 *
	 * When `Object.create` exists the property is defined as an enumerable
	 * getter, so later changes to `m[k]` stay visible through `o[k2]`; otherwise
	 * the current value is copied once.
	 */
	var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
	    if (k2 === undefined) k2 = k;
	    Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
	}) : (function(o, m, k, k2) {
	    if (k2 === undefined) k2 = k;
	    o[k2] = m[k];
	}));
	/**
	 * Module-interop helper: stores `v` as the `default` property of `o`.
	 * An already-installed `this.__setModuleDefault` is reused when present.
	 *
	 * When `Object.create` exists the property is defined as enumerable through
	 * `Object.defineProperty`; otherwise it is assigned directly.
	 */
	var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
	    Object.defineProperty(o, "default", { enumerable: true, value: v });
	}) : function(o, v) {
	    o["default"] = v;
	});
	/**
	 * Module-interop helper used for the namespace-style `require` calls in this
	 * file. An already-installed `this.__importStar` is reused when present.
	 *
	 * A module flagged with `__esModule` is returned unchanged. Any other value
	 * is wrapped in a fresh object that re-exposes each own property except
	 * "default" (via `__createBinding`) and carries the original value as
	 * `default` (via `__setModuleDefault`).
	 */
	var __importStar = (this && this.__importStar) || function (mod) {
	    if (mod && mod.__esModule) return mod;
	    var result = {};
	    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
	    __setModuleDefault(result, mod);
	    return result;
	};
	Object.defineProperty(exports, "__esModule", { value: true });
	exports.tokenizer = void 0;
	const util = __importStar(require("./util"));
	const types_1 = require("./types");
	const sets = __importStar(require("./sets"));
	/**
	 * Valid opening characters for capture group names: a single ASCII letter,
	 * underscore or dollar sign. The pattern is anchored at both ends, so it
	 * only matches when tested against exactly one character.
	 */
	const captureGroupFirstChar = /^[a-zA-Z_$]$/i;
	/**
	 * Valid characters for the rest of a capture group name: a single ASCII
	 * letter, decimal digit, underscore or dollar sign (anchored at both ends).
	 */
	const captureGroupChars = /^[a-zA-Z0-9_$]$/i;
	/**
	 * Matches a decimal digit. The tokenizer tests it against one character at
	 * a time to collect the digits that follow a backslash.
	 */
	const digit = /\d/;
	/**
	* Tokenizes a regular expression (that is currently a string)
	* @param {string} regexpStr String of regular expression to be tokenized
	*
	* @returns {Root}
	*/
	exports.tokenizer = (regexpStr) => {
	let i = 0, c;
	let start = { type: types_1.types.ROOT, stack: [] };
	// Keep track of last clause/group and stack.
	let lastGroup = start;
	let last = start.stack;
	let groupStack = [];
	let referenceQueue = [];
	let groupCount = 0;
	const repeatErr = (col) => {
	throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Nothing to repeat at column ${col - 1}`);
	};
	// Decode a few escaped characters.
	let str = util.strToChars(regexpStr);
	// Iterate through each character in string.
	while (i < str.length) {
	switch (c = str[i++]) {
	// Handle escaped characters, inclues a few sets.
	case '\\':
	if (i === str.length) {
	throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: \\ at end of pattern`);
	}
	switch (c = str[i++]) {
	case 'b':
	last.push({ type: types_1.types.POSITION, value: 'b' });
	break;
	case 'B':
	last.push({ type: types_1.types.POSITION, value: 'B' });
	break;
	case 'w':
	last.push(sets.words());
	break;
	case 'W':
	last.push(sets.notWords());
	break;
	case 'd':
	last.push(sets.ints());
	break;
	case 'D':
	last.push(sets.notInts());
	break;
	case 's':
	last.push(sets.whitespace());
	break;
	case 'S':
	last.push(sets.notWhitespace());
	break;
	default:
	// Check if c is integer.
	// In which case it's a reference.
	if (digit.test(c)) {
	let digits = c;
	while (i < str.length && digit.test(str[i])) {
	digits += str[i++];
	}
	let value = parseInt(digits, 10);
	const reference = { type: types_1.types.REFERENCE, value };
	last.push(reference);
	referenceQueue.push({ reference, stack: last, index: last.length - 1 });
	// Escaped character.
	}
	else {
	last.push({ type: types_1.types.CHAR, value: c.charCodeAt(0) });
	}
	}
	break;
	// Positionals.
	case '^':
	last.push({ type: types_1.types.POSITION, value: '^' });
	break;
	case '$':
	last.push({ type: types_1.types.POSITION, value: '$' });
	break;
	// Handle custom sets.
	case '[': {
	// Check if this class is 'anti' i.e. [^abc].
	let not;
	if (str[i] === '^') {
	not = true;
	i++;
	}
	else {
	not = false;
	}
	// Get all the characters in class.
	let classTokens = util.tokenizeClass(str.slice(i), regexpStr);
	// Increase index by length of class.
	i += classTokens[1];
	last.push({
	type: types_1.types.SET,
	set: classTokens[0],
	not,
	});
	break;
	}
	// Class of any character except \n.
	case '.':
	last.push(sets.anyChar());
	break;
	// Push group onto stack.
	case '(': {
	// Create group.
	let group = {
	type: types_1.types.GROUP,
	stack: [],
	remember: true,
	};
	// If this is a special kind of group.
	if (str[i] === '?') {
	c = str[i + 1];
	i += 2;
	// Match if followed by.
	if (c === '=') {
	group.followedBy = true;
	group.remember = false;
	// Match if not followed by.
	}
	else if (c === '!') {
	group.notFollowedBy = true;
	group.remember = false;
	}
	else if (c === '<') {
	let name = '';
	if (captureGroupFirstChar.test(str[i])) {
	name += str[i];
	i++;
	}
	else {
	throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Invalid capture group name, character '${str[i]}'` +
	` after '<' at column ${i + 1}`);
	}
	while (i < str.length && captureGroupChars.test(str[i])) {
	name += str[i];
	i++;
	}
	if (!name) {
	throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Invalid capture group name, character '${str[i]}'` +
	` after '<' at column ${i + 1}`);
	}
	if (str[i] !== '>') {
	throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Unclosed capture group name, expected '>', found` +
	` '${str[i]}' at column ${i + 1}`);
	}
	group.name = name;
	i++;
	}
	else if (c === ':') {
	group.remember = false;
	}
	else {
	throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Invalid group, character '${c}'` +
	` after '?' at column ${i - 1}`);
	}
	}
	else {
	groupCount += 1;
	}
	// Insert subgroup into current group stack.
	last.push(group);
	// Remember the current group for when the group closes.
	groupStack.push(lastGroup);
	// Make this new group the current group.
	lastGroup = group;
	last = group.stack;
	break;
	}
	// Pop group out of stack.
	case ')':
	if (groupStack.length === 0) {
	throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Unmatched ) at column ${i - 1}`);
	}
	lastGroup = groupStack.pop();
	// Check if this group has a PIPE.
	// To get back the correct last stack.
	last = lastGroup.options ?
	lastGroup.options[lastGroup.options.length - 1] :
	lastGroup.stack;
	break;
	// Use pipe character to give more choices.
	case '\|': {
	// Create array where options are if this is the first PIPE
	// in this clause.
	if (!lastGroup.options) {
	lastGroup.options = [lastGroup.stack];
	delete lastGroup.stack;
	}
	// Create a new stack and add to options for rest of clause.
	let stack = [];
	lastGroup.options.push(stack);
	last = stack;
	break;
	}
	// Repetition.
	// For every repetition, remove last element from last stack
	// then insert back a RANGE object.
	// This design is chosen because there could be more than
	// one repetition symbols in a regex i.e. `a?+{2,3}`.
	case '{': {
	let rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max;
	if (rs !== null) {
	if (last.length === 0) {
	repeatErr(i);
	}
	min = parseInt(rs[1], 10);
	max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min;
	i += rs[0].length;
	last.push({
	type: types_1.types.REPETITION,
	min,
	max,
	value: last.pop(),
	});
	}
	else {
	last.push({
	type: types_1.types.CHAR,
	value: 123,
	});
	}
	break;
	}
	case '?':
	if (last.length === 0) {
	repeatErr(i);
	}
	last.push({
	type: types_1.types.REPETITION,
	min: 0,
	max: 1,
	value: last.pop(),
	});
	break;
	case '+':
	if (last.length === 0) {
	repeatErr(i);
	}
	last.push({
	type: types_1.types.REPETITION,
	min: 1,
	max: Infinity,
	value: last.pop(),
	});
	break;
	case '*':
	if (last.length === 0) {
	repeatErr(i);
	}
	last.push({
	type: types_1.types.REPETITION,
	min: 0,
	max: Infinity,
	value: last.pop(),
	});
	break;
	// Default is a character that is not `\[](){}?+*^$`.
	default:
	last.push({
	type: types_1.types.CHAR,
	value: c.charCodeAt(0),
	});
	}
	}
	// Check if any groups have not been closed.
	if (groupStack.length !== 0) {
	throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Unterminated group`);
	}
	updateReferences(referenceQueue, groupCount);
	return start;
	};
	/**
	 * This is a side effecting function that changes references to chars
	 * if there are not enough capturing groups to reference.
	 * See: https://github.com/fent/ret.js/pull/39#issuecomment-1006475703
	 * See: https://github.com/fent/ret.js/issues/38
	 *
	 * A reference whose number exceeds `groupCount` is rewritten in place as a
	 * CHAR token following the legacy octal escape rules: the longest octal
	 * prefix worth at most 0o377 (three digits when the first is 0-3, two when
	 * it is 4-7) becomes the character code, and every remaining digit becomes
	 * its own literal CHAR token inserted right after it. A number starting with
	 * 8 or 9 has no octal prefix, so its first digit is taken literally.
	 *
	 * @param {{reference: Object, stack: Object[], index: number}[]} referenceQueue
	 *   One entry per numeric escape: the token, the stack it was pushed onto
	 *   and its position in that stack. The array itself is not modified.
	 * @param {number} groupCount Number of capturing groups in the pattern
	 * @returns {void}
	 */
	function updateReferences(referenceQueue, groupCount) {
	    const octalPrefix = /^(?:[0-3][0-7]{0,2}|[4-7][0-7]?)/;
	    // Note: We go through the queue in reverse order so that the index we
	    // use is correct even if we have to add multiple tokens to one stack.
	    // A copy is reversed so the caller's array keeps its order.
	    for (const elem of [...referenceQueue].reverse()) {
	        const { reference, stack, index } = elem;
	        if (groupCount >= reference.value) {
	            // Enough groups exist: this stays a real back-reference.
	            continue;
	        }
	        // If there is nothing to reference then turn this into a char token.
	        const valueString = reference.value.toString();
	        const octal = octalPrefix.exec(valueString);
	        reference.type = types_1.types.CHAR;
	        let rest;
	        if (octal === null) {
	            // Handling case when escaped number starts with 8 or 9.
	            reference.value = valueString.charCodeAt(0);
	            rest = valueString.slice(1);
	        }
	        else {
	            reference.value = parseInt(octal[0], 8);
	            rest = valueString.slice(octal[0].length);
	        }
	        // Digits that are not part of the escape are plain characters that
	        // directly follow it.
	        const literals = [...rest].map((ch) => ({
	            type: types_1.types.CHAR,
	            value: ch.charCodeAt(0),
	        }));
	        stack.splice(index + 1, 0, ...literals);
	    }
	}
	//# sourceMappingURL=tokenizer.js.map

Xet Storage Details

Size: 14.7 kB
Xet hash: ef72c7f6b040db4ab5ad5677d45ffec60fb68908b9447f8bff3a67a340d7eaac

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.