InspectorRAGet / src /__tests__ /strings.test.ts
kpfadnis's picture
feat: schema v2, tool_calling evaluation, BFCL converter, UI polish, and expression fixes (#18)
95131e1 unverified
raw
history blame
5.7 kB
/**
*
* Copyright 2023-present InspectorRAGet Team
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**/
import { describe, it, expect } from 'vitest';
import { truncate, hash, overlaps } from '@/src/utilities/strings';
// --- truncate ---
/**
 * Specification for `truncate(text, limit)`.
 *
 * The expectations below pin two observable behaviours: text no longer than
 * the limit is returned unchanged, and longer text is cut to `limit`
 * characters and suffixed with ' ...'.
 */
describe('truncate', () => {
  it('returns full text when shorter than limit', () => {
    const shortened = truncate('hello', 10);

    expect(shortened).toBe('hello');
  });

  it('returns full text when exactly at limit', () => {
    // Length equals the limit: nothing is cut and no suffix is appended.
    const shortened = truncate('hello', 5);

    expect(shortened).toBe('hello');
  });

  it('truncates and adds ellipsis when text exceeds limit', () => {
    // The first five characters are kept, followed by the ' ...' suffix.
    const shortened = truncate('hello world', 5);

    expect(shortened).toBe('hello ...');
  });

  it('handles empty string', () => {
    const shortened = truncate('', 5);

    expect(shortened).toBe('');
  });

  it('handles limit of 0', () => {
    // With nothing retained, only the suffix remains.
    const shortened = truncate('hello', 0);

    expect(shortened).toBe(' ...');
  });
});
// --- hash ---
/**
 * Specification for `hash(text)`.
 *
 * Checks the output format (lowercase hexadecimal, 32 characters long) and
 * that the digest is deterministic and distinguishes different inputs.
 */
describe('hash', () => {
  it('returns a hex string', () => {
    const lowercaseHex = /^[0-9a-f]+$/;
    const digest = hash('test');

    expect(digest).toMatch(lowercaseHex);
  });

  it('produces consistent output for same input', () => {
    const firstDigest = hash('hello');
    const secondDigest = hash('hello');

    expect(firstDigest).toBe(secondDigest);
  });

  it('produces different output for different inputs', () => {
    const helloDigest = hash('hello');
    const worldDigest = hash('world');

    expect(helloDigest).not.toBe(worldDigest);
  });

  it('returns 32-char MD5 hash', () => {
    const digest = hash('test');

    expect(digest).toHaveLength(32);
  });
});
// --- overlaps ---
/**
 * Specification for `overlaps(source, target, min_match_tokens?)`.
 *
 * The suite treats the return value as an array of match objects exposing
 * `start`, `end`, `text`, `matchesInTarget` and `count`. It covers basic
 * matching, case and smart-quote normalisation, the optional
 * `min_match_tokens` argument, and empty or very short inputs.
 */
describe('overlaps', () => {
  it('finds matching token sequences between source and target', () => {
    const source = 'the quick brown fox';
    const target = 'I saw the quick brown fox jump over the fence';
    const result = overlaps(source, target);
    expect(result.length).toBeGreaterThan(0);
    expect(result[0].count).toBeGreaterThan(0);
  });

  it('returns empty array when no overlapping tokens', () => {
    const source = 'alpha beta gamma';
    const target = 'one two three four five six';
    const result = overlaps(source, target);
    expect(result).toEqual([]);
  });

  it('is case insensitive', () => {
    const source = 'The Quick Brown';
    const target = 'the quick brown fox';
    const result = overlaps(source, target);
    expect(result.length).toBeGreaterThan(0);
  });

  it('normalizes smart quotes before matching', () => {
    // Source uses curly quotes (U+201C / U+201D); target uses straight quotes.
    const source = 'said \u201chello\u201d today';
    const target = 'he said "hello" today at noon';
    const result = overlaps(source, target);
    expect(result.length).toBeGreaterThan(0);
  });

  it('respects min_match_tokens parameter', () => {
    const source = 'one two three four five';
    const target = 'one two three four five';
    const result3 = overlaps(source, target, 3);
    const result5 = overlaps(source, target, 5);
    // A higher min_match_tokens must never yield more matches than a lower one.
    expect(result3.length).toBeGreaterThanOrEqual(result5.length);
  });

  it('returns StringMatchObject with correct structure', () => {
    const source = 'the brown fox jumped';
    const target = 'the brown fox jumped over the lazy dog';
    const result = overlaps(source, target);
    // Assert a match exists first. Previously the structural checks sat
    // inside `if (result.length > 0)`, so an empty result made this test pass
    // without verifying anything.
    expect(result.length).toBeGreaterThan(0);
    const match = result[0];
    expect(match).toHaveProperty('start');
    expect(match).toHaveProperty('end');
    expect(match).toHaveProperty('text');
    expect(match).toHaveProperty('matchesInTarget');
    expect(match).toHaveProperty('count');
    expect(typeof match.start).toBe('number');
    expect(typeof match.end).toBe('number');
    expect(typeof match.text).toBe('string');
    expect(Array.isArray(match.matchesInTarget)).toBe(true);
  });

  it('handles source with fewer tokens than min_match_tokens', () => {
    // Source has 2 tokens while min_match_tokens is 3.
    const source = 'hello world';
    const target = 'hello world everyone';
    const result = overlaps(source, target, 3);
    // Smoke check: the call completes and yields an array. The previous
    // `length >= 0` assertion was true for any value with a length.
    // NOTE(review): the original comment claimed "hello world" is still
    // matched here; the exact count is not pinned - confirm against the
    // implementation and tighten this assertion.
    expect(Array.isArray(result)).toBe(true);
  });

  it('finds multiple non-overlapping matches', () => {
    const source = 'the quick brown fox and the lazy dog';
    const target = 'I saw the quick brown fox then I saw the lazy dog sleeping';
    const result = overlaps(source, target);
    // NOTE(review): only "at least one" match is asserted despite the title;
    // tighten once the expected number of matches is confirmed.
    expect(result.length).toBeGreaterThanOrEqual(1);
  });

  it('handles empty source', () => {
    const result = overlaps('', 'some target text');
    expect(result).toEqual([]);
  });

  it('handles empty target', () => {
    const result = overlaps('the quick brown fox', '');
    expect(result).toEqual([]);
  });

  it('handles single-word source with default min_match_tokens', () => {
    // A one-token source is still expected to match even though it is shorter
    // than the default minimum (presumably because the implementation tries
    // at least one candidate substring - verify against overlaps).
    const result = overlaps('hello', 'hello world');
    expect(result.length).toBe(1);
    expect(result[0].text).toBe('hello');
  });

  it('handles single-word source with min_match_tokens=1', () => {
    const result = overlaps('hello', 'hello world', 1);
    expect(result.length).toBeGreaterThan(0);
  });
});