/**
*
* Copyright 2023-present InspectorRAGet Team
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**/
import { describe, it, expect } from 'vitest';
import { truncate, hash, overlaps } from '@/src/utilities/strings';
// --- truncate ---
// Table-driven checks for `truncate(text, limit)`: each row holds the test
// title, the two arguments passed to `truncate`, and the exact string the call
// is expected to return.
describe('truncate', () => {
  const scenarios = [
    ['returns full text when shorter than limit', 'hello', 10, 'hello'],
    ['returns full text when exactly at limit', 'hello', 5, 'hello'],
    [
      'truncates and adds ellipsis when text exceeds limit',
      'hello world',
      5,
      'hello ...',
    ],
    ['handles empty string', '', 5, ''],
    ['handles limit of 0', 'hello', 0, ' ...'],
  ];

  // Register one test per row, in the order listed above.
  for (const [title, text, limit, expected] of scenarios) {
    it(title, () => {
      expect(truncate(text, limit)).toBe(expected);
    });
  }
});
// --- hash ---
// Checks for `hash(text)`: character set of the output, determinism,
// sensitivity to the input, and output length.
describe('hash', () => {
  // One or more lowercase hexadecimal digits, nothing else.
  const HEX_PATTERN = /^[0-9a-f]+$/;
  const EXPECTED_DIGEST_LENGTH = 32;

  it('returns a hex string', () => {
    expect(hash('test')).toMatch(HEX_PATTERN);
  });

  it('produces consistent output for same input', () => {
    const first = hash('hello');
    const second = hash('hello');
    expect(first).toBe(second);
  });

  it('produces different output for different inputs', () => {
    const helloDigest = hash('hello');
    const worldDigest = hash('world');
    expect(helloDigest).not.toBe(worldDigest);
  });

  it('returns 32-char MD5 hash', () => {
    const digest = hash('test');
    expect(digest).toHaveLength(EXPECTED_DIGEST_LENGTH);
  });
});
// --- overlaps ---
// Suite for `overlaps(source, target, min_match_tokens?)`. The assertions below
// expect the call to return an array of match objects exposing `start`, `end`,
// `text`, `matchesInTarget` and `count`, and an empty array when nothing
// matches.
describe('overlaps', () => {
  it('finds matching token sequences between source and target', () => {
    const source = 'the quick brown fox';
    const target = 'I saw the quick brown fox jump over the fence';
    const result = overlaps(source, target);
    expect(result.length).toBeGreaterThan(0);
    expect(result[0].count).toBeGreaterThan(0);
  });

  it('returns empty array when no overlapping tokens', () => {
    const source = 'alpha beta gamma';
    const target = 'one two three four five six';
    const result = overlaps(source, target);
    expect(result).toEqual([]);
  });

  it('is case insensitive', () => {
    const source = 'The Quick Brown';
    const target = 'the quick brown fox';
    const result = overlaps(source, target);
    expect(result.length).toBeGreaterThan(0);
  });

  it('normalizes smart quotes before matching', () => {
    // Source uses curly quotes (U+201C / U+201D); target uses straight quotes.
    const source = 'said \u201chello\u201d today';
    const target = 'he said "hello" today at noon';
    const result = overlaps(source, target);
    expect(result.length).toBeGreaterThan(0);
  });

  it('respects min_match_tokens parameter', () => {
    const source = 'one two three four five';
    const target = 'one two three four five';
    const result3 = overlaps(source, target, 3);
    const result5 = overlaps(source, target, 5);
    // A higher min_match_tokens must never yield more matches than a lower one.
    expect(result3.length).toBeGreaterThanOrEqual(result5.length);
  });

  it('returns StringMatchObject with correct structure', () => {
    const source = 'the brown fox jumped';
    const target = 'the brown fox jumped over the lazy dog';
    const result = overlaps(source, target);
    // Require a match up front: guarding the checks below with
    // `if (result.length > 0)` would let this test pass without asserting
    // anything whenever no match is returned.
    expect(result.length).toBeGreaterThan(0);
    const match = result[0];
    expect(match).toHaveProperty('start');
    expect(match).toHaveProperty('end');
    expect(match).toHaveProperty('text');
    expect(match).toHaveProperty('matchesInTarget');
    expect(match).toHaveProperty('count');
    expect(typeof match.start).toBe('number');
    expect(typeof match.end).toBe('number');
    expect(typeof match.text).toBe('string');
    expect(Array.isArray(match.matchesInTarget)).toBe(true);
  });

  it('handles source with fewer tokens than min_match_tokens', () => {
    // Source has 2 tokens while min_match_tokens is 3.
    const source = 'hello world';
    const target = 'hello world everyone';
    const result = overlaps(source, target, 3);
    // NOTE(review): earlier comments here claimed the function still matches
    // "hello world" in this situation, but the assertion was
    // `result.length >= 0`, which can never fail. Until that behavior is
    // confirmed against the implementation, pin only what is certain: the call
    // completes and returns an array. TODO: replace with an exact expectation.
    expect(Array.isArray(result)).toBe(true);
  });

  it('finds multiple non-overlapping matches', () => {
    const source = 'the quick brown fox and the lazy dog';
    const target = 'I saw the quick brown fox then I saw the lazy dog sleeping';
    const result = overlaps(source, target);
    expect(result.length).toBeGreaterThanOrEqual(1);
  });

  it('handles empty source', () => {
    const result = overlaps('', 'some target text');
    expect(result).toEqual([]);
  });

  it('handles empty target', () => {
    const result = overlaps('the quick brown fox', '');
    expect(result).toEqual([]);
  });

  it('handles single-word source with default min_match_tokens', () => {
    // Pins the current behavior: a one-token source that appears in the target
    // yields exactly one match whose text is that token, even though
    // min_match_tokens is left at its default.
    const result = overlaps('hello', 'hello world');
    expect(result.length).toBe(1);
    expect(result[0].text).toBe('hello');
  });

  it('handles single-word source with min_match_tokens=1', () => {
    const result = overlaps('hello', 'hello world', 1);
    expect(result.length).toBeGreaterThan(0);
  });
});
|