File size: 5,699 Bytes
95131e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
/**
 *
 * Copyright 2023-present InspectorRAGet Team
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 **/

import { describe, it, expect } from 'vitest';
import { truncate, hash, overlaps } from '@/src/utilities/strings';

// --- truncate ---

describe('truncate', () => {
  // Table of scenarios: test title, input text, limit passed to truncate,
  // and the exact string the call is expected to return.
  const scenarios = [
    {
      title: 'returns full text when shorter than limit',
      text: 'hello',
      limit: 10,
      expected: 'hello',
    },
    {
      title: 'returns full text when exactly at limit',
      text: 'hello',
      limit: 5,
      expected: 'hello',
    },
    {
      title: 'truncates and adds ellipsis when text exceeds limit',
      text: 'hello world',
      limit: 5,
      expected: 'hello ...',
    },
    {
      title: 'handles empty string',
      text: '',
      limit: 5,
      expected: '',
    },
    {
      title: 'handles limit of 0',
      text: 'hello',
      limit: 0,
      expected: ' ...',
    },
  ];

  // Register one test per scenario, in table order.
  for (const { title, text, limit, expected } of scenarios) {
    it(title, () => {
      expect(truncate(text, limit)).toBe(expected);
    });
  }
});

// --- hash ---

describe('hash', () => {
  it('returns a hex string', () => {
    // Output must consist solely of lowercase hexadecimal digits.
    const lowercaseHex = /^[0-9a-f]+$/;
    expect(hash('test')).toMatch(lowercaseHex);
  });

  it('produces consistent output for same input', () => {
    // Two independent calls with the same input must agree.
    const firstDigest = hash('hello');
    const secondDigest = hash('hello');
    expect(firstDigest).toBe(secondDigest);
  });

  it('produces different output for different inputs', () => {
    const helloDigest = hash('hello');
    const worldDigest = hash('world');
    expect(helloDigest).not.toBe(worldDigest);
  });

  it('returns 32-char MD5 hash', () => {
    // Only the length (32 characters) is checked here, not the digest value.
    const digest = hash('test');
    expect(digest).toHaveLength(32);
  });
});

// --- overlaps ---

// Tests for overlaps(source, target, min_match_tokens?). As exercised below,
// the call returns an array of match objects exposing `start`, `end`, `text`,
// `matchesInTarget` and `count`.
describe('overlaps', () => {
  it('finds matching token sequences between source and target', () => {
    // The whole source appears verbatim inside the target.
    const source = 'the quick brown fox';
    const target = 'I saw the quick brown fox jump over the fence';
    const result = overlaps(source, target);
    expect(result.length).toBeGreaterThan(0);
    expect(result[0].count).toBeGreaterThan(0);
  });

  it('returns empty array when no overlapping tokens', () => {
    const source = 'alpha beta gamma';
    const target = 'one two three four five six';
    const result = overlaps(source, target);
    expect(result).toEqual([]);
  });

  it('is case insensitive', () => {
    // Source and target differ only in letter case.
    const source = 'The Quick Brown';
    const target = 'the quick brown fox';
    const result = overlaps(source, target);
    expect(result.length).toBeGreaterThan(0);
  });

  it('normalizes smart quotes before matching', () => {
    // Source uses curly quotes (U+201C / U+201D); target uses straight quotes.
    const source = 'said \u201chello\u201d today';
    const target = 'he said "hello" today at noon';
    const result = overlaps(source, target);
    expect(result.length).toBeGreaterThan(0);
  });

  it('respects min_match_tokens parameter', () => {
    const source = 'one two three four five';
    const target = 'one two three four five';

    const result3 = overlaps(source, target, 3);
    const result5 = overlaps(source, target, 5);

    // Raising min_match_tokens must never produce more matches.
    expect(result3.length).toBeGreaterThanOrEqual(result5.length);
  });

  it('returns StringMatchObject with correct structure', () => {
    const source = 'the brown fox jumped';
    const target = 'the brown fox jumped over the lazy dog';
    const result = overlaps(source, target);

    // Require at least one match up front so the structural checks below can
    // never be skipped silently (the source is a verbatim prefix of the target).
    expect(result.length).toBeGreaterThan(0);

    const match = result[0];
    expect(match).toHaveProperty('start');
    expect(match).toHaveProperty('end');
    expect(match).toHaveProperty('text');
    expect(match).toHaveProperty('matchesInTarget');
    expect(match).toHaveProperty('count');
    expect(typeof match.start).toBe('number');
    expect(typeof match.end).toBe('number');
    expect(typeof match.text).toBe('string');
    expect(Array.isArray(match.matchesInTarget)).toBe(true);
  });

  it('handles source with fewer tokens than min_match_tokens', () => {
    // Source has 2 tokens while min_match_tokens is 3.
    const source = 'hello world';
    const target = 'hello world everyone';
    const result = overlaps(source, target, 3);
    // The call must complete and yield an array. A `length >= 0` check can
    // never fail, so it is not used here.
    // NOTE(review): the original author's comment says the function still
    // finds "hello world" in this case; once confirmed against the
    // implementation, pin the exact result (length and text) here.
    expect(Array.isArray(result)).toBe(true);
  });

  it('finds multiple non-overlapping matches', () => {
    const source = 'the quick brown fox and the lazy dog';
    const target = 'I saw the quick brown fox then I saw the lazy dog sleeping';
    const result = overlaps(source, target);
    // NOTE(review): the title says "multiple" but only one match is required;
    // tighten this once the expected match count is confirmed.
    expect(result.length).toBeGreaterThanOrEqual(1);
  });

  it('handles empty source', () => {
    const result = overlaps('', 'some target text');
    expect(result).toEqual([]);
  });

  it('handles empty target', () => {
    const result = overlaps('the quick brown fox', '');
    expect(result).toEqual([]);
  });

  it('handles single-word source with default min_match_tokens', () => {
    // A one-token source is still reported as a match even though
    // min_match_tokens is left at its default. Per the original author's
    // note this is because the matching loop always runs at least once;
    // confirm against the implementation.
    const result = overlaps('hello', 'hello world');
    expect(result.length).toBe(1);
    expect(result[0].text).toBe('hello');
  });

  it('handles single-word source with min_match_tokens=1', () => {
    const result = overlaps('hello', 'hello world', 1);
    expect(result.length).toBeGreaterThan(0);
  });
});