File size: 1,483 Bytes
47b305f
 
 
 
 
 
 
 
d68b9da
 
 
 
47b305f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d68b9da
 
 
 
 
 
 
 
 
47b305f
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
-- Full-text search column: a stored, generated tsvector built from `content`
-- with the 'simple' text search configuration. NULL content is treated as an
-- empty string. The column is only added when it does not exist yet.
alter table public.knowledge_chunks
    add column if not exists content_tsv tsvector
        generated always as (
            to_tsvector('simple', coalesce(content, ''))
        ) stored;

-- GIN index over content_tsv, which lets the planner answer `@@` full-text
-- matches on that column from the index. Created only if it is missing.
create index if not exists knowledge_chunks_content_tsv_gin
    on public.knowledge_chunks using gin (content_tsv);

-- Lexical (full-text) retrieval over public.knowledge_chunks.
--
-- The tsquery is built with OR logic so that any matching token scores a hit.
-- websearch_to_tsquery combines terms with AND, which requires ALL words in
-- the query (including filler words like "what", "apa", "saja") to appear in
-- the chunk. Instead, query_text is tokenized via to_tsvector and the
-- resulting lexemes are joined with | (OR).
--
-- Parameters:
--   query_text   free-text search string. NULL, or text that produces no
--                lexemes, yields no rows.
--   match_count  maximum number of rows to return (default 7).
--
-- Returns one row per matching chunk (content, source, filename, page_number)
-- plus lexical_score, the ts_rank_cd of the chunk against the OR query.
-- Rows are ordered by lexical_score descending, ties broken by ascending id.
create or replace function public.match_knowledge_fts(
    query_text text,
    match_count int default 7
)
returns table (
    content text,
    source text,
    filename text,
    page_number integer,
    lexical_score float
)
language sql
stable
as $$
    with query_terms as (
        select to_tsquery(
            'simple',
            string_agg(
                -- Single-quote every lexeme (escaping embedded backslashes and
                -- quotes) so that lexemes containing tsquery syntax characters,
                -- e.g. the ':' in a host:port token, are read as plain operands
                -- instead of raising "syntax error in tsquery".
                ''''
                || replace(replace(tokens.lexeme, E'\\', E'\\\\'), '''', '''''')
                || '''',
                ' | '
                order by tokens.lexeme
            )
        ) as tsq
        -- A tsvector already holds each lexeme once, so no DISTINCT is needed.
        from unnest(
            tsvector_to_array(to_tsvector('simple', query_text))
        ) as tokens(lexeme)
    )
    select
        kc.content,
        kc.source,
        kc.filename,
        kc.page_number,
        -- ts_rank_cd returns real; cast explicitly to the declared
        -- double precision column so the function does not rely on the
        -- server coercing SQL-function results automatically.
        ts_rank_cd(kc.content_tsv, query_terms.tsq)::double precision as lexical_score
    from public.knowledge_chunks kc
    cross join query_terms
    -- When no lexemes were produced tsq is NULL, the match is NULL and no
    -- rows are returned.
    where kc.content_tsv @@ query_terms.tsq
    order by lexical_score desc, kc.id asc
    limit match_count;
$$;