Spaces:
Sleeping
Sleeping
File size: 1,483 Bytes
-- Full-text search support for public.knowledge_chunks.
--
-- content_tsv is a STORED generated column: PostgreSQL recomputes it whenever
-- content is written, so no trigger or application code has to maintain it.
-- coalesce() maps a NULL content to '' so the column is an empty tsvector
-- rather than NULL. The 'simple' configuration is used so tokenization is
-- language-neutral; queries against this column must use the same
-- configuration for their lexemes to line up.
alter table public.knowledge_chunks
    add column if not exists content_tsv tsvector
    generated always as (to_tsvector('simple', coalesce(content, ''))) stored;

-- GIN index so that `content_tsv @@ <tsquery>` predicates can be answered
-- from the index instead of scanning every chunk.
create index if not exists knowledge_chunks_content_tsv_gin
    on public.knowledge_chunks
    using gin (content_tsv);
-- Lexical (full-text) retrieval over public.knowledge_chunks.
--
-- Builds a tsquery using OR logic so any matching token scores a hit.
-- websearch_to_tsquery uses AND, which requires ALL words in the query
-- (including stop words like "what", "apa", "saja") to appear in the chunk.
-- Instead, query_text is tokenized via to_tsvector('simple', ...) and its
-- unique lexemes are joined with | (OR).
--
-- Parameters:
--   query_text  - free-form search text. NULL, or text that produces no
--                 lexemes, yields an empty result set.
--   match_count - maximum number of rows returned (default 7).
--
-- Returns one row per matching chunk (content, source, filename,
-- page_number) plus lexical_score, the ts_rank_cd rank of the chunk against
-- the OR query. Rows are ordered best score first; ties are broken by
-- kc.id ascending so the output is deterministic.
create or replace function public.match_knowledge_fts(
    query_text text,
    match_count int default 7
)
returns table (
    content text,
    source text,
    filename text,
    page_number integer,
    lexical_score float
)
language sql
stable
as $$
    with query_lexemes as (
        -- Distinct, already-normalized lexemes of the user's query.
        select distinct tokens.lexeme
        from unnest(
            tsvector_to_array(to_tsvector('simple', query_text))
        ) as tokens (lexeme)
    ),
    query_terms as (
        -- Each lexeme is emitted as a quoted tsquery literal (embedded quotes
        -- and backslashes doubled) and the joined string is cast directly to
        -- tsquery. Feeding raw lexemes back through to_tsquery would re-parse
        -- them, so a lexeme containing tsquery syntax (':', '&', '(', ...),
        -- e.g. a URL token, could raise a syntax error or change the query.
        -- With no lexemes string_agg yields NULL, the cast yields NULL, and
        -- the @@ predicate below matches nothing.
        select cast(
            string_agg(
                ''''
                || replace(replace(lexeme, E'\\', E'\\\\'), '''', '''''')
                || '''',
                ' | ' order by lexeme
            ) as tsquery
        ) as tsq
        from query_lexemes
    )
    select
        kc.content,
        kc.source,
        kc.filename,
        kc.page_number,
        -- ts_rank_cd returns real; cast explicitly to the declared
        -- double precision result column.
        cast(ts_rank_cd(kc.content_tsv, query_terms.tsq) as double precision)
            as lexical_score
    from public.knowledge_chunks as kc
    cross join query_terms
    where kc.content_tsv @@ query_terms.tsq
    order by lexical_score desc, kc.id asc
    limit match_count;
$$;
|