Spaces:
Sleeping
Sleeping
-- Full-text search vector for each knowledge chunk.
-- Stored generated column: derived from content with the 'simple' text
-- search configuration; a NULL content is indexed as the empty string.
alter table public.knowledge_chunks
    add column if not exists content_tsv tsvector
        generated always as (
            to_tsvector('simple', coalesce(content, ''))
        ) stored;
-- GIN index on the precomputed tsvector column, supporting the
-- content_tsv @@ tsquery lookups issued against this table.
create index if not exists knowledge_chunks_content_tsv_gin
    on public.knowledge_chunks
    using gin (content_tsv);
-- Lexical (full-text) retrieval over public.knowledge_chunks.
--
-- The tsquery is built with OR logic so that a chunk matching ANY query
-- token scores a hit. websearch_to_tsquery combines terms with AND, which
-- would require ALL words in the query (including stop words like "what",
-- "apa", "saja") to appear in the chunk. Instead, the query text is
-- tokenized via to_tsvector and its lexemes are joined with | (OR).
--
-- Parameters:
--   query_text   free-form query text
--   match_count  maximum number of rows to return (default 7)
--
-- Returns the chunk's content, source, filename and page_number together
-- with lexical_score (ts_rank_cd of the chunk against the OR query), best
-- score first, ties broken by kc.id. A query_text that yields no lexemes
-- (NULL, empty, punctuation only) produces a NULL tsquery and no rows.
create or replace function public.match_knowledge_fts(
    query_text text,
    match_count int default 7
)
returns table (
    content text,
    source text,
    filename text,
    page_number integer,
    lexical_score float
)
language sql
stable
as $$
    with query_terms as (
        select
            -- Each lexeme is emitted as a quoted tsquery operand (backslash
            -- and single quote escaped) and the result is cast straight to
            -- tsquery. Re-parsing raw lexemes with to_tsquery would treat
            -- characters such as & | ! ( ) : inside URL or path tokens as
            -- query syntax, changing the meaning or raising a syntax error.
            -- The lexemes are already normalized by to_tsvector, so no
            -- second normalization pass is needed.
            cast(
                string_agg(
                    ''''
                        || replace(
                               replace(tokens.lexeme, E'\\', E'\\\\'),
                               '''',
                               ''''''
                           )
                        || '''',
                    ' | '
                    order by tokens.lexeme
                ) as tsquery
            ) as tsq
        -- A tsvector holds each lexeme once, so no DISTINCT is required.
        from unnest(
            tsvector_to_array(to_tsvector('simple', query_text))
        ) as tokens (lexeme)
    )
    select
        kc.content,
        kc.source,
        kc.filename,
        kc.page_number,
        -- ts_rank_cd returns real; cast explicitly to the declared float
        -- (double precision) column instead of relying on implicit
        -- return-type coercion.
        cast(ts_rank_cd(kc.content_tsv, query_terms.tsq) as double precision)
            as lexical_score
    from public.knowledge_chunks as kc
    cross join query_terms
    where kc.content_tsv @@ query_terms.tsq
    order by lexical_score desc, kc.id asc
    limit match_count;
$$;