RAG_HF / SQL_Example.txt
hueonsu654-arch
.
e80a9bf
-- DDL
-- 1. pgvector ν™•μž₯이 μ—†λ‹€λ©΄ λ¨Όμ € 생성해야 ν•©λ‹ˆλ‹€.
CREATE EXTENSION IF NOT EXISTS vector;
-- 2. ν…Œμ΄λΈ” 생성
CREATE TABLE t_test_textembedding (
id BIGSERIAL PRIMARY KEY, -- PK (μžλ™ 증가)
title VARCHAR(500) NOT NULL, -- 제λͺ©
title_embedding VECTOR(768), -- 제λͺ© μž„λ² λ”© (768차원)
content TEXT NOT NULL, -- λ‚΄μš©
content_embedding VECTOR(768), -- λ‚΄μš© μž„λ² λ”© (768차원)
created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -- 생성일
);
-- 3. (선택) 벑터 검색 μ„±λŠ₯을 높이기 μœ„ν•œ 인덱슀 생성 (HNSW μ•Œκ³ λ¦¬μ¦˜, 코사인 μœ μ‚¬λ„ κΈ°μ€€)
CREATE INDEX idx_title_embedding ON t_test_textembedding USING hnsw (title_embedding vector_cosine_ops);
CREATE INDEX idx_content_embedding ON t_test_textembedding USING hnsw (content_embedding vector_cosine_ops);
--------------------------------
-- 1. pgvector ν™•μž₯이 μ—†λ‹€λ©΄ λ¨Όμ € 생성해야 ν•©λ‹ˆλ‹€.
CREATE EXTENSION IF NOT EXISTS vector;
-- 2. ν…Œμ΄λΈ” 생성
CREATE TABLE t_test_imgembedding (
id BIGSERIAL PRIMARY KEY, -- PK (μžλ™ 증가)
title VARCHAR ,
url VARCHAR ,
mimetype VARCHAR ,
img_embedding VECTOR(1280), -- λ‚΄μš© μž„λ² λ”© (768차원)
created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -- 생성일
);
-- 3. (선택) 벑터 검색 μ„±λŠ₯을 높이기 μœ„ν•œ 인덱슀 생성 (HNSW μ•Œκ³ λ¦¬μ¦˜, 코사인 μœ μ‚¬λ„ κΈ°μ€€)
CREATE INDEX idx_test_imgembedding ON t_test_imgembedding USING hnsw (img_embedding vector_cosine_ops);
----------------------------------
// title_embedding_arr, content_embedding_arrλŠ”
// ONNX λͺ¨λΈμ—μ„œ μΆ”μΆœν•œ 768개의 μˆ«μžκ°€ λ‹΄κΈ΄ λ°°μ—΄(Array)μž…λ‹ˆλ‹€.
let insertData = await db.query(
`
INSERT INTO t_test_textembedding (title, title_embedding, content, content_embedding)
VALUES ($1, $2, $3, $4)
RETURNING id, title, created_at;
`,
[
title,
JSON.stringify(title_embedding_arr), // DB λ“œλΌμ΄λ²„ ν˜Έν™˜μ„±μ„ μœ„ν•΄ λ¬Έμžμ—΄ 포맷 '[...]' 으둜 λ³€ν™˜
content,
JSON.stringify(content_embedding_arr)
]
);
console.log('μƒμ„±λœ 데이터:', insertData.rows[0]);
-----------------------------------------
// query_embedding_arrλŠ” μ‚¬μš©μžμ˜ 검색어λ₯Ό ONNX λͺ¨λΈμ— 돌렀 λ‚˜μ˜¨ μž„λ² λ”© λ°°μ—΄μž…λ‹ˆλ‹€.
let searchResult = await db.query(
`
SELECT
id,
title,
content,
-- 코사인 κ±°λ¦¬λŠ” 0에 κ°€κΉŒμšΈμˆ˜λ‘ μœ μ‚¬ν•˜λ―€λ‘œ, 직관적인 'μœ μ‚¬λ„ 점수'λ₯Ό μœ„ν•΄ 1μ—μ„œ λΊλ‹ˆλ‹€.
1 - (content_embedding <=> $1) AS similarity_score
FROM t_test_textembedding
-- μœ μ‚¬λ„ μž„κ³„κ°’ μ„€μ • (예: 정확도 μ μˆ˜κ°€ 0.5 이상인 κ²ƒλ§Œ)
WHERE 1 - (content_embedding <=> $1) > 0.5
ORDER BY content_embedding <=> $1 ASC
LIMIT 5;
`,
[JSON.stringify(query_embedding_arr)]
);
console.log('μœ μ‚¬ν•œ 데이터 λͺ©λ‘:', searchResult.rows);