# busan_data_navigator/utils/get_embedding.py
# Repository page header: author avatar "atoye1's picture";
# last commit 18fb155, "pushing for deployment".
import json
import os
import sys
import pysqlite3
__import__('pysqlite3')
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
import chromadb
from chromadb.utils import embedding_functions
import openai
# Read the OpenAI API key from the environment; the value is None when
# OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Initialise the ChromaDB client on persistent storage. The path is relative,
# so it resolves against the process's current working directory.
client = chromadb.PersistentClient(path="./data/chroma_db")

# Embedding function handed to the collection; it is configured with the
# OpenAI key read above and the "text-embedding-3-small" model name.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    model_name="text-embedding-3-small",
    api_key=openai.api_key,
)

# Name of the collection this module loads.
COLLECTION_NAME = "busan_data_navigation"
def get_chroma():
    """Load the ChromaDB collection stored on local disk.

    Looks up ``COLLECTION_NAME`` through the module-level ``client``, passing
    ``openai_ef`` as the embedding function, and prints a short status report
    (a success line followed by the document count).

    Returns:
        The collection object on success, or ``None`` when the lookup (or the
        count call) raises ``ValueError``; in that case the error details are
        printed instead.
    """
    try:
        loaded = client.get_collection(
            embedding_function=openai_ef,
            name=COLLECTION_NAME,
        )
        print(f"μ»¬λ ‰μ…˜ '{COLLECTION_NAME}'을 μ„±κ³΅μ μœΌλ‘œ λ‘œλ“œν–ˆμŠ΅λ‹ˆλ‹€.")
        # Counted only after the success line is printed, so a failure here
        # still leaves that first line on stdout.
        doc_total = loaded.count()
        print(f"ν˜„μž¬ μ»¬λ ‰μ…˜μ—λŠ” {doc_total} 개의 λ¬Έμ„œκ°€ μžˆμŠ΅λ‹ˆλ‹€.")
        return loaded
    except ValueError as err:
        # Report the failure, the underlying message, and a hint that the
        # collection may be missing or damaged.
        print(f"였λ₯˜: μ»¬λ ‰μ…˜ '{COLLECTION_NAME}'을 λ‘œλ“œν•˜λŠ” 데 μ‹€νŒ¨ν–ˆμŠ΅λ‹ˆλ‹€.")
        print(f"였λ₯˜ λ©”μ‹œμ§€: {err!s}")
        print("μ»¬λ ‰μ…˜μ΄ μ‘΄μž¬ν•˜μ§€ μ•Šκ±°λ‚˜ μ†μƒλ˜μ—ˆμ„ 수 μžˆμŠ΅λ‹ˆλ‹€.")
    return None
# Script entry point: smoke-test the stored collection with one sample query
# and print the titles of the matching documents.
if __name__ == "__main__":
    # Load the ChromaDB collection.
    collection = get_chroma()
    # get_chroma() signals failure with None, so test for that sentinel
    # explicitly rather than relying on the collection object's truthiness.
    if collection is not None:
        # Run a sample query (for testing only).
        results = collection.query(
            query_texts=["λΆ€μ‚°κ΄‘μ—­μ‹œ κ°•μ„œκ΅¬ μ–΄μ—… λ©΄ν—ˆμ— λŒ€ν•΄ μ•Œλ €μ£Όμ„Έμš”"],
            n_results=10,
        )
        # Decode the metadata file as UTF-8 explicitly: without `encoding`,
        # open() falls back to the platform locale, which can fail or garble
        # non-ASCII text on systems whose default is not UTF-8.
        with open('./data/id_to_metadata.json', 'r', encoding='utf-8') as f:
            id_to_metadata = json.load(f)
        # results['ids'][0] is the id list for the first (and only) query
        # text; each id is looked up in the metadata mapping for its title.
        titles = [
            id_to_metadata[_id]['title'].strip()
            for _id in results['ids'][0]
        ]
        print("쿼리 결과:\n", '\n'.join(titles))
    else:
        print("μ»¬λ ‰μ…˜μ„ λ‘œλ“œν•  수 μ—†μ–΄ 쿼리λ₯Ό μ‹€ν–‰ν•  수 μ—†μŠ΅λ‹ˆλ‹€.")