Spaces:
Sleeping
Sleeping
| import json | |
| import os | |
| import sys | |
| import pysqlite3 | |
| __import__('pysqlite3') | |
| sys.modules['sqlite3'] = sys.modules.pop('pysqlite3') | |
| import chromadb | |
| from chromadb.utils import embedding_functions | |
| import openai | |
# OpenAI API key, read from the environment.
# NOTE(review): os.getenv returns None when OPENAI_API_KEY is unset; the None
# is then passed straight to the embedding function below — confirm the
# deployment always provides the variable.
openai.api_key = os.getenv("OPENAI_API_KEY")

# ChromaDB client backed by persistent on-disk storage under ./data/chroma_db
# (path is relative to the current working directory).
client = chromadb.PersistentClient(path="./data/chroma_db")

# Embedding function that delegates to OpenAI's "text-embedding-3-small" model.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=openai.api_key,
    model_name="text-embedding-3-small",
)

# Name of the ChromaDB collection this script loads and queries.
COLLECTION_NAME = "busan_data_navigation"
def get_chroma():
    """Load the locally persisted ChromaDB collection.

    Looks up ``COLLECTION_NAME`` on the module-level ``client`` with the
    ``openai_ef`` embedding function and prints a status report, including
    the collection's document count, on success.

    Returns:
        The collection object on success, or ``None`` when the lookup (or the
        document count) raises ``ValueError``; in that case an error report
        is printed instead.
    """
    # NOTE(review): the non-ASCII text in the messages below looks like
    # mis-decoded Korean (UTF-8 read with the wrong codec). It is reproduced
    # exactly as found; restore it from the original source if available.
    try:
        collection = client.get_collection(
            name=COLLECTION_NAME,
            embedding_function=openai_ef,
        )
        print(f"컬λ μ '{COLLECTION_NAME}'μ μ±κ³΅μ μΌλ‘ λ‘λνμ΅λλ€.")
        print(f"νμ¬ μ»¬λ μ μλ {collection.count()} κ°μ λ¬Έμκ° μμ΅λλ€.")
        return collection
    except ValueError as e:
        # NOTE(review): only ValueError is handled; any other exception type
        # raised by the client propagates to the caller. Confirm this matches
        # what the installed chromadb version raises for a missing collection.
        print(f"μ€λ₯: 컬λ μ '{COLLECTION_NAME}'μ λ‘λνλ λ° μ€ν¨νμ΅λλ€.")
        print(f"μ€λ₯ λ©μμ§: {e}")
        print("컬λ μ μ΄ μ‘΄μ¬νμ§ μκ±°λ μμλμμ μ μμ΅λλ€.")
        return None
def extract_titles(id_to_metadata, ids):
    """Return the whitespace-stripped ``title`` for each ID, in the given order.

    Args:
        id_to_metadata: Mapping from document ID to a metadata mapping that
            has a ``'title'`` string entry.
        ids: Iterable of document IDs to look up.

    Returns:
        A list of stripped title strings, one per ID.

    Raises:
        KeyError: If an ID is missing from ``id_to_metadata`` or its metadata
            has no ``'title'`` entry.
    """
    return [id_to_metadata[_id]['title'].strip() for _id in ids]


def main():
    """Run one test query against the collection and print the hit titles.

    Loads the collection via ``get_chroma``; if that fails, prints a notice
    and returns. Otherwise queries for the 10 nearest documents, maps the
    returned IDs to titles through ``./data/id_to_metadata.json`` and prints
    them one per line.
    """
    # NOTE(review): the non-ASCII string literals below look like mis-decoded
    # Korean (UTF-8 read with the wrong codec). They are reproduced exactly
    # as found; restore them from the original source if available.
    collection = get_chroma()
    # get_chroma signals failure with None, so test for that explicitly
    # instead of relying on the collection object's truthiness.
    if collection is None:
        print("컬λ μ μ λ‘λν μ μμ΄ μΏΌλ¦¬λ₯Ό μ€νν μ μμ΅λλ€.")
        return

    # Test query.
    results = collection.query(
        query_texts=["λΆμ°κ΄μμ κ°μꡬ μ΄μ λ©΄νμ λν΄ μλ €μ£ΌμΈμ"],
        n_results=10,
    )

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale default encoding.
    with open('./data/id_to_metadata.json', 'r', encoding='utf-8') as f:
        id_to_metadata = json.load(f)

    # results['ids'] holds one list of IDs per query text; only one query
    # text was sent, so take the first list.
    titles = extract_titles(id_to_metadata, results['ids'][0])
    print("쿼리 결과:\n", '\n'.join(titles))


if __name__ == "__main__":
    main()