Spaces:
Runtime error
Runtime error
| # core/embedder.py | |
| """Embedding generation.""" | |
| from openai import OpenAI | |
| from typing import List, Dict | |
| from config.settings import OPENAI_API_KEY, EMBEDDING_MODEL | |
| import time | |
# Shared OpenAI client, constructed once at import time from the configured
# API key and reused by every embedding request in this module.
client = OpenAI(
    api_key=OPENAI_API_KEY,
)
def embed_chunks(chunks: List[Dict]) -> List[Dict]:
    """
    Embed a list of chunks with one batched embeddings request.

    The ``"text"`` value of every chunk is sent to the embeddings endpoint in
    a single call, and the resulting vector is stored on the same dict under
    the ``"embedding"`` key. The input dicts are mutated in place and the same
    list object is returned.

    Args:
        chunks: Chunk dicts; each must contain a ``"text"`` key.

    Returns:
        List[Dict]: The same chunks with an ``"embedding"`` entry added, e.g.
            [
                {
                    "chunk_id": "...",
                    "text": "...",
                    "embedding": [0.1, 0.2, ...],
                    ...
                },
                ...
            ]

    Raises:
        KeyError: If a chunk has no ``"text"`` key.
    """
    print(f"๐ข ์๋ฒ ๋ฉ ์์ ({len(chunks)}๊ฐ ์ฒญํฌ)")

    # Nothing to embed: skip the request entirely, since the embeddings
    # endpoint does not accept an empty input list.
    if not chunks:
        return chunks

    # perf_counter is monotonic, so the measured duration can never be
    # negative (time.time can jump when the system clock is adjusted).
    start_time = time.perf_counter()

    # Batch processing: all texts are sent in one request.
    # NOTE(review): the API caps the number of inputs per request; very large
    # chunk lists may need to be split by the caller -- confirm the limit.
    texts = [chunk["text"] for chunk in chunks]
    response = client.embeddings.create(
        model=EMBEDDING_MODEL,
        input=texts,
    )

    # Attach each returned vector to the chunk at the same position.
    for i, chunk in enumerate(chunks):
        chunk["embedding"] = response.data[i].embedding

    elapsed = time.perf_counter() - start_time
    print(f"โ ์๋ฒ ๋ฉ ์๋ฃ ({elapsed:.2f}์ด)")

    # Guard the throughput computation against a zero-length interval.
    rate = len(chunks) / elapsed if elapsed > 0 else float("inf")
    print(f" - ์๋: {rate:.2f} chunks/sec")

    return chunks
if __name__ == "__main__":
    # Smoke test: embed one sample chunk and report the vector's dimension.
    sample_chunk = {"chunk_id": "chunk_0", "text": "ํ ์คํธ ๋ฌธ์์ ๋๋ค."}
    results = embed_chunks([sample_chunk])
    first_vector = results[0]["embedding"]
    print(f"์๋ฒ ๋ฉ ์ฐจ์: {len(first_vector)}")