Spaces:
Paused
Paused
"""
Configuration for the vector store, embedding model, LLM and related components.
Improved version that uses environment variables and a .env file - adds HuggingFace environment support.
"""
| import os | |
| import logging | |
| import sys | |
| import re | |
| import requests | |
| import json | |
| from pathlib import Path | |
| from typing import Dict, Any | |
| from dotenv import load_dotenv | |
# Logging setup.
logger = logging.getLogger("Config")

# Record where the module is running from (debugging aid).
script_dir = os.path.dirname(os.path.abspath(__file__))
logger.info(f"์คํฌ๋ฆฝํธ ๋๋ ํ ๋ฆฌ: {script_dir}")
logger.info(f"ํ์ฌ ์์ ๋๋ ํ ๋ฆฌ: {os.getcwd()}")
logger.info(f"์ด์ ์ฒด์ : {os.name}")

# Environment detection: treated as a HuggingFace Space when SPACE_ID is set
# or SYSTEM equals "spaces".
IS_HUGGINGFACE = os.getenv('SPACE_ID') is not None or os.getenv('SYSTEM') == 'spaces'

if IS_HUGGINGFACE:
    logger.info("HuggingFace Spaces ํ๊ฒฝ์ด ๊ฐ์ง๋์์ต๋๋ค.")
else:
    # Local run: look for a .env file in a few candidate locations.
    env_paths = [
        ".env",  # current directory
        os.path.join(script_dir, ".env"),  # script directory
        os.path.join(script_dir, "config", ".env"),  # config sub-directory
        os.path.join(os.path.dirname(script_dir), ".env"),  # parent directory
    ]

    # Stop at the first candidate that load_dotenv reports as loaded.
    env_loaded = False
    for env_path in env_paths:
        if not os.path.isfile(env_path):
            continue
        logger.info(f".env ํ์ผ ๋ฐ๊ฒฌ: {env_path}")
        env_loaded = load_dotenv(env_path, verbose=True)
        if env_loaded:
            logger.info(f".env ํ์ผ ๋ก๋ ์ฑ๊ณต: {env_path}")
            break

    if not env_loaded:
        logger.warning(".env ํ์ผ์ ์ฐพ์ ์ ์์ต๋๋ค. ๊ธฐ๋ณธ๊ฐ ๋๋ ์์คํ ํ๊ฒฝ ๋ณ์๋ฅผ ์ฌ์ฉํฉ๋๋ค.")
    logger.info(f"๋ก์ปฌ ํ๊ฒฝ์์ ์คํ ์ค์ ๋๋ค. (OS: {'Windows' if os.name == 'nt' else 'Unix/Linux/MacOS'})")

# Windows detection.
IS_WINDOWS = os.name == 'nt'
# Utility function: read an environment variable (distinguishes HuggingFace and local environments)
def get_env(key: str, default: Any = None, required: bool = False) -> Any:
    """Look up a configuration value in the process environment.

    When ``IS_HUGGINGFACE`` is true, the variable ``HF_SECRET_<KEY>`` (with
    ``key`` upper-cased) is consulted first; if it is unset the plain ``key``
    variable is read. Otherwise only ``key`` is read.

    Args:
        key: Name of the environment variable.
        default: Value returned when the variable is not set.
        required: When true, a resolved value of ``None`` is an error.

    Returns:
        The environment value, or ``default`` when the variable is unset.

    Raises:
        ValueError: If ``required`` is true and the resolved value is ``None``.
    """
    # The secret-prefixed name is only consulted on HuggingFace Spaces.
    value = os.getenv(f"HF_SECRET_{key.upper()}") if IS_HUGGINGFACE else None
    if value is None:
        value = os.getenv(key, default)

    if value is not None or not required:
        return value

    # Required but missing: the hint in the message depends on the environment.
    if IS_HUGGINGFACE:
        error_msg = f"ํ์ ํ๊ฒฝ ๋ณ์ {key}๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค. HuggingFace Space์์ ์ํฌ๋ฆฟ์ ์ค์ ํด์ฃผ์ธ์."
    else:
        error_msg = f"ํ์ ํ๊ฒฝ ๋ณ์ {key}๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค. .env ํ์ผ์ ์ถ๊ฐํด์ฃผ์ธ์."
    logger.error(error_msg)
    raise ValueError(error_msg)
# Utility function for building paths
def ensure_absolute_path(path_str: str) -> str:
    """Turn a possibly relative path into an absolute one.

    A path that already looks like a Windows drive path (``C:\\...``) on
    Windows, or that ``pathlib`` reports as absolute, is returned as is.
    A relative path is resolved against the script directory if that location
    exists, and against the current working directory otherwise.

    Args:
        path_str: Path string to convert.

    Returns:
        The absolute path as a string.
    """
    # Windows drive-letter paths are passed through untouched.
    if IS_WINDOWS and re.match(r'^[a-zA-Z]:\\', path_str):
        logger.info(f"Windows ์ ๋ ๊ฒฝ๋ก ๊ฐ์ง: {path_str}")
        return path_str

    candidate = Path(path_str)
    if candidate.is_absolute():
        return str(candidate)

    relative_to_script = Path(script_dir) / candidate
    relative_to_cwd = Path.cwd() / candidate

    # Prefer whichever location exists, script directory first.
    for option in (relative_to_script, relative_to_cwd):
        if option.exists():
            return str(option)

    # Neither exists: fall back to the working-directory based path.
    return str(relative_to_cwd)
# Utility function for Windows path handling
def normalize_path(path_str: str) -> str:
    """Normalize a path string for the current operating system.

    Thin wrapper around :func:`os.path.normpath`.

    Args:
        path_str: Path string to normalize.

    Returns:
        The normalized path.
    """
    normalized = os.path.normpath(path_str)
    return normalized
# Base directories (converted to absolute paths).
PDF_DIRECTORY_RAW = get_env("PDF_DIRECTORY", "documents")
# Normalize the raw value before resolving it to an absolute path.
PDF_DIRECTORY_RAW = normalize_path(PDF_DIRECTORY_RAW)
PDF_DIRECTORY = ensure_absolute_path(PDF_DIRECTORY_RAW)
CACHE_DIRECTORY_RAW = get_env("CACHE_DIRECTORY", "cached_data")
CACHE_DIRECTORY_RAW = normalize_path(CACHE_DIRECTORY_RAW)
CACHE_DIRECTORY = ensure_absolute_path(CACHE_DIRECTORY_RAW)
logger.info(f"PDF ๋๋ ํ ๋ฆฌ (์๋ณธ): {PDF_DIRECTORY_RAW}")
logger.info(f"PDF ๋๋ ํ ๋ฆฌ (์ ๋): {PDF_DIRECTORY}")
logger.info(f"์บ์ ๋๋ ํ ๋ฆฌ (์๋ณธ): {CACHE_DIRECTORY_RAW}")
logger.info(f"์บ์ ๋๋ ํ ๋ฆฌ (์ ๋): {CACHE_DIRECTORY}")

# Chunking settings.
CHUNK_SIZE = int(get_env("CHUNK_SIZE", "1000"))
CHUNK_OVERLAP = int(get_env("CHUNK_OVERLAP", "200"))

# API keys and service settings.
OPENAI_API_KEY = get_env("OPENAI_API_KEY", "")
LANGFUSE_PUBLIC_KEY = get_env("LANGFUSE_PUBLIC_KEY", "")
LANGFUSE_SECRET_KEY = get_env("LANGFUSE_SECRET_KEY", "")
LANGFUSE_HOST = get_env("LANGFUSE_HOST", "https://cloud.langfuse.com")

# DeepSeek settings.
DEEPSEEK_API_KEY = get_env("DEEPSEEK_API_KEY", "")
DEEPSEEK_ENDPOINT = get_env("DEEPSEEK_ENDPOINT", "https://api.deepseek.com/v1/chat/completions")
DEEPSEEK_MODEL = get_env("DEEPSEEK_MODEL", "deepseek-chat")

# On HuggingFace, log whether a DeepSeek key is present.
if IS_HUGGINGFACE:
    logger.info(f"ํ๊น ํ์ด์ค ํ๊ฒฝ์์ DeepSeek API ํค ์กด์ฌ ์ฌ๋ถ: {bool(DEEPSEEK_API_KEY)}")
    # Only the first and last 4 characters of the key are logged; keys of
    # 8 characters or fewer are fully masked.
    if DEEPSEEK_API_KEY:
        masked_key = DEEPSEEK_API_KEY[:4] + "****" + DEEPSEEK_API_KEY[-4:] if len(DEEPSEEK_API_KEY) > 8 else "****"
        logger.info(f"DeepSeek API ํค: {masked_key}")
    logger.info(f"DeepSeek ๋ชจ๋ธ: {DEEPSEEK_MODEL}")
    logger.info(f"DeepSeek ์๋ํฌ์ธํธ: {DEEPSEEK_ENDPOINT}")

# Milvus vector DB settings.
MILVUS_HOST = get_env("MILVUS_HOST", "localhost")
MILVUS_PORT = get_env("MILVUS_PORT", "19530")
MILVUS_COLLECTION = get_env("MILVUS_COLLECTION", "pdf_documents")

# Embedding / reranker model settings (defaults are multilingual models).
EMBEDDING_MODEL = get_env("EMBEDDING_MODEL", "Alibaba-NLP/gte-multilingual-base")
RERANKER_MODEL = get_env("RERANKER_MODEL", "Alibaba-NLP/gte-multilingual-reranker-base")

# LLM backend selection flags.
USE_OPENAI = get_env("USE_OPENAI", "False").lower() == "true"
USE_DEEPSEEK = get_env("USE_DEEPSEEK", "False").lower() == "true"

# Always define OLLAMA_HOST. Previously it was only assigned in the local
# "neither DeepSeek nor OpenAI" branch, so a local run that fell back to
# Ollama because of a missing API key hit a NameError in print_config().
OLLAMA_HOST = get_env("OLLAMA_HOST", "http://localhost:11434")

if IS_HUGGINGFACE:
    # On HuggingFace, DeepSeek is used whenever its API key is available.
    if DEEPSEEK_API_KEY:
        USE_DEEPSEEK = True
        USE_OPENAI = False
        LLM_MODEL = DEEPSEEK_MODEL
        logger.info("HuggingFace Spaces ํ๊ฒฝ: DeepSeek ๋ชจ๋ธ ์ฌ์ฉ")
    else:
        logger.warning("HuggingFace Spaces ํ๊ฒฝ์์ DeepSeek API ํค๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค.")
        USE_DEEPSEEK = False
        USE_OPENAI = False  # disabled when no API key is available
        LLM_MODEL = get_env("LLM_MODEL", "gemma3:latest")  # fallback model
        logger.info(f"HuggingFace Spaces ํ๊ฒฝ: DeepSeek API ํค ์์, LLM ๋ชจ๋ธ: {LLM_MODEL}")
else:
    # Locally the backend follows the USE_* flags; DeepSeek wins over OpenAI.
    if USE_DEEPSEEK:
        LLM_MODEL = DEEPSEEK_MODEL
        logger.info(f"๋ก์ปฌ ํ๊ฒฝ: DeepSeek ๋ชจ๋ธ ์ฌ์ฉ ({DEEPSEEK_MODEL})")
    elif USE_OPENAI:
        LLM_MODEL = get_env("LLM_MODEL", "gpt-3.5-turbo")
        logger.info(f"๋ก์ปฌ ํ๊ฒฝ: OpenAI ๋ชจ๋ธ ์ฌ์ฉ ({LLM_MODEL})")
    else:
        LLM_MODEL = get_env("LLM_MODEL", "gemma3:latest")
        logger.info(f"๋ก์ปฌ ํ๊ฒฝ: Ollama ๋ชจ๋ธ ์ฌ์ฉ ({LLM_MODEL})")

# API key validation (local environment only): fall back to Ollama when the
# selected backend has no key.
if not IS_HUGGINGFACE:
    if USE_DEEPSEEK and not DEEPSEEK_API_KEY:
        logger.warning("DeepSeek ๋ชจ๋ธ์ด ์ ํ๋์์ง๋ง API ํค๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค.")
        USE_DEEPSEEK = False
        USE_OPENAI = False
        LLM_MODEL = get_env("LLM_MODEL", "gemma3:latest")
        logger.info("DeepSeek API ํค๊ฐ ์์ด Ollama๋ก ํด๋ฐฑํฉ๋๋ค.")
    elif USE_OPENAI and not OPENAI_API_KEY:
        logger.warning("OpenAI ๋ชจ๋ธ์ด ์ ํ๋์์ง๋ง API ํค๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค.")
        logger.warning("OpenAI API ํค๊ฐ ์์ด Ollama๋ก ํด๋ฐฑํฉ๋๋ค.")
        USE_OPENAI = False
        LLM_MODEL = get_env("LLM_MODEL", "gemma3:latest")
# DeepSeek API test function
def test_deepseek_connection():
    """Check that the DeepSeek chat endpoint is reachable with the configured key.

    Sends one short chat-completion request to ``DEEPSEEK_ENDPOINT`` with a
    10 second timeout. No request is made when ``DEEPSEEK_API_KEY`` is empty.

    Returns:
        A dict with keys ``success`` (bool), ``message`` (str) and
        ``status_code`` (the HTTP status, or ``None`` when no response was
        received).
    """
    if not DEEPSEEK_API_KEY:
        logger.warning("DeepSeek API ํค๊ฐ ์ค์ ๋์ง ์์ ํ ์คํธ๋ฅผ ๊ฑด๋๋๋๋ค.")
        return {
            "success": False,
            "message": "API ํค๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค.",
            "status_code": None
        }

    try:
        logger.info(f"DeepSeek API ์ฐ๊ฒฐ ํ ์คํธ ์์: {DEEPSEEK_ENDPOINT}, ๋ชจ๋ธ: {DEEPSEEK_MODEL}")

        # Minimal prompt used only for the connectivity check.
        test_prompt = "Hello, please respond with a short greeting."

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {DEEPSEEK_API_KEY}"
        }
        payload = {
            "model": DEEPSEEK_MODEL,
            "messages": [{"role": "user", "content": test_prompt}],
            "temperature": 0.7,
            "max_tokens": 50
        }

        response = requests.post(
            DEEPSEEK_ENDPOINT,
            headers=headers,
            json=payload,
            timeout=10  # seconds
        )

        if response.status_code == 200:
            logger.info("DeepSeek API ์ฐ๊ฒฐ ์ฑ๊ณต")
            return {
                "success": True,
                "message": "API ์ฐ๊ฒฐ ์ฑ๊ณต",
                "status_code": response.status_code
            }

        logger.error(f"DeepSeek API ์ค๋ฅ: ์ํ ์ฝ๋ {response.status_code}")
        try:
            error_data = response.json()
            error_message = error_data.get("error", {}).get("message", str(error_data))
        except (ValueError, AttributeError):
            # ValueError: the body is not valid JSON.
            # AttributeError: the JSON does not have the expected dict shape.
            # A bare ``except`` here would also trap KeyboardInterrupt/SystemExit.
            error_message = response.text
        return {
            "success": False,
            "message": f"API ์ค๋ฅ: {error_message}",
            "status_code": response.status_code
        }
    except requests.exceptions.Timeout:
        logger.error("DeepSeek API ์์ฒญ ์๊ฐ ์ด๊ณผ")
        return {
            "success": False,
            "message": "API ์์ฒญ ์๊ฐ ์ด๊ณผ",
            "status_code": None
        }
    except requests.exceptions.ConnectionError:
        logger.error("DeepSeek API ์ฐ๊ฒฐ ์คํจ")
        return {
            "success": False,
            "message": "API ์๋ฒ ์ฐ๊ฒฐ ์คํจ",
            "status_code": None
        }
    except Exception as e:
        # Last-resort boundary: report the failure instead of raising.
        logger.error(f"DeepSeek API ํ ์คํธ ์ค ์์์น ๋ชปํ ์ค๋ฅ: {e}", exc_info=True)
        return {
            "success": False,
            "message": f"์์์น ๋ชปํ ์ค๋ฅ: {str(e)}",
            "status_code": None
        }
# Vector search settings.
TOP_K_RETRIEVAL = int(get_env("TOP_K_RETRIEVAL", default="5"))  # number of vector search results
TOP_K_RERANK = int(get_env("TOP_K_RERANK", default="3"))  # number of results kept after reranking

# Logging settings.
LOG_LEVEL = get_env("LOG_LEVEL", default="INFO")
LOG_FILE = get_env("LOG_FILE", default="autorag.log")
# Print configuration information (for debugging)
def print_config():
    """Write the current configuration values to the module logger at INFO level."""
    log = logger.info

    log("===== ํ์ฌ ์ค์ ์ ๋ณด =====")
    log(f"์คํ ํ๊ฒฝ: {'HuggingFace Spaces' if IS_HUGGINGFACE else '๋ก์ปฌ'}")
    log(f"๋ฌธ์ ๋๋ ํ ๋ฆฌ: {PDF_DIRECTORY}")
    log(f"์บ์ ๋๋ ํ ๋ฆฌ: {CACHE_DIRECTORY}")
    log(f"์ฒญํฌ ํฌ๊ธฐ: {CHUNK_SIZE}, ์ค๋ฒ๋ฉ: {CHUNK_OVERLAP}")
    log(f"OpenAI ์ฌ์ฉ: {USE_OPENAI}")
    log(f"DeepSeek ์ฌ์ฉ: {USE_DEEPSEEK}")
    log(f"LLM ๋ชจ๋ธ: {LLM_MODEL}")

    # The Ollama host is only reported for a local run with neither hosted
    # backend enabled.
    if not (USE_OPENAI or USE_DEEPSEEK or IS_HUGGINGFACE):
        log(f"Ollama ํธ์คํธ: {OLLAMA_HOST}")

    log(f"์๋ฒ ๋ฉ ๋ชจ๋ธ: {EMBEDDING_MODEL}")
    log(f"๋ฆฌ๋ญ์ปค ๋ชจ๋ธ: {RERANKER_MODEL}")
    log(f"TOP_K ๊ฒ์: {TOP_K_RETRIEVAL}, ๋ฆฌ๋ญํน: {TOP_K_RERANK}")
    log("=========================")
# Configuration validation
def validate_config() -> Dict[str, Any]:
    """Check the loaded configuration and log every problem found as a warning.

    Checks that the PDF directory exists, that the enabled LLM backend has an
    API key, and that the chunk size exceeds the chunk overlap. When DeepSeek
    is enabled and has a key, it also runs ``test_deepseek_connection()``.

    Returns:
        A dict with ``status`` (``"valid"`` when nothing was flagged,
        ``"warnings"`` otherwise) and ``warnings`` (the list of messages).
    """
    issues = []

    # Directory check.
    if not os.path.exists(PDF_DIRECTORY):
        issues.append(f"PDF ๋๋ ํ ๋ฆฌ({PDF_DIRECTORY})๊ฐ ์กด์ฌํ์ง ์์ต๋๋ค.")

    # API key checks: the OpenAI key is only checked outside HuggingFace; the
    # DeepSeek message differs per environment.
    if not IS_HUGGINGFACE and USE_OPENAI and not OPENAI_API_KEY:
        issues.append("OpenAI ์ฌ์ฉ์ด ์ค์ ๋์์ง๋ง API ํค๊ฐ ์ ๊ณต๋์ง ์์์ต๋๋ค.")
    if USE_DEEPSEEK and not DEEPSEEK_API_KEY:
        if IS_HUGGINGFACE:
            issues.append("ํ๊น ํ์ด์ค ํ๊ฒฝ์์ DeepSeek ์ฌ์ฉ์ด ์ค์ ๋์์ง๋ง API ํค๊ฐ ์ ๊ณต๋์ง ์์์ต๋๋ค.")
        else:
            issues.append("DeepSeek ์ฌ์ฉ์ด ์ค์ ๋์์ง๋ง API ํค๊ฐ ์ ๊ณต๋์ง ์์์ต๋๋ค.")

    # Chunking sanity check.
    if CHUNK_SIZE <= CHUNK_OVERLAP:
        issues.append(f"์ฒญํฌ ํฌ๊ธฐ({CHUNK_SIZE})๊ฐ ์ค๋ฒ๋ฉ({CHUNK_OVERLAP})๋ณด๋ค ์๊ฑฐ๋ ๊ฐ์ต๋๋ค.")

    # Live DeepSeek connectivity check (only when enabled and a key is set).
    if USE_DEEPSEEK and DEEPSEEK_API_KEY:
        deepseek_test_result = test_deepseek_connection()
        if not deepseek_test_result["success"]:
            issues.append(f"DeepSeek API ์ฐ๊ฒฐ ํ ์คํธ ์คํจ: {deepseek_test_result['message']}")

    # Report everything that was collected.
    for issue in issues:
        logger.warning(issue)

    return {
        "status": "warnings" if issues else "valid",
        "warnings": issues
    }
# Executed when the settings are loaded: log the configuration, then
# validate it and keep the result in config_status.
print_config()
config_status = validate_config()