Spaces:
Sleeping
Sleeping
| import os | |
| from dotenv import load_dotenv | |
class Config:
    """Unified settings object for the RAG pipeline.

    One instance carries every tunable defined here: data paths, chunking
    and embedding parameters, vector-store and search defaults, OpenAI LLM
    defaults, and the local GGUF model options. Values read through
    ``os.getenv`` can be overridden by environment variables (``load_dotenv``
    is called first); every other value is a fixed default.

    NOTE(review): the non-ASCII string literals in this class are reproduced
    exactly as they appear in the reviewed copy of the file, where they look
    mis-encoded (mojibake). Confirm the on-disk file is valid UTF-8.

    Raises:
        ValueError: On construction, when ``OPENAI_API_KEY`` is missing or
            blank, or when a numeric ``GGUF_*`` environment variable cannot
            be parsed by ``int``/``float``.
    """

    def __init__(self):
        """Load ``.env`` and populate every setting with its default or override."""
        # Load the .env file before any os.getenv() call below.
        load_dotenv()

        # ===== API key =====
        self.OPENAI_API_KEY = self._get_api_key()

        # ===== Paths =====
        # Preprocessing inputs and output.
        self.META_CSV_PATH = "./data/data_list.csv"
        self.BASE_FOLDER_PATH = "./data/files/"
        self.OUTPUT_CHUNKS_PATH = "./data/rag_chunks_final.csv"

        # RAG inputs. Only the vector DB directory honours an environment
        # override (CHROMA_DB_PATH); the chunk CSV path is a fixed default.
        self.RAG_INPUT_PATH = "./data/rag_chunks_final.csv"
        self.DB_DIRECTORY = os.getenv("CHROMA_DB_PATH", "./chroma_db")

        # ===== Preprocessing =====
        self.CHUNK_SIZE = 1000
        self.CHUNK_OVERLAP = 200
        self.SEPARATORS = ["\n\n", "\n", " ", ""]
        self.MIN_TEXT_LENGTH = 100

        # ===== Embedding =====
        self.EMBEDDING_MODEL_NAME = "text-embedding-3-small"
        self.BATCH_SIZE = 50
        self.MAX_TOKENS_PER_BATCH = 250000

        # Chunk validation bounds.
        self.MIN_CHUNK_LENGTH = 10
        self.MAX_CHUNK_LENGTH = 10000

        # ===== Vector DB =====
        self.COLLECTION_NAME = "rag_documents"

        # ===== Search =====
        self.DEFAULT_TOP_K = 5
        self.DEFAULT_ALPHA = 0.5
        self.DEFAULT_SEARCH_MODE = "hybrid_rerank"

        # ===== LLM =====
        self.LLM_MODEL_NAME = "gpt-4o-mini"
        self.DEFAULT_TEMPERATURE = 0.0
        self.DEFAULT_MAX_TOKENS = 1000

        # System prompt.
        self.SYSTEM_PROMPT = "๋น์ ์ RFP(์ ์์์ฒญ์) ๋ถ์ ๋ฐ ์์ฝ ์ ๋ฌธ๊ฐ์ ๋๋ค."

        # ===== Local GGUF model =====
        # Whether to use the Model Hub (environment variable first). Only the
        # exact string "true", compared case-insensitively, enables it.
        self.USE_MODEL_HUB = os.getenv("USE_MODEL_HUB", "true").lower() == "true"

        # Hugging Face Model Hub settings.
        # 1. QLoRA (fine-tuned) model - used by the existing service.
        self.MODEL_HUB_REPO = os.getenv(
            "MODEL_HUB_REPO",
            "Dongjin1203/RFP_Documents_chatbot",
        )
        self.MODEL_HUB_FILENAME = os.getenv(
            "MODEL_HUB_FILENAME",
            "Llama-3-Open-Ko-8B.Q4_K_M.gguf",
        )

        # 2. Base model (no PEFT) - for comparison experiments.
        self.BASE_MODEL_HUB_REPO = os.getenv(
            "BASE_MODEL_HUB_REPO",
            "Dongjin1203/Llama-3-Open-Ko-8B-GGUF",
        )
        self.BASE_MODEL_HUB_FILENAME = os.getenv(
            "BASE_MODEL_HUB_FILENAME",
            "Llama-3-Open-Ko-8B-Q4_K_M.gguf",
        )

        # Cache directory shared by both models.
        self.MODEL_CACHE_DIR = os.getenv("MODEL_CACHE_DIR", ".cache/models")

        # Local model path, used when USE_MODEL_HUB is false.
        self.GGUF_MODEL_PATH = os.getenv(
            "GGUF_MODEL_PATH",
            ".cache/models/Llama-3-Open-Ko-8B.Q4_K_M.gguf",
        )

        # GGUF runtime settings (author's note: tuned for a T4 Medium with an
        # 8B model). Each int()/float() raises ValueError on a malformed
        # override.
        # GPU layers; author's note: loads the whole 8B model onto a T4.
        self.GGUF_N_GPU_LAYERS = int(os.getenv("GGUF_N_GPU_LAYERS", "35"))
        # Context length.
        self.GGUF_N_CTX = int(os.getenv("GGUF_N_CTX", "4096"))
        # CPU threads (kept low when the GPU is used).
        self.GGUF_N_THREADS = int(os.getenv("GGUF_N_THREADS", "4"))
        # Maximum number of generated tokens.
        self.GGUF_MAX_NEW_TOKENS = int(os.getenv("GGUF_MAX_NEW_TOKENS", "512"))
        # Generation diversity.
        self.GGUF_TEMPERATURE = float(os.getenv("GGUF_TEMPERATURE", "0.7"))
        # Nucleus sampling.
        self.GGUF_TOP_P = float(os.getenv("GGUF_TOP_P", "0.9"))

    def _get_api_key(self) -> str:
        """Return the OpenAI API key read from the environment.

        Returns:
            The value of ``OPENAI_API_KEY`` with surrounding whitespace
            removed.

        Raises:
            ValueError: If the variable is unset, empty, or whitespace only.
        """
        # Strip so that a blank value, or a key with a stray trailing newline
        # from a secrets file, is not silently accepted as-is.
        api_key = (os.getenv("OPENAI_API_KEY") or "").strip()
        if not api_key:
            raise ValueError(
                "OPENAI_API_KEY๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค.\n"
                "ํ๋ก์ ํธ ๋ฃจํธ์ .env ํ์ผ์ ๋ง๋ค๊ณ OPENAI_API_KEY=your-key ๋ฅผ ์ถ๊ฐํ์ธ์."
            )
        return api_key

    def validate_preprocess(self):
        """Check the preprocessing paths and prepare the output directory.

        Returns:
            ``True`` when both inputs are present.

        Raises:
            FileNotFoundError: If ``META_CSV_PATH`` is not an existing file or
                ``BASE_FOLDER_PATH`` is not an existing directory.
        """
        # isfile/isdir rather than exists: a directory standing in for the
        # CSV (or a file standing in for the folder) is still a missing input.
        if not os.path.isfile(self.META_CSV_PATH):
            raise FileNotFoundError(
                f"๋ฉํ CSV ํ์ผ์ ์ฐพ์ ์ ์์ต๋๋ค: {self.META_CSV_PATH}"
            )
        if not os.path.isdir(self.BASE_FOLDER_PATH):
            raise FileNotFoundError(
                f"ํ์ผ ํด๋๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค: {self.BASE_FOLDER_PATH}"
            )

        # Create the output file's parent directory. dirname is empty when
        # the output path has no directory component, so skip that case.
        output_dir = os.path.dirname(self.OUTPUT_CHUNKS_PATH)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        return True

    def validate_rag(self):
        """Check that an OpenAI API key is set on this instance.

        Returns:
            ``True`` when ``OPENAI_API_KEY`` is truthy.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is empty or ``None``.
        """
        if not self.OPENAI_API_KEY:
            raise ValueError("OPENAI_API_KEY๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค")
        return True

    def validate_gguf(self):
        """Print advisory messages about the GGUF settings.

        Nothing is raised here; a missing local model file only produces a
        warning on stdout.

        Returns:
            Always ``True``.
        """
        # The local file only matters when the Model Hub is not used.
        if not self.USE_MODEL_HUB and not os.path.exists(self.GGUF_MODEL_PATH):
            print(f"โ ๏ธ ๊ฒฝ๊ณ : GGUF ๋ชจ๋ธ ํ์ผ์ด ์์ต๋๋ค: {self.GGUF_MODEL_PATH}")
            print(" USE_MODEL_HUB=true๋ก ์ค์ ํ์ฌ ์๋ ๋ค์ด๋ก๋ํ๊ฑฐ๋ ๋ชจ๋ธ ํ์ผ์ ์ค๋นํ์ธ์.")

        # Only an exact 0 is reported as CPU-only, so the "(n_gpu_layers=0)"
        # message is never printed for another value.
        # NOTE(review): llama.cpp-style loaders treat a negative layer count
        # as "offload all layers" - confirm against the loader in use.
        if self.GGUF_N_GPU_LAYERS != 0:
            print(f"โ GPU ๊ฐ์ ํ์ฑํ: {self.GGUF_N_GPU_LAYERS}๊ฐ ๋ ์ด์ด")
        else:
            print("โ ๏ธ CPU ์ ์ฉ ๋ชจ๋ (n_gpu_layers=0)")
        return True

    def validate_all(self):
        """Run the preprocess, RAG and GGUF validations in that order.

        Returns:
            ``True`` when every validation passes.

        Raises:
            FileNotFoundError: Propagated from ``validate_preprocess``.
            ValueError: Propagated from ``validate_rag``.
        """
        self.validate_preprocess()
        self.validate_rag()
        self.validate_gguf()
        return True

    def validate(self):
        """Backward-compatible entry point; delegates to ``validate_preprocess``."""
        return self.validate_preprocess()

    def print_gguf_config(self):
        """Print the GGUF model settings to stdout (debugging aid)."""
        rule = "=" * 50

        print("\n" + rule)
        print("GGUF ๋ชจ๋ธ ์ค์ ")
        print(rule)
        print(f"Model Hub ์ฌ์ฉ: {self.USE_MODEL_HUB}")

        if self.USE_MODEL_HUB:
            # Hub mode: show both model coordinates and the shared cache.
            print("\n[QLoRA ๋ชจ๋ธ]")
            print(f" Hub Repo: {self.MODEL_HUB_REPO}")
            print(f" Hub ํ์ผ๋ช : {self.MODEL_HUB_FILENAME}")
            print("\n[Base ๋ชจ๋ธ]")
            print(f" Hub Repo: {self.BASE_MODEL_HUB_REPO}")
            print(f" Hub ํ์ผ๋ช : {self.BASE_MODEL_HUB_FILENAME}")
            print("\n[๊ณตํต]")
            print(f" ์บ์ ๋๋ ํ ๋ฆฌ: {self.MODEL_CACHE_DIR}")
        else:
            # Local mode: only the file path is relevant.
            print(f"๋ก์ปฌ ๊ฒฝ๋ก: {self.GGUF_MODEL_PATH}")

        print("\nGPU ์ค์ :")
        print(f" - GPU ๋ ์ด์ด: {self.GGUF_N_GPU_LAYERS}")
        print(f" - ์ปจํ ์คํธ: {self.GGUF_N_CTX}")
        print(f" - ์ค๋ ๋: {self.GGUF_N_THREADS}")
        print("\n์์ฑ ์ค์ :")
        print(f" - Max Tokens: {self.GGUF_MAX_NEW_TOKENS}")
        print(f" - Temperature: {self.GGUF_TEMPERATURE}")
        print(f" - Top-P: {self.GGUF_TOP_P}")
        print(rule + "\n")
# Backward-compatibility aliases: both legacy names refer to the same class.
PreprocessConfig = RAGConfig = Config